1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-07-29 10:47:07 +01:00

Compare commits

...

45 Commits

Author SHA1 Message Date
Peter Boyle
12e239dd9f Merge branch 'release/dirac-ITT-2020' 2020-10-13 13:38:29 -04:00
Peter Boyle
af2301afbb Merge pull request #312 from i-kanamori/debug_512
add reordring of random number generators in IO
2020-10-13 11:42:12 -04:00
Peter Boyle
f98856a26f Merge pull request #314 from smangham/issue_readme_precision
Fix for deprecated configure options in documentation (issue #313)
2020-10-13 11:41:38 -04:00
Sam Mangham
d55cc5b380 Fixed typo on --enable-comm, removed all references to --enable-precision except for config options, where it is listed as deprecated. Removed travis test for single precision. 2020-10-12 12:33:13 +01:00
c2b688abc9 Benchmark_IO: reducing max local volume to 32^4 2020-10-10 16:52:56 +01:00
b0d61b9687 Benchmark_IO cleaner output 2020-10-09 21:46:45 +01:00
5f893bf9af Benchmark_IO procurement sizes 2020-10-09 21:31:59 +01:00
0e17bd6597 I/O benchmark cleanup 2020-10-09 20:29:57 +01:00
22caa158cc multi-pass I/O benchmark, with statistic and robustness summary 2020-10-09 20:29:40 +01:00
b24a504d7c hook to access last parallel I/O performance measurement 2020-10-09 20:28:54 +01:00
Peter Boyle
992ef6e9fc more runtime 2020-10-08 22:19:20 -04:00
Peter Boyle
f32a320bc3 Single prec benchmark in double prec compile 2020-10-08 19:52:08 -04:00
Peter Boyle
5f0fe029d2 Improve meemory benchmarks for GPU (avoid host mem ping pong) 2020-10-08 19:51:28 -04:00
6b1486e89b fixing number of colours defaulting to 4 in most cases 2020-10-08 16:31:24 +01:00
Peter Boyle
3f9c427a3a Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2020-10-07 13:12:57 -04:00
Peter Boyle
d201277652 Expose Nc as a compile time configure option.
Remove precision option
2020-10-07 13:07:00 -04:00
fdda7cf9cf Merge branch 'feature/benchmark-io-update' into develop 2020-10-07 15:57:53 +01:00
e22d30f715 Merge branch 'develop' into feature/benchmark-io-update 2020-10-07 15:56:39 +01:00
1ba25a0d8c more I/O benchmark code cleaning 2020-10-07 15:38:41 +01:00
9ba3647bdf script to convert I/O benchmark logs to CSV 2020-10-07 15:35:03 +01:00
5ee832f738 I/O benchmark code cleaning 2020-10-07 15:31:51 +01:00
Peter Boyle
35a69a5133 SU4 x SU4 2020-10-06 21:48:35 -04:00
e9c5a271a8 fixing potential issues with log alignment and timer I/O 2020-10-06 17:58:16 +01:00
acac2d6938 standard C/C++ I/O in benchmark 2020-10-06 17:57:00 +01:00
97db2b8d20 add reordring of random number generator in IO 2020-10-06 17:25:59 +09:00
Peter Boyle
ace9cd64bb dpcpp happy 2020-09-29 08:03:46 -07:00
Peter Boyle
a3e2aeb603 dpcpp options happiness 2020-09-29 06:50:10 -07:00
Peter Boyle
049dd25785 Revert accidental commit thanks michael 2020-09-23 04:13:50 -04:00
Peter Boyle
d43d372294 Merge pull request #311 from mmphys/bugfix/MPIasynch
Asynchronous calls removed - reflect this in Communicator_none.cc
2020-09-22 10:41:48 -04:00
Michael Marshall
b71a081cba Asynchronous calls removed - reflect this in Communicator_none.cc
(Opportunistic doc update - OpenMP support on Mac OS)
2020-09-21 09:33:23 +01:00
Peter Boyle
c48909590b MPI asynch call removal 2020-09-17 20:47:32 +01:00
Peter Boyle
446ef40570 HIP IPC 2020-09-17 20:31:46 +01:00
Peter Boyle
81441e98f4 HIP runs sensible 2020-09-16 03:35:03 +01:00
Peter Boyle
ecd3f890f5 Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2020-09-16 02:30:14 +01:00
Peter Boyle
1c881ce23c HIP does not like half2 visible members x and y so must define own Half2 2020-09-16 02:28:33 +01:00
Peter Boyle
dacbbdd051 Hip Happy Birthday 2020-09-16 00:37:02 +01:00
Peter Boyle
2859955a03 HIP requires "inline" 2020-09-16 00:36:13 +01:00
Peter Boyle
cc220abd1d inline for HIP 2020-09-16 00:35:38 +01:00
Peter Boyle
d1c0c0197e HipCC requires inline on definition 2020-09-16 00:35:06 +01:00
Peter Boyle
fd9424ef27 innlines required to make HIP happy 2020-09-16 00:34:32 +01:00
Peter Boyle
a5c35c4024 Make HIP / Vega happy 2020-09-16 00:33:53 +01:00
Peter Boyle
e03b64dc06 HIP default flaags to work on ROCM 2020-09-16 00:33:09 +01:00
Peter Boyle
4677c40195 HIP improvements 2020-09-16 00:32:27 +01:00
Peter Boyle
288c615782 Hip improvements 2020-09-16 00:31:50 +01:00
Peter Boyle
48e81cf6f8 Hip Pragmas 2020-09-16 00:31:03 +01:00
161 changed files with 1290 additions and 496 deletions

View File

@@ -9,11 +9,6 @@ matrix:
- os: osx - os: osx
osx_image: xcode8.3 osx_image: xcode8.3
compiler: clang compiler: clang
env: PREC=single
- os: osx
osx_image: xcode8.3
compiler: clang
env: PREC=double
before_install: before_install:
- export GRIDDIR=`pwd` - export GRIDDIR=`pwd`
@@ -55,7 +50,7 @@ script:
- make -j4 - make -j4
- make install - make install
- cd $CWD/build - cd $CWD/build
- ../configure --enable-precision=$PREC --enable-simd=SSE4 --enable-comms=none --with-lime=$CWD/build/lime/install ${EXTRACONF} - ../configure --enable-simd=SSE4 --enable-comms=none --with-lime=$CWD/build/lime/install ${EXTRACONF}
- make -j4 - make -j4
- ./benchmarks/Benchmark_dwf --threads 1 --debug-signals - ./benchmarks/Benchmark_dwf --threads 1 --debug-signals
- make check - make check

View File

@@ -34,6 +34,12 @@
#define __SYCL__REDEFINE__ #define __SYCL__REDEFINE__
#endif #endif
/* HIP save and restore compile environment*/
#ifdef GRID_HIP
#pragma push
#pragma push_macro("__HIP_DEVICE_COMPILE__")
#endif
#define EIGEN_NO_HIP
#include <Grid/Eigen/Dense> #include <Grid/Eigen/Dense>
#include <Grid/Eigen/unsupported/CXX11/Tensor> #include <Grid/Eigen/unsupported/CXX11/Tensor>
@@ -52,6 +58,12 @@
#pragma pop #pragma pop
#endif #endif
/*HIP restore*/
#ifdef __HIP__REDEFINE__
#pragma pop_macro("__HIP_DEVICE_COMPILE__")
#pragma pop
#endif
#if defined __GNUC__ #if defined __GNUC__
#pragma GCC diagnostic pop #pragma GCC diagnostic pop
#endif #endif

View File

@@ -138,21 +138,6 @@ public:
int recv_from_rank, int recv_from_rank,
int bytes); int bytes);
void SendRecvPacket(void *xmit,
void *recv,
int xmit_to_rank,
int recv_from_rank,
int bytes);
void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int xmit_to_rank,
void *recv,
int recv_from_rank,
int bytes);
void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
double StencilSendToRecvFrom(void *xmit, double StencilSendToRecvFrom(void *xmit,
int xmit_to_rank, int xmit_to_rank,
void *recv, void *recv,

View File

@@ -77,15 +77,6 @@ void CartesianCommunicator::GlobalSumVector(uint64_t *,int N){}
void CartesianCommunicator::GlobalXOR(uint32_t &){} void CartesianCommunicator::GlobalXOR(uint32_t &){}
void CartesianCommunicator::GlobalXOR(uint64_t &){} void CartesianCommunicator::GlobalXOR(uint64_t &){}
void CartesianCommunicator::SendRecvPacket(void *xmit,
void *recv,
int xmit_to_rank,
int recv_from_rank,
int bytes)
{
assert(0);
}
// Basic Halo comms primitive -- should never call in single node // Basic Halo comms primitive -- should never call in single node
void CartesianCommunicator::SendToRecvFrom(void *xmit, void CartesianCommunicator::SendToRecvFrom(void *xmit,
@@ -96,20 +87,6 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
{ {
assert(0); assert(0);
} }
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
void *recv,
int from,
int bytes)
{
assert(0);
}
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
assert(0);
}
void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words,uint64_t bytes) void CartesianCommunicator::AllToAll(int dim,void *in,void *out,uint64_t words,uint64_t bytes)
{ {
bcopy(in,out,bytes*words); bcopy(in,out,bytes*words);
@@ -137,10 +114,6 @@ double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
int recv_from_rank, int recv_from_rank,
int bytes, int dir) int bytes, int dir)
{ {
std::vector<CommsRequest_t> list;
// Discard the "dir"
SendToRecvFromBegin (list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
SendToRecvFromComplete(list);
return 2.0*bytes; return 2.0*bytes;
} }
double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
@@ -150,13 +123,10 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
int recv_from_rank, int recv_from_rank,
int bytes, int dir) int bytes, int dir)
{ {
// Discard the "dir"
SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
return 2.0*bytes; return 2.0*bytes;
} }
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir) void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall,int dir)
{ {
SendToRecvFromComplete(waitall);
} }
void CartesianCommunicator::StencilBarrier(void){}; void CartesianCommunicator::StencilBarrier(void){};

View File

@@ -32,6 +32,9 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#ifdef GRID_CUDA #ifdef GRID_CUDA
#include <cuda_runtime_api.h> #include <cuda_runtime_api.h>
#endif #endif
#ifdef GRID_HIP
#include <hip/hip_runtime_api.h>
#endif
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
#define header "SharedMemoryMpi: " #define header "SharedMemoryMpi: "
@@ -425,7 +428,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
//////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////
// Hugetlbfs mapping intended // Hugetlbfs mapping intended
//////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////
#ifdef GRID_CUDA #if defined(GRID_CUDA) ||defined(GRID_HIP)
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags) void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{ {
void * ShmCommBuf ; void * ShmCommBuf ;
@@ -448,21 +451,15 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
/////////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////////////
// Each MPI rank should allocate our own buffer // Each MPI rank should allocate our own buffer
/////////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////////////
#ifndef GRID_MPI3_SHM_NONE ShmCommBuf = acceleratorAllocDevice(bytes);
auto err = cudaMalloc(&ShmCommBuf, bytes);
#else
auto err = cudaMallocManaged(&ShmCommBuf, bytes);
#endif
if ( err != cudaSuccess) {
std::cerr << " SharedMemoryMPI.cc cudaMallocManaged failed for " << bytes<<" bytes " <<cudaGetErrorString(err)<< std::endl;
exit(EXIT_FAILURE);
}
if (ShmCommBuf == (void *)NULL ) { if (ShmCommBuf == (void *)NULL ) {
std::cerr << " SharedMemoryMPI.cc cudaMallocManaged failed NULL pointer for " << bytes<<" bytes " << std::endl; std::cerr << " SharedMemoryMPI.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl;
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
if ( WorldRank == 0 ){ if ( WorldRank == 0 ){
std::cout << header " SharedMemoryMPI.cc cudaMalloc "<< bytes << "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl; std::cout << header " SharedMemoryMPI.cc cudaMalloc "<< bytes
<< "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl;
} }
SharedMemoryZero(ShmCommBuf,bytes); SharedMemoryZero(ShmCommBuf,bytes);
@@ -475,15 +472,26 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
////////////////////////////////////////////////// //////////////////////////////////////////////////
// If it is me, pass around the IPC access key // If it is me, pass around the IPC access key
////////////////////////////////////////////////// //////////////////////////////////////////////////
#ifdef GRID_CUDA
cudaIpcMemHandle_t handle; cudaIpcMemHandle_t handle;
if ( r==WorldShmRank ) { if ( r==WorldShmRank ) {
err = cudaIpcGetMemHandle(&handle,ShmCommBuf); auto err = cudaIpcGetMemHandle(&handle,ShmCommBuf);
if ( err != cudaSuccess) { if ( err != cudaSuccess) {
std::cerr << " SharedMemoryMPI.cc cudaIpcGetMemHandle failed for rank" << r <<" "<<cudaGetErrorString(err)<< std::endl; std::cerr << " SharedMemoryMPI.cc cudaIpcGetMemHandle failed for rank" << r <<" "<<cudaGetErrorString(err)<< std::endl;
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
} }
#endif
#ifdef GRID_HIP
hipIpcMemHandle_t handle;
if ( r==WorldShmRank ) {
auto err = hipIpcGetMemHandle(&handle,ShmCommBuf);
if ( err != hipSuccess) {
std::cerr << " SharedMemoryMPI.cc hipIpcGetMemHandle failed for rank" << r <<" "<<hipGetErrorString(err)<< std::endl;
exit(EXIT_FAILURE);
}
}
#endif
////////////////////////////////////////////////// //////////////////////////////////////////////////
// Share this IPC handle across the Shm Comm // Share this IPC handle across the Shm Comm
////////////////////////////////////////////////// //////////////////////////////////////////////////
@@ -500,13 +508,24 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
// If I am not the source, overwrite thisBuf with remote buffer // If I am not the source, overwrite thisBuf with remote buffer
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
void * thisBuf = ShmCommBuf; void * thisBuf = ShmCommBuf;
#ifdef GRID_CUDA
if ( r!=WorldShmRank ) { if ( r!=WorldShmRank ) {
err = cudaIpcOpenMemHandle(&thisBuf,handle,cudaIpcMemLazyEnablePeerAccess); auto err = cudaIpcOpenMemHandle(&thisBuf,handle,cudaIpcMemLazyEnablePeerAccess);
if ( err != cudaSuccess) { if ( err != cudaSuccess) {
std::cerr << " SharedMemoryMPI.cc cudaIpcOpenMemHandle failed for rank" << r <<" "<<cudaGetErrorString(err)<< std::endl; std::cerr << " SharedMemoryMPI.cc cudaIpcOpenMemHandle failed for rank" << r <<" "<<cudaGetErrorString(err)<< std::endl;
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
} }
#endif
#ifdef GRID_HIP
if ( r!=WorldShmRank ) {
auto err = hipIpcOpenMemHandle(&thisBuf,handle,hipIpcMemLazyEnablePeerAccess);
if ( err != hipSuccess) {
std::cerr << " SharedMemoryMPI.cc hipIpcOpenMemHandle failed for rank" << r <<" "<<hipGetErrorString(err)<< std::endl;
exit(EXIT_FAILURE);
}
}
#endif
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////
// Save a copy of the device buffers // Save a copy of the device buffers
/////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////

View File

@@ -60,9 +60,9 @@ void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
autoView( lhs_v , lhs, AcceleratorRead); autoView( lhs_v , lhs, AcceleratorRead);
autoView( rhs_v , rhs, AcceleratorRead); autoView( rhs_v , rhs, AcceleratorRead);
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
decltype(coalescedRead(obj1())) tmp;
auto lhs_t=lhs_v(ss); auto lhs_t=lhs_v(ss);
auto rhs_t=rhs_v(ss); auto rhs_t=rhs_v(ss);
auto tmp =ret_v(ss);
mac(&tmp,&lhs_t,&rhs_t); mac(&tmp,&lhs_t,&rhs_t);
coalescedWrite(ret_v[ss],tmp); coalescedWrite(ret_v[ss],tmp);
}); });
@@ -124,7 +124,7 @@ void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
autoView( ret_v , ret, AcceleratorWrite); autoView( ret_v , ret, AcceleratorWrite);
autoView( lhs_v , lhs, AcceleratorRead); autoView( lhs_v , lhs, AcceleratorRead);
accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{ accelerator_for(ss,lhs_v.size(),obj1::Nsimd(),{
decltype(coalescedRead(obj1())) tmp; auto tmp =ret_v(ss);
auto lhs_t=lhs_v(ss); auto lhs_t=lhs_v(ss);
mac(&tmp,&lhs_t,&rhs); mac(&tmp,&lhs_t,&rhs);
coalescedWrite(ret_v[ss],tmp); coalescedWrite(ret_v[ss],tmp);
@@ -182,7 +182,7 @@ void mac(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
autoView( ret_v , ret, AcceleratorWrite); autoView( ret_v , ret, AcceleratorWrite);
autoView( rhs_v , lhs, AcceleratorRead); autoView( rhs_v , lhs, AcceleratorRead);
accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{ accelerator_for(ss,rhs_v.size(),obj1::Nsimd(),{
decltype(coalescedRead(obj1())) tmp; auto tmp =ret_v(ss);
auto rhs_t=rhs_v(ss); auto rhs_t=rhs_v(ss);
mac(&tmp,&lhs,&rhs_t); mac(&tmp,&lhs,&rhs_t);
coalescedWrite(ret_v[ss],tmp); coalescedWrite(ret_v[ss],tmp);

View File

@@ -2,12 +2,13 @@ NAMESPACE_BEGIN(Grid);
#ifdef GRID_HIP #ifdef GRID_HIP
extern hipDeviceProp_t *gpu_props; extern hipDeviceProp_t *gpu_props;
#define WARP_SIZE 64
#endif #endif
#ifdef GRID_CUDA #ifdef GRID_CUDA
extern cudaDeviceProp *gpu_props; extern cudaDeviceProp *gpu_props;
#define WARP_SIZE 32
#endif #endif
#define WARP_SIZE 32
__device__ unsigned int retirementCount = 0; __device__ unsigned int retirementCount = 0;
template <class Iterator> template <class Iterator>
@@ -64,7 +65,7 @@ __device__ void reduceBlock(volatile sobj *sdata, sobj mySum, const Iterator tid
// cannot use overloaded operators for sobj as they are not volatile-qualified // cannot use overloaded operators for sobj as they are not volatile-qualified
memcpy((void *)&sdata[tid], (void *)&mySum, sizeof(sobj)); memcpy((void *)&sdata[tid], (void *)&mySum, sizeof(sobj));
__syncwarp(); acceleratorSynchronise();
const Iterator VEC = WARP_SIZE; const Iterator VEC = WARP_SIZE;
const Iterator vid = tid & (VEC-1); const Iterator vid = tid & (VEC-1);
@@ -78,9 +79,9 @@ __device__ void reduceBlock(volatile sobj *sdata, sobj mySum, const Iterator tid
beta += temp; beta += temp;
memcpy((void *)&sdata[tid], (void *)&beta, sizeof(sobj)); memcpy((void *)&sdata[tid], (void *)&beta, sizeof(sobj));
} }
__syncwarp(); acceleratorSynchronise();
} }
__syncthreads(); acceleratorSynchroniseAll();
if (threadIdx.x == 0) { if (threadIdx.x == 0) {
beta = Zero(); beta = Zero();
@@ -90,7 +91,7 @@ __device__ void reduceBlock(volatile sobj *sdata, sobj mySum, const Iterator tid
} }
memcpy((void *)&sdata[0], (void *)&beta, sizeof(sobj)); memcpy((void *)&sdata[0], (void *)&beta, sizeof(sobj));
} }
__syncthreads(); acceleratorSynchroniseAll();
} }

View File

@@ -130,6 +130,8 @@ public:
friend std::ostream& operator<< (std::ostream& stream, Logger& log){ friend std::ostream& operator<< (std::ostream& stream, Logger& log){
if ( log.active ) { if ( log.active ) {
std::ios_base::fmtflags f(stream.flags());
stream << log.background()<< std::left; stream << log.background()<< std::left;
if (log.topWidth > 0) if (log.topWidth > 0)
{ {
@@ -152,6 +154,8 @@ public:
<< now << log.background() << " : " ; << now << log.background() << " : " ;
} }
stream << log.colour(); stream << log.colour();
stream.flags(f);
return stream; return stream;
} else { } else {
return devnull; return devnull;

View File

@@ -1,3 +1,4 @@
#include <Grid/GridCore.h> #include <Grid/GridCore.h>
int Grid::BinaryIO::latticeWriteMaxRetry = -1; int Grid::BinaryIO::latticeWriteMaxRetry = -1;
Grid::BinaryIO::IoPerf Grid::BinaryIO::lastPerf;

View File

@@ -79,6 +79,13 @@ inline void removeWhitespace(std::string &key)
/////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////
class BinaryIO { class BinaryIO {
public: public:
struct IoPerf
{
uint64_t size{0},time{0};
double mbytesPerSecond{0.};
};
static IoPerf lastPerf;
static int latticeWriteMaxRetry; static int latticeWriteMaxRetry;
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
@@ -502,12 +509,15 @@ class BinaryIO {
timer.Stop(); timer.Stop();
} }
lastPerf.size = sizeof(fobj)*iodata.size()*nrank;
lastPerf.time = timer.useconds();
lastPerf.mbytesPerSecond = lastPerf.size/1024./1024./(lastPerf.time/1.0e6);
std::cout<<GridLogMessage<<"IOobject: "; std::cout<<GridLogMessage<<"IOobject: ";
if ( control & BINARYIO_READ) std::cout << " read "; if ( control & BINARYIO_READ) std::cout << " read ";
else std::cout << " write "; else std::cout << " write ";
uint64_t bytes = sizeof(fobj)*iodata.size()*nrank; uint64_t bytes = sizeof(fobj)*iodata.size()*nrank;
std::cout<< bytes <<" bytes in "<<timer.Elapsed() <<" " std::cout<< lastPerf.size <<" bytes in "<< timer.Elapsed() <<" "
<< (double)bytes/ (double)timer.useconds() <<" MB/s "<<std::endl; << lastPerf.mbytesPerSecond <<" MB/s "<<std::endl;
std::cout<<GridLogMessage<<"IOobject: endian and checksum overhead "<<bstimer.Elapsed() <<std::endl; std::cout<<GridLogMessage<<"IOobject: endian and checksum overhead "<<bstimer.Elapsed() <<std::endl;
@@ -663,10 +673,15 @@ class BinaryIO {
nersc_csum,scidac_csuma,scidac_csumb); nersc_csum,scidac_csuma,scidac_csumb);
timer.Start(); timer.Start();
thread_for(lidx,lsites,{ thread_for(lidx,lsites,{ // FIX ME, suboptimal implementation
std::vector<RngStateType> tmp(RngStateCount); std::vector<RngStateType> tmp(RngStateCount);
std::copy(iodata[lidx].begin(),iodata[lidx].end(),tmp.begin()); std::copy(iodata[lidx].begin(),iodata[lidx].end(),tmp.begin());
parallel_rng.SetState(tmp,lidx); Coordinate lcoor;
grid->LocalIndexToLocalCoor(lidx, lcoor);
int o_idx=grid->oIndex(lcoor);
int i_idx=grid->iIndex(lcoor);
int gidx=parallel_rng.generator_idx(o_idx,i_idx);
parallel_rng.SetState(tmp,gidx);
}); });
timer.Stop(); timer.Stop();
@@ -723,7 +738,12 @@ class BinaryIO {
std::vector<RNGstate> iodata(lsites); std::vector<RNGstate> iodata(lsites);
thread_for(lidx,lsites,{ thread_for(lidx,lsites,{
std::vector<RngStateType> tmp(RngStateCount); std::vector<RngStateType> tmp(RngStateCount);
parallel_rng.GetState(tmp,lidx); Coordinate lcoor;
grid->LocalIndexToLocalCoor(lidx, lcoor);
int o_idx=grid->oIndex(lcoor);
int i_idx=grid->iIndex(lcoor);
int gidx=parallel_rng.generator_idx(o_idx,i_idx);
parallel_rng.GetState(tmp,gidx);
std::copy(tmp.begin(),tmp.end(),iodata[lidx].begin()); std::copy(tmp.begin(),tmp.end(),iodata[lidx].begin());
}); });
timer.Stop(); timer.Stop();

View File

@@ -47,7 +47,7 @@ static constexpr int Ym = 5;
static constexpr int Zm = 6; static constexpr int Zm = 6;
static constexpr int Tm = 7; static constexpr int Tm = 7;
static constexpr int Nc=3; static constexpr int Nc=Config_Nc;
static constexpr int Ns=4; static constexpr int Ns=4;
static constexpr int Nd=4; static constexpr int Nd=4;
static constexpr int Nhs=2; // half spinor static constexpr int Nhs=2; // half spinor

View File

@@ -63,17 +63,20 @@ template<class Impl> class StaggeredKernels : public FermionOperator<Impl> , pub
/////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////
// Generic Nc kernels // Generic Nc kernels
/////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////
template<int Naik> accelerator_inline template<int Naik>
static accelerator_inline
void DhopSiteGeneric(StencilView &st, void DhopSiteGeneric(StencilView &st,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor * buf, int LLs, int sU, SiteSpinor * buf, int LLs, int sU,
const FermionFieldView &in, FermionFieldView &out,int dag); const FermionFieldView &in, FermionFieldView &out,int dag);
template<int Naik> accelerator_inline
template<int Naik> static accelerator_inline
void DhopSiteGenericInt(StencilView &st, void DhopSiteGenericInt(StencilView &st,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor * buf, int LLs, int sU, SiteSpinor * buf, int LLs, int sU,
const FermionFieldView &in, FermionFieldView &out,int dag); const FermionFieldView &in, FermionFieldView &out,int dag);
template<int Naik> accelerator_inline
template<int Naik> static accelerator_inline
void DhopSiteGenericExt(StencilView &st, void DhopSiteGenericExt(StencilView &st,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor * buf, int LLs, int sU, SiteSpinor * buf, int LLs, int sU,
@@ -82,17 +85,20 @@ template<class Impl> class StaggeredKernels : public FermionOperator<Impl> , pub
/////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////
// Nc=3 specific kernels // Nc=3 specific kernels
/////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////
template<int Naik> accelerator_inline
template<int Naik> static accelerator_inline
void DhopSiteHand(StencilView &st, void DhopSiteHand(StencilView &st,
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
SiteSpinor * buf, int LLs, int sU, SiteSpinor * buf, int LLs, int sU,
const FermionFieldView &in, FermionFieldView &out,int dag); const FermionFieldView &in, FermionFieldView &out,int dag);
template<int Naik> accelerator_inline
template<int Naik> static accelerator_inline
void DhopSiteHandInt(StencilView &st, void DhopSiteHandInt(StencilView &st,
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
SiteSpinor * buf, int LLs, int sU, SiteSpinor * buf, int LLs, int sU,
const FermionFieldView &in, FermionFieldView &out,int dag); const FermionFieldView &in, FermionFieldView &out,int dag);
template<int Naik> accelerator_inline
template<int Naik> static accelerator_inline
void DhopSiteHandExt(StencilView &st, void DhopSiteHandExt(StencilView &st,
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
SiteSpinor * buf, int LLs, int sU, SiteSpinor * buf, int LLs, int sU,
@@ -101,6 +107,7 @@ template<class Impl> class StaggeredKernels : public FermionOperator<Impl> , pub
/////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////
// Asm Nc=3 specific kernels // Asm Nc=3 specific kernels
/////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////
void DhopSiteAsm(StencilView &st, void DhopSiteAsm(StencilView &st,
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
SiteSpinor * buf, int LLs, int sU, SiteSpinor * buf, int LLs, int sU,

View File

@@ -799,7 +799,7 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
PropagatorField tmp(UGrid); PropagatorField tmp(UGrid);
PropagatorField Utmp(UGrid); PropagatorField Utmp(UGrid);
LatticeInteger zz (UGrid); zz=0.0; PropagatorField zz (UGrid); zz=0.0;
LatticeInteger lcoor(UGrid); LatticeCoordinate(lcoor,Nd-1); LatticeInteger lcoor(UGrid); LatticeCoordinate(lcoor,Nd-1);
for (int s=0;s<Ls;s++) { for (int s=0;s<Ls;s++) {
@@ -850,7 +850,7 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
PropagatorField tmp(UGrid); PropagatorField tmp(UGrid);
PropagatorField Utmp(UGrid); PropagatorField Utmp(UGrid);
LatticeInteger zz (UGrid); zz=0.0; PropagatorField zz (UGrid); zz=0.0;
LatticeInteger lcoor(UGrid); LatticeCoordinate(lcoor,Nd-1); LatticeInteger lcoor(UGrid); LatticeCoordinate(lcoor,Nd-1);
for(int s=0;s<Ls;s++){ for(int s=0;s<Ls;s++){

View File

@@ -146,7 +146,7 @@ NAMESPACE_BEGIN(Grid);
template <class Impl> template <class Impl>
template <int Naik> template <int Naik> accelerator_inline
void StaggeredKernels<Impl>::DhopSiteHand(StencilView &st, void StaggeredKernels<Impl>::DhopSiteHand(StencilView &st,
DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU, DoubledGaugeFieldView &U,DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int sF, int sU, SiteSpinor *buf, int sF, int sU,
@@ -221,7 +221,7 @@ void StaggeredKernels<Impl>::DhopSiteHand(StencilView &st,
template <class Impl> template <class Impl>
template <int Naik> template <int Naik> accelerator_inline
void StaggeredKernels<Impl>::DhopSiteHandInt(StencilView &st, void StaggeredKernels<Impl>::DhopSiteHandInt(StencilView &st,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int sF, int sU, SiteSpinor *buf, int sF, int sU,
@@ -300,7 +300,7 @@ void StaggeredKernels<Impl>::DhopSiteHandInt(StencilView &st,
template <class Impl> template <class Impl>
template <int Naik> template <int Naik> accelerator_inline
void StaggeredKernels<Impl>::DhopSiteHandExt(StencilView &st, void StaggeredKernels<Impl>::DhopSiteHandExt(StencilView &st,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int sF, int sU, SiteSpinor *buf, int sF, int sU,

View File

@@ -78,7 +78,7 @@ StaggeredKernels<Impl>::StaggeredKernels(const ImplParams &p) : Base(p){};
// Int, Ext, Int+Ext cases for comms overlap // Int, Ext, Int+Ext cases for comms overlap
//////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////
template <class Impl> template <class Impl>
template <int Naik> template <int Naik> accelerator_inline
void StaggeredKernels<Impl>::DhopSiteGeneric(StencilView &st, void StaggeredKernels<Impl>::DhopSiteGeneric(StencilView &st,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int sF, int sU, SiteSpinor *buf, int sF, int sU,
@@ -126,7 +126,7 @@ void StaggeredKernels<Impl>::DhopSiteGeneric(StencilView &st,
// Only contributions from interior of our node // Only contributions from interior of our node
/////////////////////////////////////////////////// ///////////////////////////////////////////////////
template <class Impl> template <class Impl>
template <int Naik> template <int Naik> accelerator_inline
void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilView &st, void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilView &st,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int sF, int sU, SiteSpinor *buf, int sF, int sU,
@@ -174,7 +174,7 @@ void StaggeredKernels<Impl>::DhopSiteGenericInt(StencilView &st,
// Only contributions from exterior of our node // Only contributions from exterior of our node
/////////////////////////////////////////////////// ///////////////////////////////////////////////////
template <class Impl> template <class Impl>
template <int Naik> template <int Naik> accelerator_inline
void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilView &st, void StaggeredKernels<Impl>::DhopSiteGenericExt(StencilView &st,
DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU, DoubledGaugeFieldView &U, DoubledGaugeFieldView &UUU,
SiteSpinor *buf, int sF, int sU, SiteSpinor *buf, int sF, int sU,

View File

@@ -646,7 +646,7 @@ NAMESPACE_BEGIN(Grid);
HAND_RESULT_EXT(ss,F) HAND_RESULT_EXT(ss,F)
#define HAND_SPECIALISE_GPARITY(IMPL) \ #define HAND_SPECIALISE_GPARITY(IMPL) \
template<> void \ template<> accelerator_inline void \
WilsonKernels<IMPL>::HandDhopSite(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \ WilsonKernels<IMPL>::HandDhopSite(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \ int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
{ \ { \
@@ -662,7 +662,7 @@ NAMESPACE_BEGIN(Grid);
HAND_DOP_SITE(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \ HAND_DOP_SITE(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
} \ } \
\ \
template<> void \ template<> accelerator_inline void \
WilsonKernels<IMPL>::HandDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \ WilsonKernels<IMPL>::HandDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \ int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
{ \ { \
@@ -678,7 +678,7 @@ NAMESPACE_BEGIN(Grid);
HAND_DOP_SITE_DAG(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \ HAND_DOP_SITE_DAG(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
} \ } \
\ \
template<> void \ template<> accelerator_inline void \
WilsonKernels<IMPL>::HandDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \ WilsonKernels<IMPL>::HandDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \ int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
{ \ { \
@@ -694,7 +694,7 @@ NAMESPACE_BEGIN(Grid);
HAND_DOP_SITE_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \ HAND_DOP_SITE_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
} \ } \
\ \
template<> void \ template<> accelerator_inline void \
WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \ WilsonKernels<IMPL>::HandDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \ int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
{ \ { \
@@ -710,7 +710,7 @@ NAMESPACE_BEGIN(Grid);
HAND_DOP_SITE_DAG_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \ HAND_DOP_SITE_DAG_INT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
} \ } \
\ \
template<> void \ template<> accelerator_inline void \
WilsonKernels<IMPL>::HandDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \ WilsonKernels<IMPL>::HandDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \ int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
{ \ { \
@@ -727,7 +727,7 @@ NAMESPACE_BEGIN(Grid);
nmu = 0; \ nmu = 0; \
HAND_DOP_SITE_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \ HAND_DOP_SITE_EXT(1, LOAD_CHI_GPARITY,LOAD_CHIMU_GPARITY,MULT_2SPIN_GPARITY); \
} \ } \
template<> void \ template<> accelerator_inline void \
WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \ WilsonKernels<IMPL>::HandDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, \
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \ int ss,int sU,const FermionFieldView &in, FermionFieldView &out) \
{ \ { \

View File

@@ -495,7 +495,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
NAMESPACE_BEGIN(Grid); NAMESPACE_BEGIN(Grid);
template<class Impl> void template<class Impl> accelerator_inline void
WilsonKernels<Impl>::HandDhopSite(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, WilsonKernels<Impl>::HandDhopSite(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{ {
@@ -519,7 +519,7 @@ WilsonKernels<Impl>::HandDhopSite(StencilView &st, DoubledGaugeFieldView &U,Site
HAND_RESULT(ss); HAND_RESULT(ss);
} }
template<class Impl> template<class Impl> accelerator_inline
void WilsonKernels<Impl>::HandDhopSiteDag(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, void WilsonKernels<Impl>::HandDhopSiteDag(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{ {
@@ -542,7 +542,7 @@ void WilsonKernels<Impl>::HandDhopSiteDag(StencilView &st,DoubledGaugeFieldView
HAND_RESULT(ss); HAND_RESULT(ss);
} }
template<class Impl> void template<class Impl> accelerator_inline void
WilsonKernels<Impl>::HandDhopSiteInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, WilsonKernels<Impl>::HandDhopSiteInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{ {
@@ -566,7 +566,7 @@ WilsonKernels<Impl>::HandDhopSiteInt(StencilView &st,DoubledGaugeFieldView &U,Si
HAND_RESULT(ss); HAND_RESULT(ss);
} }
template<class Impl> template<class Impl> accelerator_inline
void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{ {
@@ -589,7 +589,7 @@ void WilsonKernels<Impl>::HandDhopSiteDagInt(StencilView &st,DoubledGaugeFieldVi
HAND_RESULT(ss); HAND_RESULT(ss);
} }
template<class Impl> void template<class Impl> accelerator_inline void
WilsonKernels<Impl>::HandDhopSiteExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, WilsonKernels<Impl>::HandDhopSiteExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{ {
@@ -614,7 +614,7 @@ WilsonKernels<Impl>::HandDhopSiteExt(StencilView &st,DoubledGaugeFieldView &U,Si
HAND_RESULT_EXT(ss); HAND_RESULT_EXT(ss);
} }
template<class Impl> template<class Impl> accelerator_inline
void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf, void WilsonKernels<Impl>::HandDhopSiteDagExt(StencilView &st,DoubledGaugeFieldView &U,SiteHalfSpinor *buf,
int ss,int sU,const FermionFieldView &in, FermionFieldView &out) int ss,int sU,const FermionFieldView &in, FermionFieldView &out)
{ {

View File

@@ -114,7 +114,7 @@ accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip)
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
// All legs kernels ; comms then compute // All legs kernels ; comms then compute
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
template <class Impl> template <class Impl> accelerator_inline
void WilsonKernels<Impl>::GenericDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U, void WilsonKernels<Impl>::GenericDhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,
SiteHalfSpinor *buf, int sF, SiteHalfSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out) int sU, const FermionFieldView &in, FermionFieldView &out)
@@ -140,7 +140,7 @@ void WilsonKernels<Impl>::GenericDhopSiteDag(StencilView &st, DoubledGaugeFieldV
coalescedWrite(out[sF],result,lane); coalescedWrite(out[sF],result,lane);
}; };
template <class Impl> template <class Impl> accelerator_inline
void WilsonKernels<Impl>::GenericDhopSite(StencilView &st, DoubledGaugeFieldView &U, void WilsonKernels<Impl>::GenericDhopSite(StencilView &st, DoubledGaugeFieldView &U,
SiteHalfSpinor *buf, int sF, SiteHalfSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out) int sU, const FermionFieldView &in, FermionFieldView &out)
@@ -169,7 +169,7 @@ void WilsonKernels<Impl>::GenericDhopSite(StencilView &st, DoubledGaugeFieldView
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
// Interior kernels // Interior kernels
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
template <class Impl> template <class Impl> accelerator_inline
void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U, void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilView &st, DoubledGaugeFieldView &U,
SiteHalfSpinor *buf, int sF, SiteHalfSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out) int sU, const FermionFieldView &in, FermionFieldView &out)
@@ -197,7 +197,7 @@ void WilsonKernels<Impl>::GenericDhopSiteDagInt(StencilView &st, DoubledGaugeFi
coalescedWrite(out[sF], result,lane); coalescedWrite(out[sF], result,lane);
}; };
template <class Impl> template <class Impl> accelerator_inline
void WilsonKernels<Impl>::GenericDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U, void WilsonKernels<Impl>::GenericDhopSiteInt(StencilView &st, DoubledGaugeFieldView &U,
SiteHalfSpinor *buf, int sF, SiteHalfSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out) int sU, const FermionFieldView &in, FermionFieldView &out)
@@ -227,7 +227,7 @@ void WilsonKernels<Impl>::GenericDhopSiteInt(StencilView &st, DoubledGaugeField
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
// Exterior kernels // Exterior kernels
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
template <class Impl> template <class Impl> accelerator_inline
void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U, void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilView &st, DoubledGaugeFieldView &U,
SiteHalfSpinor *buf, int sF, SiteHalfSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out) int sU, const FermionFieldView &in, FermionFieldView &out)
@@ -258,7 +258,7 @@ void WilsonKernels<Impl>::GenericDhopSiteDagExt(StencilView &st, DoubledGaugeFi
} }
}; };
template <class Impl> template <class Impl> accelerator_inline
void WilsonKernels<Impl>::GenericDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U, void WilsonKernels<Impl>::GenericDhopSiteExt(StencilView &st, DoubledGaugeFieldView &U,
SiteHalfSpinor *buf, int sF, SiteHalfSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out) int sU, const FermionFieldView &in, FermionFieldView &out)
@@ -290,7 +290,7 @@ void WilsonKernels<Impl>::GenericDhopSiteExt(StencilView &st, DoubledGaugeField
}; };
#define DhopDirMacro(Dir,spProj,spRecon) \ #define DhopDirMacro(Dir,spProj,spRecon) \
template <class Impl> \ template <class Impl> accelerator_inline \
void WilsonKernels<Impl>::DhopDir##Dir(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, int sF, \ void WilsonKernels<Impl>::DhopDir##Dir(StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, int sF, \
int sU, const FermionFieldView &in, FermionFieldView &out, int dir) \ int sU, const FermionFieldView &in, FermionFieldView &out, int dir) \
{ \ { \
@@ -318,7 +318,7 @@ DhopDirMacro(Ym,spProjYm,spReconYm);
DhopDirMacro(Zm,spProjZm,spReconZm); DhopDirMacro(Zm,spProjZm,spReconZm);
DhopDirMacro(Tm,spProjTm,spReconTm); DhopDirMacro(Tm,spProjTm,spReconTm);
template <class Impl> template <class Impl> accelerator_inline
void WilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, int sF, void WilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,SiteHalfSpinor *buf, int sF,
int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int gamma) int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int gamma)
{ {

View File

@@ -41,6 +41,11 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
namespace Grid { namespace Grid {
#if (!defined(GRID_CUDA)) && (!defined(GRID_HIP))
typedef struct { uint16_t x;} half;
#endif
typedef struct Half2_t { half x; half y; } Half2;
#define COALESCE_GRANULARITY ( GEN_SIMD_WIDTH ) #define COALESCE_GRANULARITY ( GEN_SIMD_WIDTH )
template<class pair> template<class pair>
@@ -125,14 +130,14 @@ inline accelerator GpuVector<N,datum> operator/(const GpuVector<N,datum> l,const
} }
constexpr int NSIMD_RealH = COALESCE_GRANULARITY / sizeof(half); constexpr int NSIMD_RealH = COALESCE_GRANULARITY / sizeof(half);
constexpr int NSIMD_ComplexH = COALESCE_GRANULARITY / sizeof(half2); constexpr int NSIMD_ComplexH = COALESCE_GRANULARITY / sizeof(Half2);
constexpr int NSIMD_RealF = COALESCE_GRANULARITY / sizeof(float); constexpr int NSIMD_RealF = COALESCE_GRANULARITY / sizeof(float);
constexpr int NSIMD_ComplexF = COALESCE_GRANULARITY / sizeof(float2); constexpr int NSIMD_ComplexF = COALESCE_GRANULARITY / sizeof(float2);
constexpr int NSIMD_RealD = COALESCE_GRANULARITY / sizeof(double); constexpr int NSIMD_RealD = COALESCE_GRANULARITY / sizeof(double);
constexpr int NSIMD_ComplexD = COALESCE_GRANULARITY / sizeof(double2); constexpr int NSIMD_ComplexD = COALESCE_GRANULARITY / sizeof(double2);
constexpr int NSIMD_Integer = COALESCE_GRANULARITY / sizeof(Integer); constexpr int NSIMD_Integer = COALESCE_GRANULARITY / sizeof(Integer);
typedef GpuComplex<half2 > GpuComplexH; typedef GpuComplex<Half2 > GpuComplexH;
typedef GpuComplex<float2 > GpuComplexF; typedef GpuComplex<float2 > GpuComplexF;
typedef GpuComplex<double2> GpuComplexD; typedef GpuComplex<double2> GpuComplexD;
@@ -147,11 +152,9 @@ typedef GpuVector<NSIMD_Integer, Integer > GpuVectorI;
accelerator_inline float half2float(half h) accelerator_inline float half2float(half h)
{ {
float f; float f;
#ifdef GRID_SIMT #if defined(GRID_CUDA) || defined(GRID_HIP)
f = __half2float(h); f = __half2float(h);
#else #else
//f = __half2float(h);
__half_raw hr(h);
Grid_half hh; Grid_half hh;
hh.x = hr.x; hh.x = hr.x;
f= sfw_half_to_float(hh); f= sfw_half_to_float(hh);
@@ -161,13 +164,11 @@ accelerator_inline float half2float(half h)
accelerator_inline half float2half(float f) accelerator_inline half float2half(float f)
{ {
half h; half h;
#ifdef GRID_SIMT #if defined(GRID_CUDA) || defined(GRID_HIP)
h = __float2half(f); h = __float2half(f);
#else #else
Grid_half hh = sfw_float_to_half(f); Grid_half hh = sfw_float_to_half(f);
__half_raw hr; h.x = hh.x;
hr.x = hh.x;
h = __half(hr);
#endif #endif
return h; return h;
} }
@@ -523,7 +524,7 @@ namespace Optimization {
//////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////
// Single / Half // Single / Half
//////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////
static accelerator_inline GpuVectorCH StoH (GpuVectorCF a,GpuVectorCF b) { static accelerator_inline GpuVectorCH StoH (GpuVectorCF a,GpuVectorCF b) {
int N = GpuVectorCF::N; int N = GpuVectorCF::N;
GpuVectorCH h; GpuVectorCH h;
for(int i=0;i<N;i++) { for(int i=0;i<N;i++) {

View File

@@ -55,6 +55,7 @@ void acceleratorInit(void)
printf("AcceleratorCudaInit[%d]: ========================\n",rank); printf("AcceleratorCudaInit[%d]: ========================\n",rank);
printf("AcceleratorCudaInit[%d]: Device identifier: %s\n",rank, prop.name); printf("AcceleratorCudaInit[%d]: Device identifier: %s\n",rank, prop.name);
GPU_PROP_FMT(totalGlobalMem,"%lld"); GPU_PROP_FMT(totalGlobalMem,"%lld");
GPU_PROP(managedMemory); GPU_PROP(managedMemory);
GPU_PROP(isMultiGpuBoard); GPU_PROP(isMultiGpuBoard);
@@ -109,20 +110,24 @@ void acceleratorInit(void)
if ((localRankStr = getenv(ENV_RANK_OMPI )) != NULL) { world_rank = atoi(localRankStr);} if ((localRankStr = getenv(ENV_RANK_OMPI )) != NULL) { world_rank = atoi(localRankStr);}
if ((localRankStr = getenv(ENV_RANK_MVAPICH)) != NULL) { world_rank = atoi(localRankStr);} if ((localRankStr = getenv(ENV_RANK_MVAPICH)) != NULL) { world_rank = atoi(localRankStr);}
printf("world_rank %d has %d devices\n",world_rank,nDevices);
size_t totalDeviceMem=0;
for (int i = 0; i < nDevices; i++) { for (int i = 0; i < nDevices; i++) {
#define GPU_PROP_FMT(canMapHostMemory,FMT) printf("AcceleratorHipInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory); #define GPU_PROP_FMT(canMapHostMemory,FMT) printf("AcceleratorHipInit: " #canMapHostMemory ": " FMT" \n",prop.canMapHostMemory);
#define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d"); #define GPU_PROP(canMapHostMemory) GPU_PROP_FMT(canMapHostMemory,"%d");
hipGetDeviceProperties(&gpu_props[i], i); hipGetDeviceProperties(&gpu_props[i], i);
hipDeviceProp_t prop;
prop = gpu_props[i];
totalDeviceMem = prop.totalGlobalMem;
if ( world_rank == 0) { if ( world_rank == 0) {
hipDeviceProp_t prop;
prop = gpu_props[i];
printf("AcceleratorHipInit: ========================\n"); printf("AcceleratorHipInit: ========================\n");
printf("AcceleratorHipInit: Device Number : %d\n", i); printf("AcceleratorHipInit: Device Number : %d\n", i);
printf("AcceleratorHipInit: ========================\n"); printf("AcceleratorHipInit: ========================\n");
printf("AcceleratorHipInit: Device identifier: %s\n", prop.name); printf("AcceleratorHipInit: Device identifier: %s\n", prop.name);
GPU_PROP_FMT(totalGlobalMem,"%lu");
// GPU_PROP(managedMemory); // GPU_PROP(managedMemory);
GPU_PROP(isMultiGpuBoard); GPU_PROP(isMultiGpuBoard);
GPU_PROP(warpSize); GPU_PROP(warpSize);
@@ -131,6 +136,7 @@ void acceleratorInit(void)
// GPU_PROP(singleToDoublePrecisionPerfRatio); // GPU_PROP(singleToDoublePrecisionPerfRatio);
} }
} }
MemoryManager::DeviceMaxBytes = (8*totalDeviceMem)/10; // Assume 80% ours
#undef GPU_PROP_FMT #undef GPU_PROP_FMT
#undef GPU_PROP #undef GPU_PROP
#ifdef GRID_IBM_SUMMIT #ifdef GRID_IBM_SUMMIT

View File

@@ -307,17 +307,13 @@ void LambdaApply(uint64_t numx, uint64_t numy, uint64_t numz, lambda Lambda)
inline void *acceleratorAllocShared(size_t bytes) inline void *acceleratorAllocShared(size_t bytes)
{ {
#if 0
void *ptr=NULL; void *ptr=NULL;
auto err = hipMallocManaged((void **)&ptr,bytes); auto err = hipMallocManaged((void **)&ptr,bytes);
if( err != hipSuccess ) { if( err != hipSuccess ) {
ptr = (void *) NULL; ptr = (void *) NULL;
printf(" hipMallocManaged failed for %d %s \n",bytes,hipGetErrorString(err)); printf(" hipMallocManaged failed for %ld %s \n",bytes,hipGetErrorString(err));
} }
return ptr; return ptr;
#else
return malloc(bytes);
#endif
}; };
inline int acceleratorIsCommunicable(void *ptr){ return 1; } inline int acceleratorIsCommunicable(void *ptr){ return 1; }
@@ -327,7 +323,7 @@ inline void *acceleratorAllocDevice(size_t bytes)
auto err = hipMalloc((void **)&ptr,bytes); auto err = hipMalloc((void **)&ptr,bytes);
if( err != hipSuccess ) { if( err != hipSuccess ) {
ptr = (void *) NULL; ptr = (void *) NULL;
printf(" hipMalloc failed for %d %s \n",bytes,hipGetErrorString(err)); printf(" hipMalloc failed for %ld %s \n",bytes,hipGetErrorString(err));
} }
return ptr; return ptr;
}; };

33
README
View File

@@ -111,11 +111,10 @@ Now you can execute the `configure` script to generate makefiles (here from a bu
``` bash ``` bash
mkdir build; cd build mkdir build; cd build
../configure --enable-precision=double --enable-simd=AVX --enable-comms=mpi-auto --prefix=<path> ../configure --enable-simd=AVX --enable-comms=mpi-auto --prefix=<path>
``` ```
where `--enable-precision=` set the default precision, where `--enable-simd=` set the SIMD type, `--enable-
`--enable-simd=` set the SIMD type, `--enable-
comms=`, and `<path>` should be replaced by the prefix path where you want to comms=`, and `<path>` should be replaced by the prefix path where you want to
install Grid. Other options are detailed in the next section, you can also use `configure install Grid. Other options are detailed in the next section, you can also use `configure
--help` to display them. Like with any other program using GNU autotool, the --help` to display them. Like with any other program using GNU autotool, the
@@ -146,8 +145,8 @@ If you want to build all the tests at once just use `make tests`.
- `--enable-numa`: enable NUMA first touch optimisation - `--enable-numa`: enable NUMA first touch optimisation
- `--enable-simd=<code>`: setup Grid for the SIMD target `<code>` (default: `GEN`). A list of possible SIMD targets is detailed in a section below. - `--enable-simd=<code>`: setup Grid for the SIMD target `<code>` (default: `GEN`). A list of possible SIMD targets is detailed in a section below.
- `--enable-gen-simd-width=<size>`: select the size (in bytes) of the generic SIMD vector type (default: 32 bytes). - `--enable-gen-simd-width=<size>`: select the size (in bytes) of the generic SIMD vector type (default: 32 bytes).
- `--enable-precision={single|double}`: set the default precision (default: `double`). - `--enable-precision={single|double}`: set the default precision (default: `double`). **Deprecated option**
-- `--enable-precision=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible SIMD targets is detailed in a section below.
- `--enable-comms=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible SIMD targets is detailed in a section below.
- `--enable-rng={sitmo|ranlux48|mt19937}`: choose the RNG (default: `sitmo `).
- `--disable-timers`: disable system dependent high-resolution timers.
- `--enable-chroma`: enable Chroma regression tests.
@@ -201,8 +200,7 @@ Alternatively, some CPU codenames can be directly used:
The following configuration is recommended for the Intel Knights Landing platform:
``` bash
-../configure --enable-precision=double\
-             --enable-simd=KNL \
../configure --enable-simd=KNL \
             --enable-comms=mpi-auto \
             --enable-mkl \
             CXX=icpc MPICXX=mpiicpc
@@ -212,8 +210,7 @@ The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library.
If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:
``` bash
-../configure --enable-precision=double\
-             --enable-simd=KNL \
../configure --enable-simd=KNL \
             --enable-comms=mpi \
             --enable-mkl \
             CXX=CC CC=cc
@@ -232,8 +229,7 @@ for interior communication. This is the mpi3 communications implementation.
We recommend four ranks per node for best performance, but optimum is local volume dependent.
``` bash
-../configure --enable-precision=double\
-             --enable-simd=KNL \
../configure --enable-simd=KNL \
             --enable-comms=mpi3-auto \
             --enable-mkl \
             CC=icpc MPICXX=mpiicpc
@@ -244,8 +240,7 @@ We recommend four ranks per node for best performance, but optimum is local volu
The following configuration is recommended for the Intel Haswell platform:
``` bash
-../configure --enable-precision=double\
-             --enable-simd=AVX2 \
../configure --enable-simd=AVX2 \
             --enable-comms=mpi3-auto \
             --enable-mkl \
             CXX=icpc MPICXX=mpiicpc
@@ -262,8 +257,7 @@ where `<path>` is the UNIX prefix where GMP and MPFR are installed.
If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:
``` bash
-../configure --enable-precision=double\
-             --enable-simd=AVX2 \
../configure --enable-simd=AVX2 \
             --enable-comms=mpi3 \
             --enable-mkl \
             CXX=CC CC=cc
@@ -280,8 +274,7 @@ This is the default.
The following configuration is recommended for the Intel Skylake platform:
``` bash
-../configure --enable-precision=double\
-             --enable-simd=AVX512 \
../configure --enable-simd=AVX512 \
             --enable-comms=mpi3 \
             --enable-mkl \
             CXX=mpiicpc
@@ -298,8 +291,7 @@ where `<path>` is the UNIX prefix where GMP and MPFR are installed.
If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:
``` bash
-../configure --enable-precision=double\
-             --enable-simd=AVX512 \
../configure --enable-simd=AVX512 \
             --enable-comms=mpi3 \
             --enable-mkl \
             CXX=CC CC=cc
@@ -330,8 +322,7 @@ and 8 threads per rank.
The following configuration is recommended for the AMD EPYC platform.
``` bash
-../configure --enable-precision=double\
-             --enable-simd=AVX2 \
../configure --enable-simd=AVX2 \
             --enable-comms=mpi3 \
             CXX=mpicxx
```
@@ -115,11 +115,10 @@ Now you can execute the `configure` script to generate makefiles (here from a bu
``` bash
mkdir build; cd build
-../configure --enable-precision=double --enable-simd=AVX --enable-comms=mpi-auto --prefix=<path>
../configure --enable-simd=AVX --enable-comms=mpi-auto --prefix=<path>
```
-where `--enable-precision=` set the default precision,
-`--enable-simd=` set the SIMD type, `--enable-
where `--enable-simd=` set the SIMD type, `--enable-
comms=`, and `<path>` should be replaced by the prefix path where you want to
install Grid. Other options are detailed in the next section, you can also use `configure
--help` to display them. Like with any other program using GNU autotool, the
@@ -150,8 +149,8 @@ If you want to build all the tests at once just use `make tests`.
- `--enable-numa`: enable NUMA first touch optimisation
- `--enable-simd=<code>`: setup Grid for the SIMD target `<code>` (default: `GEN`). A list of possible SIMD targets is detailed in a section below.
- `--enable-gen-simd-width=<size>`: select the size (in bytes) of the generic SIMD vector type (default: 32 bytes).
-- `--enable-precision={single|double}`: set the default precision (default: `double`).
- `--enable-precision={single|double}`: set the default precision (default: `double`). **Deprecated option**
-- `--enable-precision=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible SIMD targets is detailed in a section below.
- `--enable-comms=<comm>`: Use `<comm>` for message passing (default: `none`). A list of possible SIMD targets is detailed in a section below.
- `--enable-rng={sitmo|ranlux48|mt19937}`: choose the RNG (default: `sitmo `).
- `--disable-timers`: disable system dependent high-resolution timers.
- `--enable-chroma`: enable Chroma regression tests.
@@ -205,8 +204,7 @@ Alternatively, some CPU codenames can be directly used:
The following configuration is recommended for the Intel Knights Landing platform:
``` bash
-../configure --enable-precision=double\
-             --enable-simd=KNL \
../configure --enable-simd=KNL \
             --enable-comms=mpi-auto \
             --enable-mkl \
             CXX=icpc MPICXX=mpiicpc
@@ -216,8 +214,7 @@ The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library.
If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:
``` bash
-../configure --enable-precision=double\
-             --enable-simd=KNL \
../configure --enable-simd=KNL \
             --enable-comms=mpi \
             --enable-mkl \
             CXX=CC CC=cc
@@ -236,8 +233,7 @@ for interior communication. This is the mpi3 communications implementation.
We recommend four ranks per node for best performance, but optimum is local volume dependent.
``` bash
-../configure --enable-precision=double\
-             --enable-simd=KNL \
../configure --enable-simd=KNL \
             --enable-comms=mpi3-auto \
             --enable-mkl \
             CC=icpc MPICXX=mpiicpc
@@ -248,8 +244,7 @@ We recommend four ranks per node for best performance, but optimum is local volu
The following configuration is recommended for the Intel Haswell platform:
``` bash
-../configure --enable-precision=double\
-             --enable-simd=AVX2 \
../configure --enable-simd=AVX2 \
             --enable-comms=mpi3-auto \
             --enable-mkl \
             CXX=icpc MPICXX=mpiicpc
@@ -266,8 +261,7 @@ where `<path>` is the UNIX prefix where GMP and MPFR are installed.
If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:
``` bash
-../configure --enable-precision=double\
-             --enable-simd=AVX2 \
../configure --enable-simd=AVX2 \
             --enable-comms=mpi3 \
             --enable-mkl \
             CXX=CC CC=cc
@@ -284,8 +278,7 @@ This is the default.
The following configuration is recommended for the Intel Skylake platform:
``` bash
-../configure --enable-precision=double\
-             --enable-simd=AVX512 \
../configure --enable-simd=AVX512 \
             --enable-comms=mpi3 \
             --enable-mkl \
             CXX=mpiicpc
@@ -302,8 +295,7 @@ where `<path>` is the UNIX prefix where GMP and MPFR are installed.
If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use:
``` bash
-../configure --enable-precision=double\
-             --enable-simd=AVX512 \
../configure --enable-simd=AVX512 \
             --enable-comms=mpi3 \
             --enable-mkl \
             CXX=CC CC=cc
@@ -334,8 +326,7 @@ and 8 threads per rank.
The following configuration is recommended for the AMD EPYC platform.
``` bash
-../configure --enable-precision=double\
-             --enable-simd=AVX2 \
../configure --enable-simd=AVX2 \
             --enable-comms=mpi3 \
             CXX=mpicxx
```
@@ -12,31 +12,31 @@ module load mpi/openmpi-aarch64
scl enable gcc-toolset-10 bash
-../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=g++ CC=gcc CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN"
../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp CXX=g++ CC=gcc CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN"
* gcc 10.1 prebuild w/ MPI, QPACE4 interactive login
scl enable gcc-toolset-10 bash
module load mpi/openmpi-aarch64
-../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=mpi-auto --enable-shm=shmget --enable-openmp CXX=mpicxx CC=mpicc CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN"
../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=mpi-auto --enable-shm=shmget --enable-openmp CXX=mpicxx CC=mpicc CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN"
------------------------------------------------------------------------------
* armclang 20.2 (qp4)
-../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -mcpu=a64fx -DA64FX -DARMCLANGCOMPAT -DA64FXASM -DDSLASHINTRIN"
../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -mcpu=a64fx -DA64FX -DARMCLANGCOMPAT -DA64FXASM -DDSLASHINTRIN"
------------------------------------------------------------------------------
* gcc 10.0.1 VLA (merlin)
-../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=g++-10.0.1 CC=gcc-10.0.1 CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FX -DA64FXASM -DDSLASHINTRIN" LDFLAGS=-static GRID_LDFLAGS=-static MPI_CXXLDFLAGS=-static
../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp CXX=g++-10.0.1 CC=gcc-10.0.1 CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FX -DA64FXASM -DDSLASHINTRIN" LDFLAGS=-static GRID_LDFLAGS=-static MPI_CXXLDFLAGS=-static
* gcc 10.0.1 fixed-size ACLE (merlin)
-../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=g++-10.0.1 CC=gcc-10.0.1 CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN"
../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp CXX=g++-10.0.1 CC=gcc-10.0.1 CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN"
* gcc 10.0.1 fixed-size ACLE (fjt) w/ MPI
@@ -46,34 +46,34 @@ export OMPI_CXX=g++-10.0.1
export MPICH_CC=gcc-10.0.1
export MPICH_CXX=g++-10.0.1
-$ ../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=mpi3 --enable-openmp CXX=mpiFCC CC=mpifcc CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN -DTOFU -I/opt/FJSVxtclanga/tcsds-1.2.25/include/mpi/fujitsu -lrt" LDFLAGS="-L/opt/FJSVxtclanga/tcsds-1.2.25/lib64 -lrt"
$ ../configure --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=mpi3 --enable-openmp CXX=mpiFCC CC=mpifcc CXXFLAGS="-std=c++11 -march=armv8-a+sve -msve-vector-bits=512 -fno-gcse -DA64FXFIXEDSIZE -DA64FXASM -DDSLASHINTRIN -DTOFU -I/opt/FJSVxtclanga/tcsds-1.2.25/include/mpi/fujitsu -lrt" LDFLAGS="-L/opt/FJSVxtclanga/tcsds-1.2.25/lib64 -lrt"
--------------------------------------------------------
* armclang 20.0 VLA (merlin)
-../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -fno-unroll-loops -mllvm -vectorizer-min-trip-count=2 -march=armv8-a+sve -DARMCLANGCOMPAT -DA64FX -DA64FXASM -DDSLASHINTRIN" LDFLAGS=-static GRID_LDFLAGS=-static MPI_CXXLDFLAGS=-static
../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -fno-unroll-loops -mllvm -vectorizer-min-trip-count=2 -march=armv8-a+sve -DARMCLANGCOMPAT -DA64FX -DA64FXASM -DDSLASHINTRIN" LDFLAGS=-static GRID_LDFLAGS=-static MPI_CXXLDFLAGS=-static
TODO check ARMCLANGCOMPAT
* armclang 20.1 VLA (merlin)
-../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -mcpu=a64fx -DARMCLANGCOMPAT -DA64FX -DA64FXASM -DDSLASHINTRIN" LDFLAGS=-static GRID_LDFLAGS=-static MPI_CXXLDFLAGS=-static
../configure --with-lime=/home/men04359/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -mcpu=a64fx -DARMCLANGCOMPAT -DA64FX -DA64FXASM -DDSLASHINTRIN" LDFLAGS=-static GRID_LDFLAGS=-static MPI_CXXLDFLAGS=-static
TODO check ARMCLANGCOMPAT
* armclang 20.1 VLA (fjt cluster)
-../configure --with-lime=$HOME/local --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -mcpu=a64fx -DARMCLANGCOMPAT -DA64FX -DA64FXASM -DDSLASHINTRIN -DTOFU"
../configure --with-lime=$HOME/local --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp CXX=armclang++ CC=armclang CXXFLAGS="-std=c++11 -mcpu=a64fx -DARMCLANGCOMPAT -DA64FX -DA64FXASM -DDSLASHINTRIN -DTOFU"
TODO check ARMCLANGCOMPAT
* armclang 20.1 VLA w/MPI (fjt cluster)
-../configure --with-lime=$HOME/local --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=mpi3 --enable-openmp CXX=mpiFCC CC=mpifcc CXXFLAGS="-std=c++11 -mcpu=a64fx -DA64FX -DA64FXASM -DDSLASHINTRIN -DTOFU -I/opt/FJSVxtclanga/tcsds-1.2.25/include/mpi/fujitsu -lrt" LDFLAGS="-L/opt/FJSVxtclanga/tcsds-1.2.25/lib64"
../configure --with-lime=$HOME/local --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=mpi3 --enable-openmp CXX=mpiFCC CC=mpifcc CXXFLAGS="-std=c++11 -mcpu=a64fx -DA64FX -DA64FXASM -DDSLASHINTRIN -DTOFU -I/opt/FJSVxtclanga/tcsds-1.2.25/include/mpi/fujitsu -lrt" LDFLAGS="-L/opt/FJSVxtclanga/tcsds-1.2.25/lib64"
No ARMCLANGCOMPAT -> still correct ?
@@ -81,9 +81,9 @@ No ARMCLANGCOMPAT -> still correct ?
* Fujitsu fcc
-../configure --with-lime=$HOME/grid-a64fx/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=none --enable-openmp --with-mpfr=/home/users/gre/gre-1/grid-a64fx/mpfr-build/install CXX=FCC CC=fcc CXXFLAGS="-Nclang -Kfast -DA64FX -DA64FXASM -DDSLASHINTRIN"
../configure --with-lime=$HOME/grid-a64fx/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=none --enable-openmp --with-mpfr=/home/users/gre/gre-1/grid-a64fx/mpfr-build/install CXX=FCC CC=fcc CXXFLAGS="-Nclang -Kfast -DA64FX -DA64FXASM -DDSLASHINTRIN"
* Fujitsu fcc w/ MPI
-../configure --with-lime=$HOME/grid-a64fx/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-precision=double --enable-comms=mpi --enable-openmp --with-mpfr=/home/users/gre/gre-1/grid-a64fx/mpfr-build/install CXX=mpiFCC CC=mpifcc CXXFLAGS="-Nclang -Kfast -DA64FX -DA64FXASM -DDSLASHINTRIN -DTOFU"
../configure --with-lime=$HOME/grid-a64fx/lime/c-lime --without-hdf5 --enable-gen-simd-width=64 --enable-simd=GEN --enable-comms=mpi --enable-openmp --with-mpfr=/home/users/gre/gre-1/grid-a64fx/mpfr-build/install CXX=mpiFCC CC=mpifcc CXXFLAGS="-Nclang -Kfast -DA64FX -DA64FXASM -DDSLASHINTRIN -DTOFU"
@@ -1,8 +1,16 @@
#include "Benchmark_IO.hpp" #include "Benchmark_IO.hpp"
#ifndef BENCH_IO_LMIN
#define BENCH_IO_LMIN 8
#endif
#ifndef BENCH_IO_LMAX #ifndef BENCH_IO_LMAX
#define BENCH_IO_LMAX 40 #define BENCH_IO_LMAX 32
#endif
#ifndef BENCH_IO_NPASS
#define BENCH_IO_NPASS 10
#endif #endif
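// Each pass sweeps local volumes L^4 for L = BENCH_IO_LMIN, BENCH_IO_LMIN+2, ..., BENCH_IO_LMAX,
// and the sweep is repeated BENCH_IO_NPASS times so that means, standard deviations and a
// robustness figure can be quoted per volume.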
using namespace Grid;
@@ -12,37 +20,179 @@ std::string filestem(const int l)
return "iobench_l" + std::to_string(l);
}
int vol(const int i)
{
return BENCH_IO_LMIN + 2*i;
}
int volInd(const int l)
{
return (l - BENCH_IO_LMIN)/2;
}
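// stats(): element-wise mean and (n - 1 normalised) standard deviation of the
// per-pass performance matrices.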
template <typename Mat>
void stats(Mat &mean, Mat &stdDev, const std::vector<Mat> &data)
{
auto nr = data[0].rows(), nc = data[0].cols();
Eigen::MatrixXd sqSum(nr, nc);
double n = static_cast<double>(data.size());
assert(n > 1.);
mean = Mat::Zero(nr, nc);
sqSum = Mat::Zero(nr, nc);
for (auto &d: data)
{
mean += d;
sqSum += d.cwiseProduct(d);
}
stdDev = ((sqSum - mean.cwiseProduct(mean)/n)/(n - 1.)).cwiseSqrt();
mean /= n;
}
#define grid_printf(...) \
{\
char _buf[1024];\
sprintf(_buf, __VA_ARGS__);\
MSG << _buf;\
}
enum {sRead = 0, sWrite = 1, gRead = 2, gWrite = 3};
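// Column indices into the per-pass performance matrices: standard C/C++ I/O
// read/write and Grid C-Lime read/write, all recorded in MB/s.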
int main (int argc, char ** argv)
{
-#ifdef HAVE_LIME
Grid_init(&argc,&argv);
int64_t threads = GridThread::GetThreads();
auto mpi = GridDefaultMpi();
unsigned int nVol = (BENCH_IO_LMAX - BENCH_IO_LMIN)/2 + 1;
unsigned int nRelVol = (BENCH_IO_LMAX - 24)/2 + 1;
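// nRelVol: number of "relevant" local volumes (24^4 and larger) entering the
// volume-averaged summary printed at the end.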
std::vector<Eigen::MatrixXd> perf(BENCH_IO_NPASS, Eigen::MatrixXd::Zero(nVol, 4));
std::vector<Eigen::VectorXd> avPerf(BENCH_IO_NPASS, Eigen::VectorXd::Zero(4));
std::vector<int> latt;
MSG << "Grid is setup to use " << threads << " threads" << std::endl; MSG << "Grid is setup to use " << threads << " threads" << std::endl;
MSG << SEP << std::endl; MSG << "MPI partition " << mpi << std::endl;
MSG << "Benchmark Lime write" << std::endl; for (unsigned int i = 0; i < BENCH_IO_NPASS; ++i)
MSG << SEP << std::endl;
for (int l = 4; l <= BENCH_IO_LMAX; l += 2)
{ {
auto mpi = GridDefaultMpi(); MSG << BIGSEP << std::endl;
std::vector<int> latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]}; MSG << "Pass " << i + 1 << "/" << BENCH_IO_NPASS << std::endl;
MSG << BIGSEP << std::endl;
MSG << SEP << std::endl;
MSG << "Benchmark std write" << std::endl;
MSG << SEP << std::endl;
for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2)
{
latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]};
std::cout << "-- Local volume " << l << "^4" << std::endl; MSG << "-- Local volume " << l << "^4" << std::endl;
writeBenchmark<LatticeFermion>(latt, filestem(l), limeWrite<LatticeFermion>); writeBenchmark<LatticeFermion>(latt, filestem(l), stdWrite<LatticeFermion>);
perf[i](volInd(l), sWrite) = BinaryIO::lastPerf.mbytesPerSecond;
}
MSG << SEP << std::endl;
MSG << "Benchmark std read" << std::endl;
MSG << SEP << std::endl;
for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2)
{
latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]};
MSG << "-- Local volume " << l << "^4" << std::endl;
readBenchmark<LatticeFermion>(latt, filestem(l), stdRead<LatticeFermion>);
perf[i](volInd(l), sRead) = BinaryIO::lastPerf.mbytesPerSecond;
}
#ifdef HAVE_LIME
MSG << SEP << std::endl;
MSG << "Benchmark Grid C-Lime write" << std::endl;
MSG << SEP << std::endl;
for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2)
{
latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]};
MSG << "-- Local volume " << l << "^4" << std::endl;
writeBenchmark<LatticeFermion>(latt, filestem(l), limeWrite<LatticeFermion>);
perf[i](volInd(l), gWrite) = BinaryIO::lastPerf.mbytesPerSecond;
}
MSG << SEP << std::endl;
MSG << "Benchmark Grid C-Lime read" << std::endl;
MSG << SEP << std::endl;
for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2)
{
latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]};
MSG << "-- Local volume " << l << "^4" << std::endl;
readBenchmark<LatticeFermion>(latt, filestem(l), limeRead<LatticeFermion>);
perf[i](volInd(l), gRead) = BinaryIO::lastPerf.mbytesPerSecond;
}
#endif
avPerf[i].fill(0.);
for (int f = 0; f < 4; ++f)
for (int l = 24; l <= BENCH_IO_LMAX; l += 2)
{
avPerf[i](f) += perf[i](volInd(l), f);
}
avPerf[i] /= nRelVol;
}
-MSG << "Benchmark Lime read" << std::endl;
-MSG << SEP << std::endl;
-for (int l = 4; l <= BENCH_IO_LMAX; l += 2)
-{
-auto mpi = GridDefaultMpi();
-std::vector<int> latt = {l*mpi[0], l*mpi[1], l*mpi[2], l*mpi[3]};
-std::cout << "-- Local volume " << l << "^4" << std::endl;
-readBenchmark<LatticeFermion>(latt, filestem(l), limeRead<LatticeFermion>);
Eigen::MatrixXd mean(nVol, 4), stdDev(nVol, 4), rob(nVol, 4);
Eigen::VectorXd avMean(4), avStdDev(4), avRob(4);
double n = BENCH_IO_NPASS;
stats(mean, stdDev, perf);
stats(avMean, avStdDev, avPerf);
rob.fill(100.);
rob -= 100.*stdDev.cwiseQuotient(mean.cwiseAbs());
avRob.fill(100.);
avRob -= 100.*avStdDev.cwiseQuotient(avMean.cwiseAbs());
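// rob is a robustness figure: 100% minus the relative spread (std dev / |mean|)
// of the measured bandwidths, e.g. a 1000 MB/s mean with a 50 MB/s standard
// deviation gives rob = 95%.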
MSG << BIGSEP << std::endl;
MSG << "SUMMARY" << std::endl;
MSG << BIGSEP << std::endl;
MSG << "Summary of individual results (all results in MB/s)." << std::endl;
MSG << "Every second colum gives the standard deviation of the previous column." << std::endl;
MSG << std::endl;
grid_printf("%4s %12s %12s %12s %12s %12s %12s %12s %12s\n",
"L", "std read", "std dev", "std write", "std dev",
"Grid read", "std dev", "Grid write", "std dev");
for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2)
{
grid_printf("%4d %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f\n",
l, mean(volInd(l), sRead), stdDev(volInd(l), sRead),
mean(volInd(l), sWrite), stdDev(volInd(l), sWrite),
mean(volInd(l), gRead), stdDev(volInd(l), gRead),
mean(volInd(l), gWrite), stdDev(volInd(l), gWrite));
}
MSG << std::endl;
MSG << "Robustness of individual results, in \%. (rob = 100\% - std dev / mean)" << std::endl;
MSG << std::endl;
grid_printf("%4s %12s %12s %12s %12s\n",
"L", "std read", "std write", "Grid read", "Grid write");
for (int l = BENCH_IO_LMIN; l <= BENCH_IO_LMAX; l += 2)
{
grid_printf("%4d %12.1f %12.1f %12.1f %12.1f\n",
l, rob(volInd(l), sRead), rob(volInd(l), sWrite),
rob(volInd(l), gRead), rob(volInd(l), gWrite));
}
MSG << std::endl;
MSG << "Summary of results averaged over local volumes 24^4-" << BENCH_IO_LMAX << "^4 (all results in MB/s)." << std::endl;
MSG << "Every second colum gives the standard deviation of the previous column." << std::endl;
MSG << std::endl;
grid_printf("%12s %12s %12s %12s %12s %12s %12s %12s\n",
"std read", "std dev", "std write", "std dev",
"Grid read", "std dev", "Grid write", "std dev");
grid_printf("%12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f %12.1f\n",
avMean(sRead), avStdDev(sRead), avMean(sWrite), avStdDev(sWrite),
avMean(gRead), avStdDev(gRead), avMean(gWrite), avStdDev(gWrite));
MSG << std::endl;
MSG << "Robustness of volume-averaged results, in \%. (rob = 100\% - std dev / mean)" << std::endl;
MSG << std::endl;
grid_printf("%12s %12s %12s %12s\n",
"std read", "std write", "Grid read", "Grid write");
grid_printf("%12.1f %12.1f %12.1f %12.1f\n",
avRob(sRead), avRob(sWrite), avRob(gRead), avRob(gWrite));
Grid_finalize();
-#endif
return EXIT_SUCCESS;
}
@@ -5,6 +5,8 @@
#ifdef HAVE_LIME
#define MSG std::cout << GridLogMessage
#define SEP \
"-----------------------------------------------------------------------------"
#define BIGSEP \
"============================================================================="
namespace Grid {
@@ -14,13 +16,152 @@ using WriterFn = std::function<void(const std::string, Field &)> ;
template <typename Field> template <typename Field>
using ReaderFn = std::function<void(Field &, const std::string)>; using ReaderFn = std::function<void(Field &, const std::string)>;
// AP 06/10/2020: Standard C version in case one is suspicious of the C++ API
//
// template <typename Field>
// void stdWrite(const std::string filestem, Field &vec)
// {
// std::string rankStr = std::to_string(vec.Grid()->ThisRank());
// std::FILE *file = std::fopen((filestem + "." + rankStr + ".bin").c_str(), "wb");
// size_t size;
// uint32_t crc;
// GridStopWatch ioWatch, crcWatch;
// size = vec.Grid()->lSites()*sizeof(typename Field::scalar_object);
// autoView(vec_v, vec, CpuRead);
// crcWatch.Start();
// crc = GridChecksum::crc32(vec_v.cpu_ptr, size);
// std::fwrite(&crc, sizeof(uint32_t), 1, file);
// crcWatch.Stop();
// MSG << "Std I/O write: Data CRC32 " << std::hex << crc << std::dec << std::endl;
// ioWatch.Start();
// std::fwrite(vec_v.cpu_ptr, sizeof(typename Field::scalar_object), vec.Grid()->lSites(), file);
// ioWatch.Stop();
// std::fclose(file);
// size *= vec.Grid()->ProcessorCount();
// auto &p = BinaryIO::lastPerf;
// p.size = size;
// p.time = ioWatch.useconds();
// p.mbytesPerSecond = size/1024./1024./(ioWatch.useconds()/1.e6);
// MSG << "Std I/O write: Wrote " << p.size << " bytes in " << ioWatch.Elapsed()
// << ", " << p.mbytesPerSecond << " MB/s" << std::endl;
// MSG << "Std I/O write: checksum overhead " << crcWatch.Elapsed() << std::endl;
// }
//
// template <typename Field>
// void stdRead(Field &vec, const std::string filestem)
// {
// std::string rankStr = std::to_string(vec.Grid()->ThisRank());
// std::FILE *file = std::fopen((filestem + "." + rankStr + ".bin").c_str(), "rb");
// size_t size;
// uint32_t crcRead, crcData;
// GridStopWatch ioWatch, crcWatch;
// size = vec.Grid()->lSites()*sizeof(typename Field::scalar_object);
// crcWatch.Start();
// std::fread(&crcRead, sizeof(uint32_t), 1, file);
// crcWatch.Stop();
// {
// autoView(vec_v, vec, CpuWrite);
// ioWatch.Start();
// std::fread(vec_v.cpu_ptr, sizeof(typename Field::scalar_object), vec.Grid()->lSites(), file);
// ioWatch.Stop();
// std::fclose(file);
// }
// {
// autoView(vec_v, vec, CpuRead);
// crcWatch.Start();
// crcData = GridChecksum::crc32(vec_v.cpu_ptr, size);
// crcWatch.Stop();
// }
// MSG << "Std I/O read: Data CRC32 " << std::hex << crcData << std::dec << std::endl;
// assert(crcData == crcRead);
// size *= vec.Grid()->ProcessorCount();
// auto &p = BinaryIO::lastPerf;
// p.size = size;
// p.time = ioWatch.useconds();
// p.mbytesPerSecond = size/1024./1024./(ioWatch.useconds()/1.e6);
// MSG << "Std I/O read: Read " << p.size << " bytes in " << ioWatch.Elapsed()
// << ", " << p.mbytesPerSecond << " MB/s" << std::endl;
// MSG << "Std I/O read: checksum overhead " << crcWatch.Elapsed() << std::endl;
// }
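// The std I/O benchmark below uses a simple per-rank binary layout: a CRC32 of
// the local lattice data followed by the raw data itself, one ".bin" file per
// MPI rank.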
template <typename Field>
void stdWrite(const std::string filestem, Field &vec)
{
std::string rankStr = std::to_string(vec.Grid()->ThisRank());
std::ofstream file(filestem + "." + rankStr + ".bin", std::ios::out | std::ios::binary);
size_t size, sizec;
uint32_t crc;
GridStopWatch ioWatch, crcWatch;
size = vec.Grid()->lSites()*sizeof(typename Field::scalar_object);
sizec = size/sizeof(char); // just in case of...
autoView(vec_v, vec, CpuRead);
crcWatch.Start();
crc = GridChecksum::crc32(vec_v.cpu_ptr, size);
file.write(reinterpret_cast<char *>(&crc), sizeof(uint32_t)/sizeof(char));
crcWatch.Stop();
MSG << "Std I/O write: Data CRC32 " << std::hex << crc << std::dec << std::endl;
ioWatch.Start();
file.write(reinterpret_cast<char *>(vec_v.cpu_ptr), sizec);
file.flush();
ioWatch.Stop();
size *= vec.Grid()->ProcessorCount();
auto &p = BinaryIO::lastPerf;
p.size = size;
p.time = ioWatch.useconds();
p.mbytesPerSecond = size/1024./1024./(ioWatch.useconds()/1.e6);
MSG << "Std I/O write: Wrote " << p.size << " bytes in " << ioWatch.Elapsed()
<< ", " << p.mbytesPerSecond << " MB/s" << std::endl;
MSG << "Std I/O write: checksum overhead " << crcWatch.Elapsed() << std::endl;
}
template <typename Field>
void stdRead(Field &vec, const std::string filestem)
{
std::string rankStr = std::to_string(vec.Grid()->ThisRank());
std::ifstream file(filestem + "." + rankStr + ".bin", std::ios::in | std::ios::binary);
size_t size, sizec;
uint32_t crcRead, crcData;
GridStopWatch ioWatch, crcWatch;
size = vec.Grid()->lSites()*sizeof(typename Field::scalar_object);
sizec = size/sizeof(char); // just in case of...
crcWatch.Start();
file.read(reinterpret_cast<char *>(&crcRead), sizeof(uint32_t)/sizeof(char));
crcWatch.Stop();
{
autoView(vec_v, vec, CpuWrite);
ioWatch.Start();
file.read(reinterpret_cast<char *>(vec_v.cpu_ptr), sizec);
ioWatch.Stop();
}
{
autoView(vec_v, vec, CpuRead);
crcWatch.Start();
crcData = GridChecksum::crc32(vec_v.cpu_ptr, size);
crcWatch.Stop();
}
MSG << "Std I/O read: Data CRC32 " << std::hex << crcData << std::dec << std::endl;
assert(crcData == crcRead);
size *= vec.Grid()->ProcessorCount();
auto &p = BinaryIO::lastPerf;
p.size = size;
p.time = ioWatch.useconds();
p.mbytesPerSecond = size/1024./1024./(ioWatch.useconds()/1.e6);
MSG << "Std I/O read: Read " << p.size << " bytes in " << ioWatch.Elapsed()
<< ", " << p.mbytesPerSecond << " MB/s" << std::endl;
MSG << "Std I/O read: checksum overhead " << crcWatch.Elapsed() << std::endl;
}
template <typename Field> template <typename Field>
void limeWrite(const std::string filestem, Field &vec) void limeWrite(const std::string filestem, Field &vec)
{ {
emptyUserRecord record; emptyUserRecord record;
ScidacWriter binWriter(vec.Grid()->IsBoss()); ScidacWriter binWriter(vec.Grid()->IsBoss());
binWriter.open(filestem + ".bin"); binWriter.open(filestem + ".lime.bin");
binWriter.writeScidacFieldRecord(vec, record); binWriter.writeScidacFieldRecord(vec, record);
binWriter.close(); binWriter.close();
} }
@@ -31,7 +172,7 @@ void limeRead(Field &vec, const std::string filestem)
emptyUserRecord record;
ScidacReader binReader;
-binReader.open(filestem + ".bin");
binReader.open(filestem + ".lime.bin");
binReader.readScidacFieldRecord(vec, record);
binReader.close();
}
@@ -73,12 +214,18 @@ void writeBenchmark(const Coordinate &latt, const std::string filename,
auto simd = GridDefaultSimd(latt.size(), Field::vector_type::Nsimd());
std::shared_ptr<GridCartesian> gBasePt(SpaceTimeGrid::makeFourDimGrid(latt, simd, mpi));
std::shared_ptr<GridBase> gPt;
std::random_device rd;
makeGrid(gPt, gBasePt, Ls, rb);
GridBase *g = gPt.get();
GridParallelRNG rng(g);
Field vec(g);
rng.SeedFixedIntegers({static_cast<int>(rd()), static_cast<int>(rd()),
static_cast<int>(rd()), static_cast<int>(rd()),
static_cast<int>(rd()), static_cast<int>(rd()),
static_cast<int>(rd()), static_cast<int>(rd())});
random(rng, vec);
write(filename, vec);
@@ -96,8 +243,8 @@ void readBenchmark(const Coordinate &latt, const std::string filename,
makeGrid(gPt, gBasePt, Ls, rb);
GridBase *g = gPt.get();
Field vec(g);
read(vec, filename);
}
@@ -1,14 +1,9 @@
#include "Benchmark_IO.hpp" #include "Benchmark_IO.hpp"
#define MSG std::cout << GridLogMessage
#define SEP \
"============================================================================="
using namespace Grid; using namespace Grid;
int main (int argc, char ** argv) int main (int argc, char ** argv)
{ {
#ifdef HAVE_LIME
std::vector<std::string> dir; std::vector<std::string> dir;
unsigned int Ls; unsigned int Ls;
bool rb; bool rb;
@@ -34,46 +29,71 @@ int main (int argc, char ** argv)
}
Grid_init(&argc,&argv);
int64_t threads = GridThread::GetThreads();
auto mpi = GridDefaultMpi();
MSG << "Grid is setup to use " << threads << " threads" << std::endl;
-MSG << SEP << std::endl;
MSG << "MPI partition " << mpi << std::endl;
-MSG << "Benchmark double precision Lime write" << std::endl;
-MSG << SEP << std::endl;
-for (auto &d: dir)
-{
-MSG << "-- Directory " << d << std::endl;
-writeBenchmark<LatticeFermion>(GridDefaultLatt(), d + "/ioBench", limeWrite<LatticeFermion>, Ls, rb);
-}
MSG << SEP << std::endl;
-MSG << "Benchmark double precision Lime read" << std::endl;
MSG << "Benchmark Grid std write" << std::endl;
MSG << SEP << std::endl;
for (auto &d: dir)
{
MSG << "-- Directory " << d << std::endl;
-readBenchmark<LatticeFermion>(GridDefaultLatt(), d + "/ioBench", limeRead<LatticeFermion>, Ls, rb);
writeBenchmark<LatticeFermion>(GridDefaultLatt(), d + "/ioBench",
               stdWrite<LatticeFermion>, Ls, rb);
}
MSG << SEP << std::endl;
MSG << "Benchmark Grid std read" << std::endl;
MSG << SEP << std::endl;
for (auto &d: dir)
{
MSG << "-- Directory " << d << std::endl;
readBenchmark<LatticeFermion>(GridDefaultLatt(), d + "/ioBench",
stdRead<LatticeFermion>, Ls, rb);
}
#ifdef HAVE_LIME
MSG << SEP << std::endl;
-MSG << "Benchmark single precision Lime write" << std::endl;
MSG << "Benchmark Grid C-Lime write" << std::endl;
MSG << SEP << std::endl;
for (auto &d: dir)
{
MSG << "-- Directory " << d << std::endl;
-writeBenchmark<LatticeFermionF>(GridDefaultLatt(), d + "/ioBench", limeWrite<LatticeFermionF>, Ls, rb);
writeBenchmark<LatticeFermion>(GridDefaultLatt(), d + "/ioBench",
               limeWrite<LatticeFermion>, Ls, rb);
}
MSG << SEP << std::endl;
MSG << "Benchmark Grid C-Lime read" << std::endl;
MSG << SEP << std::endl;
for (auto &d: dir)
{
MSG << "-- Directory " << d << std::endl;
readBenchmark<LatticeFermion>(GridDefaultLatt(), d + "/ioBench",
limeRead<LatticeFermion>, Ls, rb);
}
#endif
MSG << SEP << std::endl; // MSG << SEP << std::endl;
MSG << "Benchmark single precision Lime read" << std::endl; // MSG << "Benchmark single precision Lime write" << std::endl;
MSG << SEP << std::endl; // MSG << SEP << std::endl;
for (auto &d: dir) // for (auto &d: dir)
{ // {
MSG << "-- Directory " << d << std::endl; // MSG << "-- Directory " << d << std::endl;
readBenchmark<LatticeFermionF>(GridDefaultLatt(), d + "/ioBench", limeRead<LatticeFermionF>, Ls, rb); // writeBenchmark<LatticeFermionF>(GridDefaultLatt(), d + "/ioBench", limeWrite<LatticeFermionF>, Ls, rb);
} // }
// MSG << SEP << std::endl;
// MSG << "Benchmark single precision Lime read" << std::endl;
// MSG << SEP << std::endl;
// for (auto &d: dir)
// {
// MSG << "-- Directory " << d << std::endl;
// readBenchmark<LatticeFermionF>(GridDefaultLatt(), d + "/ioBench", limeRead<LatticeFermionF>, Ls, rb);
// }
Grid_finalize();
-#endif
return EXIT_SUCCESS;
}
@@ -1,4 +1,4 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
@@ -125,7 +125,7 @@ public:
lat*mpi_layout[1],
lat*mpi_layout[2],
lat*mpi_layout[3]});
std::cout << GridLogMessage<< latt_size <<std::endl;
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
RealD Nrank = Grid._Nprocessors;
RealD Nnode = Grid.NodeCount();
@@ -137,8 +137,8 @@ public:
for(int d=0;d<8;d++){ for(int d=0;d<8;d++){
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); // bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); // bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
} }
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
@@ -202,6 +202,8 @@ public:
return;
}
static void Memory(void)
{
const int Nvec=8;
@@ -222,7 +224,7 @@ public:
uint64_t lmax=32;
-#define NLOOP (100*lmax*lmax*lmax*lmax/lat/lat/lat/lat)
#define NLOOP (1000*lmax*lmax*lmax*lmax/lat/lat/lat/lat)
GridSerialRNG sRNG; sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
for(int lat=8;lat<=lmax;lat+=8){
@@ -247,11 +249,6 @@ public:
double start=usecond();
for(int i=0;i<Nloop;i++){
z=a*x-y;
-autoView( x_v , x, CpuWrite);
-autoView( y_v , y, CpuWrite);
-autoView( z_v , z, CpuRead);
-x_v[0]=z_v[0]; // force serial dependency to prevent optimise away
-y_v[4]=z_v[4];
}
double stop=usecond();
double time = (stop-start)/Nloop*1000;
@@ -266,6 +263,61 @@ public:
};
static void SU4(void)
{
const int Nc4=4;
typedef Lattice< iMatrix< vComplexF,Nc4> > LatticeSU4;
Coordinate simd_layout = GridDefaultSimd(Nd,vComplexF::Nsimd());
Coordinate mpi_layout = GridDefaultMpi();
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
std::cout<<GridLogMessage << "= Benchmarking z = y*x SU(4) bandwidth"<<std::endl;
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<< "\t\tGB/s / node"<<std::endl;
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
uint64_t NN;
uint64_t lmax=32;
#define NLOOP (1000*lmax*lmax*lmax*lmax/lat/lat/lat/lat)
GridSerialRNG sRNG; sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
for(int lat=8;lat<=lmax;lat+=8){
Coordinate latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
int64_t vol= latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
NN =Grid.NodeCount();
LatticeSU4 z(&Grid); z=Zero();
LatticeSU4 x(&Grid); x=Zero();
LatticeSU4 y(&Grid); y=Zero();
double a=2.0;
uint64_t Nloop=NLOOP;
double start=usecond();
for(int i=0;i<Nloop;i++){
z=x*y;
}
double stop=usecond();
double time = (stop-start)/Nloop*1000;
double flops=vol*Nc4*Nc4*(6+(Nc4-1)*8);// mul,add
double bytes=3.0*vol*Nc4*Nc4*2*sizeof(RealF);
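// Flop count per site of z = x*y: each of the Nc4*Nc4 output elements needs one
// complex multiply (6 flops) plus Nc4-1 multiply-adds (8 flops each); the byte
// count covers the three Nc4 x Nc4 complex single-precision fields (x and y read, z written).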
std::cout<<GridLogMessage<<std::setprecision(3)
<< lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t"<<flops/time<<"\t\t"<<(stop-start)/1000./1000.
<< "\t\t"<< bytes/time/NN <<std::endl;
}
};
static double DWF(int Ls,int L)
{
RealD mass=0.1;
@@ -296,6 +348,7 @@ public:
///////// Welcome message ////////////
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
std::cout<<GridLogMessage << "Benchmark DWF on "<<L<<"^4 local volume "<<std::endl;
std::cout<<GridLogMessage << "* Nc             : "<<Nc<<std::endl;
std::cout<<GridLogMessage << "* Global volume  : "<<GridCmdVectorIntToString(latt4)<<std::endl;
std::cout<<GridLogMessage << "* Ls             : "<<Ls<<std::endl;
std::cout<<GridLogMessage << "* ranks          : "<<NP <<std::endl;
@@ -324,7 +377,7 @@ public:
typedef LatticeGaugeFieldF Gauge;
///////// Source preparation ////////////
-Gauge Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
Gauge Umu(UGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
Fermion src   (FGrid); random(RNG5,src);
Fermion src_e (FrbGrid);
Fermion src_o (FrbGrid);
@@ -369,7 +422,7 @@ public:
}
FGrid->Barrier();
double t1=usecond();
-uint64_t ncall = 50;
uint64_t ncall = 500;
FGrid->Broadcast(0,&ncall,sizeof(ncall));
@@ -387,7 +440,13 @@ public:
FGrid->Barrier(); FGrid->Barrier();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu]; double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=(1344.0*volume)/2;
// Nc=3 gives
// 1344= 3*(2*8+6)*2*8 + 8*3*2*2 + 3*4*2*8
// 1344 = Nc* (6+(Nc-1)*8)*2*Nd + Nd*Nc*2*2 + Nd*Nc*Ns*2
// double flops=(1344.0*volume)/2;
double fps = Nc* (6+(Nc-1)*8)*Ns*Nd + Nd*Nc*Ns + Nd*Nc*Ns*2;
double flops=(fps*volume)/2;
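// For Nc=3, Ns=4, Nd=4 this evaluates to 1056 + 48 + 96 = 1200 flops per site,
// slightly below the historical 1344 count quoted above.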
double mf_hi, mf_lo, mf_err;
timestat.statistics(t_time);
@@ -402,6 +461,7 @@ public:
if ( mflops>mflops_best ) mflops_best = mflops; if ( mflops>mflops_best ) mflops_best = mflops;
if ( mflops<mflops_worst) mflops_worst= mflops; if ( mflops<mflops_worst) mflops_worst= mflops;
std::cout<<GridLogMessage<< "Deo FlopsPerSite is "<<fps<<std::endl;
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s = "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl; std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s = "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl;
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per rank "<< mflops/NP<<std::endl; std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per rank "<< mflops/NP<<std::endl;
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per node "<< mflops/NN<<std::endl; std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per node "<< mflops/NN<<std::endl;
@@ -478,7 +538,7 @@ public:
typedef typename Action::FermionField Fermion;
typedef LatticeGaugeFieldF Gauge;
-Gauge Umu(FGrid); SU3::HotConfiguration(RNG4,Umu);
Gauge Umu(FGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
typename Action::ImplParams params;
Action Ds(Umu,Umu,*FGrid,*FrbGrid,mass,c1,c2,u0,params);
@@ -596,11 +656,12 @@ int main (int argc, char ** argv)
#endif
Benchmark::Decomposition();
int do_su4=1;
int do_memory=1;
int do_comms =1;
-int sel=2;
int sel=4;
-std::vector<int> L_list({16,24,32});
std::vector<int> L_list({8,12,16,24,32});
int selm1=sel-1;
std::vector<double> wilson;
@@ -624,7 +685,6 @@ int main (int argc, char ** argv)
dwf4.push_back(result);
}
-/*
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
std::cout<<GridLogMessage << " Improved Staggered dslash 4D vectorised" <<std::endl;
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
@@ -632,14 +692,13 @@ int main (int argc, char ** argv)
double result = Benchmark::Staggered(L_list[l]) ;
staggered.push_back(result);
}
-*/
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
std::cout<<GridLogMessage << " Summary table Ls="<<Ls <<std::endl;
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
std::cout<<GridLogMessage << "L \t\t Wilson \t\t DWF4 \t\tt Staggered" <<std::endl;
for(int l=0;l<L_list.size();l++){
-std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]<<" \t\t "<<dwf4[l] <<std::endl;
std::cout<<GridLogMessage << L_list[l] <<" \t\t "<< wilson[l]<<" \t\t "<<dwf4[l] << " \t\t "<< staggered[l]<<std::endl;
}
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
@@ -651,6 +710,13 @@ int main (int argc, char ** argv)
Benchmark::Memory();
}
if ( do_su4 ) {
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
std::cout<<GridLogMessage << " Memory benchmark " <<std::endl;
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
Benchmark::SU4();
}
if ( do_comms && (NN>1) ) { if ( do_comms && (NN>1) ) {
std::cout<<GridLogMessage << "=================================================================================="<<std::endl; std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
std::cout<<GridLogMessage << " Communications benchmark " <<std::endl; std::cout<<GridLogMessage << " Communications benchmark " <<std::endl;
@@ -108,7 +108,7 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage << "Drawing gauge field" << std::endl; std::cout << GridLogMessage << "Drawing gauge field" << std::endl;
LatticeGaugeField Umu(UGrid); LatticeGaugeField Umu(UGrid);
SU3::HotConfiguration(RNG4,Umu); SU<Nc>::HotConfiguration(RNG4,Umu);
std::cout << GridLogMessage << "Random gauge initialised " << std::endl; std::cout << GridLogMessage << "Random gauge initialised " << std::endl;
#if 0 #if 0
Umu=1.0; Umu=1.0;
@@ -0,0 +1,364 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./benchmarks/Benchmark_dwf.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#ifdef GRID_CUDA
#define CUDA_PROFILE
#endif
#ifdef CUDA_PROFILE
#include <cuda_profiler_api.h>
#endif
using namespace std;
using namespace Grid;
template<class d>
struct scal {
d internal;
};
Gamma::Algebra Gmu [] = {
Gamma::Algebra::GammaX,
Gamma::Algebra::GammaY,
Gamma::Algebra::GammaZ,
Gamma::Algebra::GammaT
};
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
int threads = GridThread::GetThreads();
Coordinate latt4 = GridDefaultLatt();
int Ls=8;
for(int i=0;i<argc;i++)
if(std::string(argv[i]) == "-Ls"){
std::stringstream ss(argv[i+1]); ss >> Ls;
}
GridLogLayout();
long unsigned int single_site_flops = 8*Nc*(7+16*Nc);
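// 8*Nc*(7+16*Nc) = 1320 for Nc=3, the conventional Wilson dslash flop count per site.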
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
std::cout << GridLogMessage << "Making s innermost grids"<<std::endl;
GridCartesian * sUGrid = SpaceTimeGrid::makeFourDimDWFGrid(GridDefaultLatt(),GridDefaultMpi());
GridRedBlackCartesian * sUrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(sUGrid);
GridCartesian * sFGrid = SpaceTimeGrid::makeFiveDimDWFGrid(Ls,UGrid);
GridRedBlackCartesian * sFrbGrid = SpaceTimeGrid::makeFiveDimDWFRedBlackGrid(Ls,UGrid);
std::vector<int> seeds4({1,2,3,4});
std::vector<int> seeds5({5,6,7,8});
std::cout << GridLogMessage << "Initialising 4d RNG" << std::endl;
GridParallelRNG RNG4(UGrid); RNG4.SeedUniqueString(std::string("The 4D RNG"));
std::cout << GridLogMessage << "Initialising 5d RNG" << std::endl;
GridParallelRNG RNG5(FGrid); RNG5.SeedUniqueString(std::string("The 5D RNG"));
std::cout << GridLogMessage << "Initialised RNGs" << std::endl;
LatticeFermionF src (FGrid); random(RNG5,src);
#if 0
src = Zero();
{
Coordinate origin({0,0,0,latt4[2]-1,0});
SpinColourVectorF tmp;
tmp=Zero();
tmp()(0)(0)=Complex(-2.0,0.0);
std::cout << " source site 0 " << tmp<<std::endl;
pokeSite(tmp,src,origin);
}
#else
RealD N2 = 1.0/::sqrt(norm2(src));
src = src*N2;
#endif
LatticeFermionF result(FGrid); result=Zero();
LatticeFermionF ref(FGrid); ref=Zero();
LatticeFermionF tmp(FGrid);
LatticeFermionF err(FGrid);
std::cout << GridLogMessage << "Drawing gauge field" << std::endl;
LatticeGaugeFieldF Umu(UGrid);
SU<Nc>::HotConfiguration(RNG4,Umu);
std::cout << GridLogMessage << "Random gauge initialised " << std::endl;
#if 0
Umu=1.0;
for(int mu=0;mu<Nd;mu++){
LatticeColourMatrixF ttmp(UGrid);
ttmp = PeekIndex<LorentzIndex>(Umu,mu);
// if (mu !=2 ) ttmp = 0;
// ttmp = ttmp* pow(10.0,mu);
PokeIndex<LorentzIndex>(Umu,ttmp,mu);
}
std::cout << GridLogMessage << "Forced to diagonal " << std::endl;
#endif
////////////////////////////////////
// Naive wilson implementation
////////////////////////////////////
// replicate across fifth dimension
LatticeGaugeFieldF Umu5d(FGrid);
std::vector<LatticeColourMatrixF> U(4,FGrid);
{
autoView( Umu5d_v, Umu5d, CpuWrite);
autoView( Umu_v , Umu , CpuRead);
for(int ss=0;ss<Umu.Grid()->oSites();ss++){
for(int s=0;s<Ls;s++){
Umu5d_v[Ls*ss+s] = Umu_v[ss];
}
}
}
for(int mu=0;mu<Nd;mu++){
U[mu] = PeekIndex<LorentzIndex>(Umu5d,mu);
}
std::cout << GridLogMessage << "Setting up Cshift based reference " << std::endl;
if (1)
{
ref = Zero();
for(int mu=0;mu<Nd;mu++){
tmp = U[mu]*Cshift(src,mu+1,1);
ref=ref + tmp - Gamma(Gmu[mu])*tmp;
tmp =adj(U[mu])*src;
tmp =Cshift(tmp,mu+1,-1);
ref=ref + tmp + Gamma(Gmu[mu])*tmp;
}
ref = -0.5*ref;
}
RealD mass=0.1;
RealD M5 =1.8;
RealD NP = UGrid->_Nprocessors;
RealD NN = UGrid->NodeCount();
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
std::cout << GridLogMessage<< "* Kernel options --dslash-generic, --dslash-unroll, --dslash-asm" <<std::endl;
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionR::Dhop "<<std::endl;
std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplexF::Nsimd()<<std::endl;
std::cout << GridLogMessage<< "* VComplexF size is "<<sizeof(vComplexF)<< " B"<<std::endl;
if ( sizeof(RealF)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
if ( sizeof(RealF)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
#ifdef GRID_OMP
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
#endif
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
std::cout << GridLogMessage<< "*****************************************************************" <<std::endl;
DomainWallFermionF Dw(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
int ncall =1000;
if (1) {
FGrid->Barrier();
Dw.ZeroCounters();
Dw.Dhop(src,result,0);
std::cout<<GridLogMessage<<"Called warmup"<<std::endl;
double t0=usecond();
for(int i=0;i<ncall;i++){
__SSC_START;
Dw.Dhop(src,result,0);
__SSC_STOP;
}
double t1=usecond();
FGrid->Barrier();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=single_site_flops*volume*ncall;
auto nsimd = vComplex::Nsimd();
auto simdwidth = sizeof(vComplex);
// RF: Nd Wilson * Ls, Nd gauge * Ls, Nc colors
double data_rf = volume * ((2*Nd+1)*Nd*Nc + 2*Nd*Nc*Nc) * simdwidth / nsimd * ncall / (1024.*1024.*1024.);
// mem: Nd Wilson * Ls, Nd gauge, Nc colors
double data_mem = (volume * (2*Nd+1)*Nd*Nc + (volume/Ls) *2*Nd*Nc*Nc) * simdwidth / nsimd * ncall / (1024.*1024.*1024.);
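// data_rf counts the gauge field as re-read for every fifth-dimension slice, whereas
// data_mem counts the 4d gauge field only once per 4d site (volume/Ls), i.e. an
// estimate closer to the minimum memory traffic.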
std::cout<<GridLogMessage << "Called Dw "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
// std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
// std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
std::cout<<GridLogMessage << "RF GiB/s (base 2) = "<< 1000000. * data_rf/((t1-t0))<<std::endl;
std::cout<<GridLogMessage << "mem GiB/s (base 2) = "<< 1000000. * data_mem/((t1-t0))<<std::endl;
err = ref-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
//exit(0);
if(( norm2(err)>1.0e-4) ) {
/*
std::cout << "RESULT\n " << result<<std::endl;
std::cout << "REF \n " << ref <<std::endl;
std::cout << "ERR \n " << err <<std::endl;
*/
std::cout<<GridLogMessage << "WRONG RESULT" << std::endl;
FGrid->Barrier();
exit(-1);
}
assert (norm2(err)< 1.0e-4 );
Dw.Report();
}
if (1)
{ // Naive wilson dag implementation
ref = Zero();
for(int mu=0;mu<Nd;mu++){
// ref = src - Gamma(Gamma::Algebra::GammaX)* src ; // 1+gamma_x
tmp = U[mu]*Cshift(src,mu+1,1);
{
autoView( ref_v, ref, CpuWrite);
autoView( tmp_v, tmp, CpuRead);
for(int i=0;i<ref_v.size();i++){
ref_v[i]+= tmp_v[i] + Gamma(Gmu[mu])*tmp_v[i]; ;
}
}
tmp =adj(U[mu])*src;
tmp =Cshift(tmp,mu+1,-1);
{
autoView( ref_v, ref, CpuWrite);
autoView( tmp_v, tmp, CpuRead);
for(int i=0;i<ref_v.size();i++){
ref_v[i]+= tmp_v[i] - Gamma(Gmu[mu])*tmp_v[i]; ;
}
}
}
ref = -0.5*ref;
}
// dump=1;
Dw.Dhop(src,result,1);
std::cout << GridLogMessage << "Compare to naive wilson implementation Dag to verify correctness" << std::endl;
std::cout<<GridLogMessage << "Called DwDag"<<std::endl;
std::cout<<GridLogMessage << "norm dag result "<< norm2(result)<<std::endl;
std::cout<<GridLogMessage << "norm dag ref "<< norm2(ref)<<std::endl;
err = ref-result;
std::cout<<GridLogMessage << "norm dag diff "<< norm2(err)<<std::endl;
if((norm2(err)>1.0e-4)){
/*
std::cout<< "DAG RESULT\n " <<ref << std::endl;
std::cout<< "DAG sRESULT\n " <<result << std::endl;
std::cout<< "DAG ERR \n " << err <<std::endl;
*/
}
LatticeFermionF src_e (FrbGrid);
LatticeFermionF src_o (FrbGrid);
LatticeFermionF r_e (FrbGrid);
LatticeFermionF r_o (FrbGrid);
LatticeFermionF r_eo (FGrid);
std::cout<<GridLogMessage << "Calling Deo and Doe and //assert Deo+Doe == Dunprec"<<std::endl;
pickCheckerboard(Even,src_e,src);
pickCheckerboard(Odd,src_o,src);
std::cout<<GridLogMessage << "src_e"<<norm2(src_e)<<std::endl;
std::cout<<GridLogMessage << "src_o"<<norm2(src_o)<<std::endl;
// S-direction is INNERMOST and takes no part in the parity.
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
std::cout << GridLogMessage<< "* Benchmarking DomainWallFermionF::DhopEO "<<std::endl;
std::cout << GridLogMessage<< "* Vectorising space-time by "<<vComplexF::Nsimd()<<std::endl;
if ( sizeof(RealF)==4 ) std::cout << GridLogMessage<< "* SINGLE precision "<<std::endl;
if ( sizeof(RealF)==8 ) std::cout << GridLogMessage<< "* DOUBLE precision "<<std::endl;
#ifdef GRID_OMP
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute ) std::cout << GridLogMessage<< "* Using Overlapped Comms/Compute" <<std::endl;
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsThenCompute) std::cout << GridLogMessage<< "* Using sequential comms compute" <<std::endl;
#endif
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptGeneric ) std::cout << GridLogMessage<< "* Using GENERIC Nc WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptHandUnroll) std::cout << GridLogMessage<< "* Using Nc=3 WilsonKernels" <<std::endl;
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
{
Dw.ZeroCounters();
FGrid->Barrier();
Dw.DhopEO(src_o,r_e,DaggerNo);
double t0=usecond();
for(int i=0;i<ncall;i++){
#ifdef CUDA_PROFILE
if(i==10) cudaProfilerStart();
#endif
Dw.DhopEO(src_o,r_e,DaggerNo);
#ifdef CUDA_PROFILE
if(i==20) cudaProfilerStop();
#endif
}
double t1=usecond();
FGrid->Barrier();
double volume=Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt4[mu];
double flops=(single_site_flops*volume*ncall)/2.0;
std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl;
std::cout<<GridLogMessage << "Deo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl;
std::cout<<GridLogMessage << "Deo mflop/s per node "<< flops/(t1-t0)/NN<<std::endl;
Dw.Report();
}
Dw.DhopEO(src_o,r_e,DaggerNo);
Dw.DhopOE(src_e,r_o,DaggerNo);
Dw.Dhop (src ,result,DaggerNo);
std::cout<<GridLogMessage << "r_e"<<norm2(r_e)<<std::endl;
std::cout<<GridLogMessage << "r_o"<<norm2(r_o)<<std::endl;
std::cout<<GridLogMessage << "res"<<norm2(result)<<std::endl;
setCheckerboard(r_eo,r_o);
setCheckerboard(r_eo,r_e);
err = r_eo-result;
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
if((norm2(err)>1.0e-4)){
/*
std::cout<< "Deo RESULT\n " <<r_eo << std::endl;
std::cout<< "Deo REF\n " <<result << std::endl;
std::cout<< "Deo ERR \n " << err <<std::endl;
*/
}
pickCheckerboard(Even,src_e,err);
pickCheckerboard(Odd,src_o,err);
std::cout<<GridLogMessage << "norm diff even "<< norm2(src_e)<<std::endl;
std::cout<<GridLogMessage << "norm diff odd "<< norm2(src_o)<<std::endl;
assert(norm2(src_e)<1.0e-4);
assert(norm2(src_o)<1.0e-4);
Grid_finalize();
exit(0);
}

View File

@@ -63,7 +63,7 @@ int main (int argc, char ** argv)
 std::cout << GridLogMessage << "Drawing gauge field" << std::endl;
 LatticeGaugeFieldF Umu(UGrid);
-SU3::HotConfiguration(RNG4,Umu);
+SU<Nc>::HotConfiguration(RNG4,Umu);
 std::cout << GridLogMessage << "Random gauge initialised " << std::endl;
 RealD mass=0.1;
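The SU3 to SU<Nc> substitution above recurs throughout this comparison: the gauge-group helpers are now addressed through the templated SU<N> class, with Nc fixed at configure time by the new --enable-Nc option (see the configure.ac hunk further below). A minimal sketch of the updated call pattern, assuming Nc is the configure-time constant:

 LatticeGaugeField Umu(UGrid);            // gauge field on the 4d grid
 SU<Nc>::HotConfiguration(RNG4, Umu);     // random ("hot") start, valid for any Nc
 // SU<Nc>::ColdConfiguration(RNG4, Umu); // unit-gauge ("cold") start

With the default --enable-Nc=3 this compiles to exactly the old SU3:: behaviour.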

View File

@@ -30,7 +30,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 using namespace std;
 using namespace Grid;
 ;
 int main (int argc, char ** argv)
@@ -53,7 +53,7 @@ int main (int argc, char ** argv)
 GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
 std::cout << GridLogMessage << "Seeded"<<std::endl;
-LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
+LatticeGaugeField Umu(UGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
 std::cout << GridLogMessage << "made random gauge fields"<<std::endl;

View File

@@ -36,12 +36,12 @@ int main (int argc, char ** argv)
 {
 Grid_init(&argc,&argv);
-#define LMAX (48)
+#define LMAX (40)
 #define LMIN (8)
 #define LADD (8)
-int64_t Nwarm=50;
+int64_t Nwarm=10;
-int64_t Nloop=500;
+int64_t Nloop=100;
 Coordinate simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
 Coordinate mpi_layout = GridDefaultMpi();
@@ -118,6 +118,41 @@ int main (int argc, char ** argv)
 }
+std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
+std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 z=z+ x*y"<<std::endl;
+std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
+std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
+std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
+for(int lat=LMIN;lat<=LMAX;lat+=LADD){
+Coordinate latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
+int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
+GridCartesian Grid(latt_size,simd_layout,mpi_layout);
+GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
+LatticeColourMatrix z(&Grid); random(pRNG,z);
+LatticeColourMatrix x(&Grid); random(pRNG,x);
+LatticeColourMatrix y(&Grid); random(pRNG,y);
+for(int64_t i=0;i<Nwarm;i++){
+z=z+x*y;
+}
+double start=usecond();
+for(int64_t i=0;i<Nloop;i++){
+z=z+x*y;
+}
+double stop=usecond();
+double time = (stop-start)/Nloop*1000.0;
+double bytes=4*vol*Nc*Nc*sizeof(Complex);
+double flops=Nc*Nc*(6+8+8)*vol;
+std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
+}
 std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
 std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 mult(z,x,y)"<<std::endl;
 std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
@@ -143,7 +178,6 @@ int main (int argc, char ** argv)
 double start=usecond();
 for(int64_t i=0;i<Nloop;i++){
 mult(z,x,y);
-// mac(z,x,y);
 }
 double stop=usecond();
 double time = (stop-start)/Nloop*1000.0;
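For orientation (a reading of the added loop, not part of the patch): each z=z+x*y update touches four Nc x Nc complex matrices per site, since x and y are read while z is read and written back, which is where bytes = 4*vol*Nc*Nc*sizeof(Complex) comes from; for Nc=3 and a 16-byte double-precision Complex that is 4*9*16 = 576 bytes per site. The printed GB/s and GFlop/s columns are then bytes/time and flops/time, with time averaged over the Nloop iterations.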

View File

@@ -187,7 +187,8 @@ int main (int argc, char ** argv)
 auto xx = coalescedRead(x_v[ss]);
 auto yy = coalescedRead(y_v[ss]);
 auto zz = coalescedRead(z_v[ss]);
-zz = zz+xx*yy;
+//zz = zz+xx*yy;
+mac(&zz,&xx,&yy);
 coalescedWrite(z_v[ss],zz);
 });
 }

View File

@@ -123,6 +123,24 @@ case ${ac_LAPACK} in
 AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);;
 esac
+############### Nc
+AC_ARG_ENABLE([Nc],
+[AC_HELP_STRING([--enable-Nc=2|3|4], [enable number of colours])],
+[ac_Nc=${enable_Nc}], [ac_Nc=3])
+case ${ac_Nc} in
+2)
+AC_DEFINE([Config_Nc],[2],[Gauge group Nc]);;
+3)
+AC_DEFINE([Config_Nc],[3],[Gauge group Nc]);;
+4)
+AC_DEFINE([Config_Nc],[4],[Gauge group Nc]);;
+5)
+AC_DEFINE([Config_Nc],[5],[Gauge group Nc]);;
+*)
+AC_MSG_ERROR(["Unsupport gauge group choice Nc = ${ac_Nc}"]);;
+esac
 ############### FP16 conversions
 AC_ARG_ENABLE([sfw-fp16],
 [AC_HELP_STRING([--enable-sfw-fp16=yes|no], [enable software fp16 comms])],
@@ -330,12 +348,18 @@ case ${CXXTEST} in
 fi
 ;;
 hipcc)
-CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing --expt-extended-lambda --expt-relaxed-constexpr"
+# CXXFLAGS="$CXXFLAGS -Xcompiler -fno-strict-aliasing --expt-extended-lambda --expt-relaxed-constexpr"
+CXXFLAGS="$CXXFLAGS -fno-strict-aliasing"
 CXXLD=${CXX}
 if test $ac_openmp = yes; then
 CXXFLAGS="$CXXFLAGS -Xcompiler -fopenmp"
 fi
 ;;
+dpcpp)
+LDFLAGS="$LDFLAGS"
+CXXFLAGS="$CXXFLAGS"
+CXXLD=${CXX}
+;;
 *)
 CXXLD=${CXX}
 CXXFLAGS="$CXXFLAGS -fno-strict-aliasing"
@@ -453,23 +477,24 @@ esac
 AM_CXXFLAGS="$SIMD_FLAGS $AM_CXXFLAGS"
 AM_CFLAGS="$SIMD_FLAGS $AM_CFLAGS"
-############### Precision selection
-AC_ARG_ENABLE([precision],
-[AC_HELP_STRING([--enable-precision=single|double],
-[Select default word size of Real])],
-[ac_PRECISION=${enable_precision}],[ac_PRECISION=double])
-case ${ac_PRECISION} in
-single)
-AC_DEFINE([GRID_DEFAULT_PRECISION_SINGLE],[1],[GRID_DEFAULT_PRECISION is SINGLE] )
-;;
-double)
-AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] )
-;;
-*)
-AC_MSG_ERROR([${ac_PRECISION} unsupported --enable-precision option]);
-;;
-esac
+############### Precision selection - deprecate
+#AC_ARG_ENABLE([precision],
+# [AC_HELP_STRING([--enable-precision=single|double],
+# [Select default word size of Real])],
+# [ac_PRECISION=${enable_precision}],[ac_PRECISION=double])
+AC_DEFINE([GRID_DEFAULT_PRECISION_DOUBLE],[1],[GRID_DEFAULT_PRECISION is DOUBLE] )
+#case ${ac_PRECISION} in
+# single)
+# AC_DEFINE([GRID_DEFAULT_PRECISION_SINGLE],[1],[GRID_DEFAULT_PRECISION is SINGLE] )
+# ;;
+# double)
+# ;;
+# *)
+# AC_MSG_ERROR([${ac_PRECISION} unsupported --enable-precision option]);
+# ;;
+#esac
 ###################### Shared memory allocation technique under MPI3
 AC_ARG_ENABLE([shm],[AC_HELP_STRING([--enable-shm=shmopen|shmget|hugetlbfs|shmnone],
@@ -650,6 +675,7 @@ os (target) : $target_os
 compiler vendor : ${ax_cv_cxx_compiler_vendor}
 compiler version : ${ax_cv_gxx_version}
 ----- BUILD OPTIONS -----------------------------------
+Nc : ${ac_Nc}
 SIMD : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG}
 Threading : ${ac_openmp}
 Acceleration : ${ac_ACCELERATOR}
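As an illustrative invocation (not part of the patch), the new switch composes with the existing options; a four-colour build with the generic SIMD target could be configured as

 ../configure --enable-Nc=4 --enable-simd=GEN --enable-comms=mpi-auto

and the selected value is echoed in the new "Nc" line of the build-options summary above. The default remains Nc=3, and unsupported values are rejected by the AC_MSG_ERROR branch.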

View File

@@ -184,19 +184,19 @@ Below are shown the `configure` script invocations for three recommended configurations
 This is the build for every day developing and debugging with Xcode. It uses the Xcode clang c++ compiler, without MPI, and defaults to double-precision. Xcode builds the `Debug` configuration with debug symbols for full debugging:
-../configure CXX=clang++ --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-precision=double --prefix=$GridPre/GridDebug --enable-comms=none
+../configure CXX=clang++ CXXFLAGS="-I$GridPkg/include/libomp -Xpreprocessor -fopenmp -std=c++11" LDFLAGS="-L$GridPkg/lib/libomp" LIBS="-lomp" --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-comms=none --prefix=$GridPre/Debug
 #### 2. `Release`
-Since Grid itself doesn't really have debug configurations, the release build is recommended to be the same as `Debug`, except using single-precision (handy for validation):
+Since Grid itself doesn't really have debug configurations, the release build is recommended to be the same as `Debug`:
-../configure CXX=clang++ --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-precision=single --prefix=$GridPre/GridRelease --enable-comms=none
+../configure CXX=clang++ CXXFLAGS="-I$GridPkg/include/libomp -Xpreprocessor -fopenmp -std=c++11" LDFLAGS="-L$GridPkg/lib/libomp" LIBS="-lomp" --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-comms=none --prefix=$GridPre/Release
 #### 3. `MPIDebug`
 Debug configuration with MPI:
-../configure CXX=clang++ --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-precision=double --prefix=$GridPre/GridMPIDebug --enable-comms=mpi-auto MPICXX=$GridPre/bin/mpicxx
+../configure CXX=clang++ CXXFLAGS="-I$GridPkg/include/libomp -Xpreprocessor -fopenmp -std=c++11" LDFLAGS="-L$GridPkg/lib/libomp" LIBS="-lomp" --with-hdf5=$GridPkg --with-gmp=$GridPkg --with-mpfr=$GridPkg --with-fftw=$GridPkg --with-lime=$GridPre --enable-simd=GEN --enable-comms=mpi-auto MPICXX=$GridPre/bin/mpicxx --prefix=$GridPre/MPIDebug
 ### 5.3 Build Grid

View File

@@ -178,15 +178,10 @@ Then enter the cloned directory and set up the build system::
 Now you can execute the `configure` script to generate makefiles (here from a build directory)::
 mkdir build; cd build
-../configure --enable-precision=double --enable-simd=AVX --enable-comms=mpi-auto \
+../configure --enable-simd=AVX --enable-comms=mpi-auto \
 --prefix=<path>
-where::
+::
---enable-precision=single|double
-sets the **default precision**. Since this is largely a benchmarking convenience, it is anticipated that the default precision may be removed in future implementations,
-and that explicit type selection be made at all points. Naturally, most code will be type templated in any case.::
 --enable-simd=GEN|SSE4|AVX|AVXFMA|AVXFMA4|AVX2|AVX512|NEONv8|QPX
@@ -236,7 +231,7 @@ Detailed build configuration options
 --enable-mkl[=path] use Intel MKL for FFT (and LAPACK if enabled) routines. A UNIX prefix containing the library can be specified (optional).
 --enable-simd=code setup Grid for the SIMD target `<code>`(default: `GEN`). A list of possible SIMD targets is detailed in a section below.
 --enable-gen-simd-width=size select the size (in bytes) of the generic SIMD vector type (default: 32 bytes). E.g. SSE 128 bit corresponds to 16 bytes.
---enable-precision=single|double set the default precision (default: `double`).
+--enable-precision=single|double set the default precision (default: `double`). **Deprecated option**
 --enable-comms=mpi|none use `<comm>` for message passing (default: `none`).
 --enable-rng=sitmo|ranlux48|mt19937 choose the RNG (default: `sitmo`).
 --disable-timers disable system dependent high-resolution timers.
@@ -304,8 +299,7 @@ Build setup for Intel Knights Landing platform
 The following configuration is recommended for the Intel Knights Landing platform::
-../configure --enable-precision=double\
---enable-simd=KNL \
+../configure --enable-simd=KNL \
 --enable-comms=mpi-auto \
 --enable-mkl \
 CXX=icpc MPICXX=mpiicpc
@@ -314,8 +308,7 @@ The MKL flag enables use of BLAS and FFTW from the Intel Math Kernels Library.
 If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use::
-../configure --enable-precision=double\
---enable-simd=KNL \
+../configure --enable-simd=KNL \
 --enable-comms=mpi \
 --enable-mkl \
 CXX=CC CC=cc
@@ -332,8 +325,7 @@ presently performs better with use of more than one rank per node, using shared
 for interior communication.
 We recommend four ranks per node for best performance, but optimum is local volume dependent. ::
-../configure --enable-precision=double\
---enable-simd=KNL \
+../configure --enable-simd=KNL \
 --enable-comms=mpi-auto \
 --enable-mkl \
 CC=icpc MPICXX=mpiicpc
@@ -343,8 +335,7 @@ Build setup for Intel Haswell Xeon platform
 The following configuration is recommended for the Intel Haswell platform::
-../configure --enable-precision=double\
---enable-simd=AVX2 \
+../configure --enable-simd=AVX2 \
 --enable-comms=mpi-auto \
 --enable-mkl \
 CXX=icpc MPICXX=mpiicpc
@@ -360,8 +351,7 @@ where `<path>` is the UNIX prefix where GMP and MPFR are installed.
 If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use::
-../configure --enable-precision=double\
---enable-simd=AVX2 \
+../configure --enable-simd=AVX2 \
 --enable-comms=mpi \
 --enable-mkl \
 CXX=CC CC=cc
@@ -379,8 +369,7 @@ Build setup for Intel Skylake Xeon platform
 The following configuration is recommended for the Intel Skylake platform::
-../configure --enable-precision=double\
---enable-simd=AVX512 \
+../configure --enable-simd=AVX512 \
 --enable-comms=mpi \
 --enable-mkl \
 CXX=mpiicpc
@@ -396,8 +385,7 @@ where `<path>` is the UNIX prefix where GMP and MPFR are installed.
 If you are working on a Cray machine that does not use the `mpiicpc` wrapper, please use::
-../configure --enable-precision=double\
---enable-simd=AVX512 \
+../configure --enable-simd=AVX512 \
 --enable-comms=mpi \
 --enable-mkl \
 CXX=CC CC=cc
@@ -422,8 +410,7 @@ and 8 threads per rank.
 The following configuration is recommended for the AMD EPYC platform::
-../configure --enable-precision=double\
---enable-simd=AVX2 \
+../configure --enable-simd=AVX2 \
 --enable-comms=mpi \
 CXX=mpicxx

View File

@@ -69,7 +69,7 @@ int main (int argc, char ** argv)
 std::vector<LatticeColourMatrix> U(4,&Fine);
-SU3::HotConfiguration(pRNGa,Umu);
+SU<Nc>::HotConfiguration(pRNGa,Umu);
 FieldMetaData header;

View File

@@ -84,7 +84,7 @@ int main (int argc, char ** argv)
 std::vector<LatticeColourMatrix> U(4,&Fine);
-SU3::HotConfiguration(pRNGa,Umu);
+SU<Nc>::HotConfiguration(pRNGa,Umu);
 FieldMetaData header;
 std::string file("./ckpoint_lat.4000");

View File

@@ -80,7 +80,7 @@ int main (int argc, char ** argv)
 GridParallelRNG sRNG5(sFGrid); sRNG5.SeedFixedIntegers(seeds5);
 LatticeGaugeField Umu(UGrid);
-SU3::HotConfiguration(RNG4,Umu);
+SU<Nc>::HotConfiguration(RNG4,Umu);
 RealD mass=0.1;
 RealD M5 =1.8;

View File

@@ -202,7 +202,7 @@ int main (int argc, char ** argv) {
 std::vector<int> seeds4({1,2,3,4});
 GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
 LatticeGaugeField Umu(UGrid);
-SU3::HotConfiguration(RNG4,Umu);
+SU<Nc>::HotConfiguration(RNG4,Umu);
 // FieldMetaData header;
 // NerscIO::readConfiguration(Umu,header,Params.config);

View File

@@ -71,7 +71,7 @@ int main (int argc, char ** argv)
 LatticeGaugeFieldD Umu(UGrid);
 LatticeGaugeFieldF Umu_f(UGrid_f);
-SU3::HotConfiguration(RNG4,Umu);
+SU<Nc>::HotConfiguration(RNG4,Umu);
 precisionChange(Umu_f,Umu);

View File

@@ -69,7 +69,7 @@ int main (int argc, char ** argv)
 LatticeGaugeFieldD Umu(UGrid);
 LatticeGaugeFieldF Umu_f(UGrid_f);
-SU3::HotConfiguration(RNG4,Umu);
+SU<Nc>::HotConfiguration(RNG4,Umu);
 precisionChange(Umu_f,Umu);

View File

@@ -64,7 +64,7 @@ int main (int argc, char ** argv)
 LatticeFermion ref(FGrid); ref=Zero();
 LatticeFermion tmp(FGrid);
 LatticeFermion err(FGrid);
-LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
+LatticeGaugeField Umu(UGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
 std::vector<LatticeColourMatrix> U(4,UGrid);
 for(int mu=0;mu<Nd;mu++){

View File

@@ -131,7 +131,7 @@ int main (int argc, char ** argv)
 // LatticeFermion result(FGrid); result=Zero();
 // LatticeGaugeField Umu(UGrid);
-// SU3::HotConfiguration(RNG4,Umu);
+// SU<Nc>::HotConfiguration(RNG4,Umu);
 // std::vector<LatticeColourMatrix> U(4,UGrid);
 // for(int mu=0;mu<Nd;mu++){

View File

@@ -69,7 +69,7 @@ int main (int argc, char ** argv)
 GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
 GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
-LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
+LatticeGaugeField Umu(UGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
 std::vector<LatticeColourMatrix> U(4,UGrid);
 RealD mass=0.1;

View File

@@ -73,7 +73,7 @@ int main (int argc, char ** argv)
 LatticeFermion ref (FGrid); ref = Zero();
 LatticeFermion tmp (FGrid); tmp = Zero();
 LatticeFermion err (FGrid); err = Zero();
-LatticeGaugeField Umu (UGrid); SU3::HotConfiguration(RNG4, Umu);
+LatticeGaugeField Umu (UGrid); SU<Nc>::HotConfiguration(RNG4, Umu);
 std::vector<LatticeColourMatrix> U(4,UGrid);
 // Only one non-zero (y)

View File

@@ -72,7 +72,7 @@ int main (int argc, char ** argv)
 LatticeFermion ref(FGrid); ref=Zero();
 LatticeFermion tmp(FGrid); tmp=Zero();
 LatticeFermion err(FGrid); tmp=Zero();
-LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
+LatticeGaugeField Umu(UGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
 std::vector<LatticeColourMatrix> U(4,UGrid);
 // Only one non-zero (y)

View File

@@ -138,7 +138,7 @@ int main (int argc, char ** argv)
 LatticeGaugeFieldD Umu(&GRID);
-SU3::ColdConfiguration(pRNG,Umu); // Unit gauge
+SU<Nc>::ColdConfiguration(pRNG,Umu); // Unit gauge
 // Umu=Zero();
 ////////////////////////////////////////////////////
 // Wilson test

View File

@@ -73,11 +73,11 @@ int main (int argc, char ** argv)
 LatticeColourMatrix xform2(&GRID); // Gauge xform
 LatticeColourMatrix xform3(&GRID); // Gauge xform
-SU3::ColdConfiguration(pRNG,Umu); // Unit gauge
+SU<Nc>::ColdConfiguration(pRNG,Umu); // Unit gauge
 Uorg=Umu;
 Urnd=Umu;
-SU3::RandomGaugeTransform(pRNG,Urnd,g); // Unit gauge
+SU<Nc>::RandomGaugeTransform(pRNG,Urnd,g); // Unit gauge
 Real plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu);
 std::cout << " Initial plaquette "<<plaq << std::endl;
@@ -121,7 +121,7 @@ int main (int argc, char ** argv)
 std::cout<< "* Testing non-unit configuration *" <<std::endl;
 std::cout<< "*****************************************************************" <<std::endl;
-SU3::HotConfiguration(pRNG,Umu); // Unit gauge
+SU<Nc>::HotConfiguration(pRNG,Umu); // Unit gauge
 plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu);
 std::cout << " Initial plaquette "<<plaq << std::endl;
@@ -136,7 +136,7 @@ int main (int argc, char ** argv)
 std::cout<< "*****************************************************************" <<std::endl;
 Umu=Urnd;
-SU3::HotConfiguration(pRNG,Umu); // Unit gauge
+SU<Nc>::HotConfiguration(pRNG,Umu); // Unit gauge
 plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu);
 std::cout << " Initial plaquette "<<plaq << std::endl;

View File

@@ -114,7 +114,7 @@ int main (int argc, char ** argv)
 GridParallelRNG RNG4_2f(UGrid_2f); RNG4_2f.SeedFixedIntegers(seeds4);
 GparityGaugeField Umu_2f(UGrid_2f);
-SU3::HotConfiguration(RNG4_2f,Umu_2f);
+SU<Nc>::HotConfiguration(RNG4_2f,Umu_2f);
 StandardFermionField src (FGrid_2f);
 StandardFermionField tmpsrc(FGrid_2f);

View File

@@ -61,7 +61,7 @@ int main (int argc, char ** argv)
 FermionField ref(&Grid); ref=Zero();
 FermionField tmp(&Grid); tmp=Zero();
 FermionField err(&Grid); tmp=Zero();
-LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+LatticeGaugeField Umu(&Grid); SU<Nc>::HotConfiguration(pRNG,Umu);
 std::vector<LatticeColourMatrix> U(4,&Grid);
 double volume=1;

View File

@@ -66,14 +66,14 @@ int main(int argc, char** argv) {
 std::cout << GridLogMessage << "*********************************************"
 << std::endl;
-std::cout << GridLogMessage << "* Generators for SU(3)" << std::endl;
+std::cout << GridLogMessage << "* Generators for SU(Nc" << std::endl;
 std::cout << GridLogMessage << "*********************************************"
 << std::endl;
-SU3::printGenerators();
+SU<Nc>::printGenerators();
-std::cout << "Dimension of adjoint representation: "<< SU3Adjoint::Dimension << std::endl;
+std::cout << "Dimension of adjoint representation: "<< SU<Nc>Adjoint::Dimension << std::endl;
-SU3Adjoint::printGenerators();
+SU<Nc>Adjoint::printGenerators();
-SU3::testGenerators();
+SU<Nc>::testGenerators();
-SU3Adjoint::testGenerators();
+SU<Nc>Adjoint::testGenerators();
 std::cout<<GridLogMessage<<"*********************************************"<<std::endl;
 std::cout<<GridLogMessage<<"* Generators for SU(4)"<<std::endl;
@@ -87,22 +87,22 @@ int main(int argc, char** argv) {
 // Projectors
 GridParallelRNG gridRNG(grid);
 gridRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
-SU3Adjoint::LatticeAdjMatrix Gauss(grid);
+SU<Nc>Adjoint::LatticeAdjMatrix Gauss(grid);
-SU3::LatticeAlgebraVector ha(grid);
+SU<Nc>::LatticeAlgebraVector ha(grid);
-SU3::LatticeAlgebraVector hb(grid);
+SU<Nc>::LatticeAlgebraVector hb(grid);
 random(gridRNG,Gauss);
 std::cout << GridLogMessage << "Start projectOnAlgebra" << std::endl;
-SU3Adjoint::projectOnAlgebra(ha, Gauss);
+SU<Nc>Adjoint::projectOnAlgebra(ha, Gauss);
 std::cout << GridLogMessage << "end projectOnAlgebra" << std::endl;
 std::cout << GridLogMessage << "Start projector" << std::endl;
-SU3Adjoint::projector(hb, Gauss);
+SU<Nc>Adjoint::projector(hb, Gauss);
 std::cout << GridLogMessage << "end projector" << std::endl;
 std::cout << GridLogMessage << "ReStart projector" << std::endl;
-SU3Adjoint::projector(hb, Gauss);
+SU<Nc>Adjoint::projector(hb, Gauss);
 std::cout << GridLogMessage << "end projector" << std::endl;
-SU3::LatticeAlgebraVector diff = ha -hb;
+SU<Nc>::LatticeAlgebraVector diff = ha -hb;
 std::cout << GridLogMessage << "Difference: " << norm2(diff) << std::endl;
@@ -260,20 +260,20 @@ int main(int argc, char** argv) {
 std::cout << GridLogMessage << "Test for the Two Index Symmetric projectors"
 << std::endl;
 // Projectors
-SU3TwoIndexSymm::LatticeTwoIndexMatrix Gauss2(grid);
+SU<Nc>TwoIndexSymm::LatticeTwoIndexMatrix Gauss2(grid);
 random(gridRNG,Gauss2);
 std::cout << GridLogMessage << "Start projectOnAlgebra" << std::endl;
-SU3TwoIndexSymm::projectOnAlgebra(ha, Gauss2);
+SU<Nc>TwoIndexSymm::projectOnAlgebra(ha, Gauss2);
 std::cout << GridLogMessage << "end projectOnAlgebra" << std::endl;
 std::cout << GridLogMessage << "Start projector" << std::endl;
-SU3TwoIndexSymm::projector(hb, Gauss2);
+SU<Nc>TwoIndexSymm::projector(hb, Gauss2);
 std::cout << GridLogMessage << "end projector" << std::endl;
 std::cout << GridLogMessage << "ReStart projector" << std::endl;
-SU3TwoIndexSymm::projector(hb, Gauss2);
+SU<Nc>TwoIndexSymm::projector(hb, Gauss2);
 std::cout << GridLogMessage << "end projector" << std::endl;
-SU3::LatticeAlgebraVector diff2 = ha - hb;
+SU<Nc>::LatticeAlgebraVector diff2 = ha - hb;
 std::cout << GridLogMessage << "Difference: " << norm2(diff) << std::endl;
 std::cout << GridLogMessage << "*********************************************"
 << std::endl;
@@ -284,20 +284,20 @@ int main(int argc, char** argv) {
 std::cout << GridLogMessage << "Test for the Two index anti-Symmetric projectors"
 << std::endl;
 // Projectors
-SU3TwoIndexAntiSymm::LatticeTwoIndexMatrix Gauss2a(grid);
+SU<Nc>TwoIndexAntiSymm::LatticeTwoIndexMatrix Gauss2a(grid);
 random(gridRNG,Gauss2a);
 std::cout << GridLogMessage << "Start projectOnAlgebra" << std::endl;
-SU3TwoIndexAntiSymm::projectOnAlgebra(ha, Gauss2a);
+SU<Nc>TwoIndexAntiSymm::projectOnAlgebra(ha, Gauss2a);
 std::cout << GridLogMessage << "end projectOnAlgebra" << std::endl;
 std::cout << GridLogMessage << "Start projector" << std::endl;
-SU3TwoIndexAntiSymm::projector(hb, Gauss2a);
+SU<Nc>TwoIndexAntiSymm::projector(hb, Gauss2a);
 std::cout << GridLogMessage << "end projector" << std::endl;
 std::cout << GridLogMessage << "ReStart projector" << std::endl;
-SU3TwoIndexAntiSymm::projector(hb, Gauss2a);
+SU<Nc>TwoIndexAntiSymm::projector(hb, Gauss2a);
 std::cout << GridLogMessage << "end projector" << std::endl;
-SU3::LatticeAlgebraVector diff2a = ha - hb;
+SU<Nc>::LatticeAlgebraVector diff2a = ha - hb;
 std::cout << GridLogMessage << "Difference: " << norm2(diff2a) << std::endl;
 std::cout << GridLogMessage << "*********************************************"
 << std::endl;

View File

@@ -444,7 +444,7 @@ int main(int argc, char **argv) {
 // Lattice 12x12 GEMM
 scFooBar = scFoo * scBar;
-// Benchmark some simple operations LatticeSU3 * Lattice SU3.
+// Benchmark some simple operations LatticeSU<Nc> * Lattice SU<Nc>.
 double t0, t1, flops;
 double bytes;
 int ncall = 5000;

View File

@@ -73,7 +73,7 @@ int main (int argc, char ** argv)
 LatticeFermion ref (FGrid); ref = Zero();
 LatticeFermion tmp (FGrid); tmp = Zero();
 LatticeFermion err (FGrid); err = Zero();
-LatticeGaugeField Umu (UGrid); SU3::HotConfiguration(RNG4, Umu);
+LatticeGaugeField Umu (UGrid); SU<Nc>::HotConfiguration(RNG4, Umu);
 std::vector<LatticeColourMatrix> U(4,UGrid);
 // Only one non-zero (y)

View File

@@ -55,7 +55,7 @@ int main (int argc, char ** argv)
 GridParallelRNG pRNG(grid); pRNG.SeedFixedIntegers(pseeds);
 GridSerialRNG sRNG; sRNG.SeedFixedIntegers(sseeds);
-// SU3 colour operatoions
+// SU<Nc> colour operatoions
 LatticeColourMatrix link(grid);
 LatticeColourMatrix staple(grid);
@@ -87,10 +87,10 @@ int main (int argc, char ** argv)
 link = PeekIndex<LorentzIndex>(Umu,mu);
-for( int subgroup=0;subgroup<SU3::su2subgroups();subgroup++ ) {
+for( int subgroup=0;subgroup<SU<Nc>::su2subgroups();subgroup++ ) {
 // update Even checkerboard
-SU3::SubGroupHeatBath(sRNG,pRNG,beta,link,staple,subgroup,20,mask);
+SU<Nc>::SubGroupHeatBath(sRNG,pRNG,beta,link,staple,subgroup,20,mask);
 }

View File

@@ -64,7 +64,7 @@ int main (int argc, char ** argv)
 FermionField err(&Grid); tmp=Zero();
 FermionField phi (&Grid); random(pRNG,phi);
 FermionField chi (&Grid); random(pRNG,chi);
-LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+LatticeGaugeField Umu(&Grid); SU<Nc>::HotConfiguration(pRNG,Umu);
 std::vector<LatticeColourMatrix> U(4,&Grid);

View File

@@ -75,7 +75,7 @@ int main (int argc, char ** argv)
 FermionField phi (FGrid); random(pRNG5,phi);
 FermionField chi (FGrid); random(pRNG5,chi);
-LatticeGaugeField Umu(UGrid); SU3::ColdConfiguration(pRNG4,Umu);
+LatticeGaugeField Umu(UGrid); SU<Nc>::ColdConfiguration(pRNG4,Umu);
 LatticeGaugeField Umua(UGrid); Umua=Umu;
 double volume=Ls;

View File

@@ -84,7 +84,7 @@ int main (int argc, char ** argv)
 FermionField chi (FGrid); random(pRNG5,chi);
 LatticeGaugeField Umu(UGrid);
-SU3::HotConfiguration(pRNG4,Umu);
+SU<Nc>::HotConfiguration(pRNG4,Umu);
 /*
 for(int mu=1;mu<4;mu++){

View File

@@ -83,7 +83,7 @@ int main (int argc, char ** argv)
 FermionField chi (FGrid); random(pRNG5,chi);
 LatticeGaugeFieldF Umu(UGrid);
-SU3::HotConfiguration(pRNG4,Umu);
+SU<Nc>::HotConfiguration(pRNG4,Umu);
 /*
 for(int mu=1;mu<4;mu++){

View File

@@ -64,7 +64,7 @@ int main (int argc, char ** argv)
 FermionField err(&Grid); tmp=Zero();
 FermionField phi (&Grid); random(pRNG,phi);
 FermionField chi (&Grid); random(pRNG,chi);
-LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+LatticeGaugeField Umu(&Grid); SU<Nc>::HotConfiguration(pRNG,Umu);
 std::vector<LatticeColourMatrix> U(4,&Grid);

View File

@@ -74,7 +74,7 @@ int main(int argc, char **argv)
 FermionField chi(&Grid);
 random(pRNG, chi);
 LatticeGaugeField Umu(&Grid);
-SU3::HotConfiguration(pRNG, Umu);
+SU<Nc>::HotConfiguration(pRNG, Umu);
 std::vector<LatticeColourMatrix> U(4, &Grid);
 double volume = 1;

View File

@@ -70,7 +70,7 @@ int main (int argc, char ** argv)
 LatticeFermion tmp(&Grid); tmp=Zero();
 LatticeFermion err(&Grid); tmp=Zero();
 LatticeGaugeField Umu(&Grid);
-SU3::HotConfiguration(pRNG,Umu);
+SU<Nc>::HotConfiguration(pRNG,Umu);
 std::vector<LatticeColourMatrix> U(4,&Grid);
 double volume=1;

View File

@@ -71,7 +71,7 @@ int main (int argc, char ** argv)
 LatticeFermion ref(&Grid); ref=Zero();
 LatticeFermion tmp(&Grid); tmp=Zero();
 LatticeFermion err(&Grid); tmp=Zero();
-LatticeGaugeField Umu(&Grid); SU3::HotConfiguration(pRNG,Umu);
+LatticeGaugeField Umu(&Grid); SU<Nc>::HotConfiguration(pRNG,Umu);
 std::vector<LatticeColourMatrix> U(4,&Grid);
 double volume=1;

View File

@@ -116,7 +116,7 @@ int main (int argc, char ** argv)
 LatticeGaugeField Umu(UGrid);
 LatticeGaugeFieldF UmuF(UGridF);
-SU3::HotConfiguration(RNG4,Umu);
+SU<Nc>::HotConfiguration(RNG4,Umu);
 precisionChange(UmuF,Umu);
 std::vector<LatticeColourMatrix> U(4,UGrid);

View File

@@ -77,7 +77,7 @@ int main (int argc, char ** argv)
 LatticeFermion ref(FGrid); ref=Zero();
 LatticeFermion tmp(FGrid);
 LatticeFermion err(FGrid);
-LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
+LatticeGaugeField Umu(UGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
 #if 0
 std::vector<LatticeColourMatrix> U(4,UGrid);

View File

@@ -70,7 +70,7 @@ int main (int argc, char ** argv)
 GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
 GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
-LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(RNG4,Umu);
+LatticeGaugeField Umu(UGrid); SU<Nc>::HotConfiguration(RNG4,Umu);
 std::vector<LatticeColourMatrix> U(4,UGrid);
 RealD mass=0.1;

View File

@@ -71,9 +71,9 @@ int main (int argc, char ** argv)
 std::string file("./ckpoint_lat.400");
 NerscIO::readConfiguration(Umu,header,file);
-// SU3::ColdConfiguration(RNG4,Umu);
+// SU<Nc>::ColdConfiguration(RNG4,Umu);
-// SU3::TepidConfiguration(RNG4,Umu);
+// SU<Nc>::TepidConfiguration(RNG4,Umu);
-// SU3::HotConfiguration(RNG4,Umu);
+// SU<Nc>::HotConfiguration(RNG4,Umu);
 // Umu=Zero();
 RealD mass=0.1;

View File

@@ -108,8 +108,8 @@ int main (int argc, char ** argv)
 GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
 LatticeGaugeField Umu(UGrid);
-SU3::ColdConfiguration(Umu);
+SU<Nc>::ColdConfiguration(Umu);
-// SU3::HotConfiguration(RNG4,Umu);
+// SU<Nc>::HotConfiguration(RNG4,Umu);
 RealD mass=0.3;
 RealD M5 =1.0;

View File

@@ -73,7 +73,7 @@ int main(int argc, char** argv)
 // Random gauge field
 LatticeGaugeField Umu(UGrid);
-SU3::HotConfiguration(RNG4, Umu);
+SU<Nc>::HotConfiguration(RNG4, Umu);
 DomainWallEOFAFermionR Lop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mpv, 0.0, -1, M5);
 DomainWallEOFAFermionR Rop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mpv, mf, mpv, -1.0, 1, M5);

View File

@@ -77,7 +77,7 @@ int main(int argc, char** argv)
 // Random gauge field
 LatticeGaugeField Umu(UGrid);
-SU3::HotConfiguration(RNG4, Umu);
+SU<Nc>::HotConfiguration(RNG4, Umu);
 // GparityDomainWallFermionR::ImplParams params;
 FermionAction::ImplParams params;

View File

@@ -75,7 +75,7 @@ int main(int argc, char** argv)
 // Random gauge field
 LatticeGaugeField Umu(UGrid);
-SU3::HotConfiguration(RNG4, Umu);
+SU<Nc>::HotConfiguration(RNG4, Umu);
 MobiusEOFAFermionR Lop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mpv, 0.0, -1, M5, b, c);
 MobiusEOFAFermionR Rop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mpv, mf, mpv, -1.0, 1, M5, b, c);

View File

@@ -79,7 +79,7 @@ int main(int argc, char** argv)
 // Random gauge field
 LatticeGaugeField Umu(UGrid);
-SU3::HotConfiguration(RNG4, Umu);
+SU<Nc>::HotConfiguration(RNG4, Umu);
 FermionAction::ImplParams params;
 FermionAction Lop(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, mf, mpv, 0.0, -1, M5, b, c, params);

View File

@@ -102,7 +102,7 @@ int main(int argc, char **argv)
 // Random gauge field
 LatticeGaugeField Umu(UGrid);
-SU3::HotConfiguration(RNG4, Umu);
+SU<Nc>::HotConfiguration(RNG4, Umu);
 // Initialize RHMC fermion operators
 DomainWallFermionR Ddwf_f(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, M5);

View File

@@ -104,7 +104,7 @@ int main(int argc, char **argv)
 // Random gauge field
 LatticeGaugeField Umu(UGrid);
-SU3::HotConfiguration(RNG4, Umu);
+SU<Nc>::HotConfiguration(RNG4, Umu);
 // Initialize RHMC fermion operators
 GparityDomainWallFermionR::ImplParams params;

View File

@@ -104,7 +104,7 @@ int main(int argc, char **argv)
 // Random gauge field
 LatticeGaugeField Umu(UGrid);
-SU3::HotConfiguration(RNG4, Umu);
+SU<Nc>::HotConfiguration(RNG4, Umu);
 // Initialize RHMC fermion operators
 MobiusFermionR Ddwf_f(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mf, M5, b, c);

View File

@@ -106,7 +106,7 @@ int main(int argc, char **argv)
 // Random gauge field
 LatticeGaugeField Umu(UGrid);
-SU3::HotConfiguration(RNG4, Umu);
+SU<Nc>::HotConfiguration(RNG4, Umu);
 // Initialize RHMC fermion operators
 GparityDomainWallFermionR::ImplParams params;

View File

@@ -59,7 +59,7 @@ int main (int argc, char ** argv)
 LatticeGaugeField U(UGrid);
-SU3::HotConfiguration(RNG4,U);
+SU<Nc>::HotConfiguration(RNG4,U);
 ////////////////////////////////////
 // Unmodified matrix element
@@ -93,7 +93,7 @@ int main (int argc, char ** argv)
 for(int mu=0;mu<Nd;mu++){
-SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
+SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
 PokeIndex<LorentzIndex>(mom,mommu,mu);

View File

@@ -60,7 +60,7 @@ int main (int argc, char ** argv)
 LatticeGaugeField U(UGrid);
-SU3::HotConfiguration(RNG4,U);
+SU<Nc>::HotConfiguration(RNG4,U);
 ////////////////////////////////////
 // Unmodified matrix element
@@ -94,7 +94,7 @@ int main (int argc, char ** argv)
 for(int mu=0;mu<Nd;mu++){
-SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
+SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
 PokeIndex<LorentzIndex>(mom,mommu,mu);

View File

@@ -72,7 +72,7 @@ int main (int argc, char** argv)
 LatticeFermion MphiPrime (FGrid);
 LatticeGaugeField U(UGrid);
-SU3::HotConfiguration(RNG4,U);
+SU<Nc>::HotConfiguration(RNG4,U);
 ////////////////////////////////////
 // Unmodified matrix element
@@ -105,7 +105,7 @@ int main (int argc, char** argv)
 for(int mu=0; mu<Nd; mu++){
-SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
+SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
 PokeIndex<LorentzIndex>(mom, mommu, mu);

View File

@@ -63,8 +63,8 @@ int main (int argc, char ** argv)
 LatticeGaugeField U(UGrid);
-SU3::HotConfiguration(RNG4,U);
+SU<Nc>::HotConfiguration(RNG4,U);
-// SU3::ColdConfiguration(pRNG,U);
+// SU<Nc>::ColdConfiguration(pRNG,U);
 ////////////////////////////////////
 // Unmodified matrix element
@@ -112,7 +112,7 @@ int main (int argc, char ** argv)
 for(int mu=0;mu<Nd;mu++){
-SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
+SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
 Hmom -= real(sum(trace(mommu*mommu)));

View File

@@ -75,7 +75,7 @@ int main (int argc, char** argv)
 FermionField MphiPrime (FGrid);
 LatticeGaugeField U(UGrid);
-SU3::HotConfiguration(RNG4,U);
+SU<Nc>::HotConfiguration(RNG4,U);
 ////////////////////////////////////
 // Unmodified matrix element
@@ -109,7 +109,7 @@ int main (int argc, char** argv)
 for(int mu=0; mu<Nd; mu++){
-SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
+SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
 PokeIndex<LorentzIndex>(mom, mommu, mu);

View File

@@ -51,7 +51,7 @@ int main (int argc, char ** argv)
 LatticeGaugeField U(&Grid);
-SU3::HotConfiguration(pRNG,U);
+SU<Nc>::HotConfiguration(pRNG,U);
 double beta = 1.0;
 ConjugateWilsonGaugeActionR Action(beta);
@@ -80,7 +80,7 @@ int main (int argc, char ** argv)
 for(int mu=0;mu<Nd;mu++){
-SU3::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); // Traceless antihermitian momentum; gaussian in lie alg
+SU<Nc>::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); // Traceless antihermitian momentum; gaussian in lie alg
 PokeIndex<LorentzIndex>(mom,mommu,mu);

View File

@@ -54,7 +54,7 @@ int main (int argc, char ** argv)
 LatticeGaugeField U(&Grid);
-SU3::HotConfiguration(pRNG,U);
+SU<Nc>::HotConfiguration(pRNG,U);
 double beta = 1.0;
 double c1 = 0.331;
@@ -82,7 +82,7 @@ int main (int argc, char ** argv)
 for(int mu=0;mu<Nd;mu++){
-SU3::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); // Traceless antihermitian momentum; gaussian in lie alg
+SU<Nc>::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); // Traceless antihermitian momentum; gaussian in lie alg
 PokeIndex<LorentzIndex>(mom,mommu,mu);

View File

@@ -63,7 +63,7 @@ int main (int argc, char ** argv)
 LatticeGaugeField U(UGrid);
-SU3::HotConfiguration(RNG4,U);
+SU<Nc>::HotConfiguration(RNG4,U);
 ////////////////////////////////////
 // Unmodified matrix element
@@ -100,7 +100,7 @@ int main (int argc, char ** argv)
 for(int mu=0;mu<Nd;mu++){
-SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
+SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
 PokeIndex<LorentzIndex>(mom,mommu,mu);

View File

@@ -57,7 +57,7 @@ int main (int argc, char ** argv)
 LatticeGaugeField U(UGrid);
-SU3::HotConfiguration(RNG4,U);
+SU<Nc>::HotConfiguration(RNG4,U);
 ////////////////////////////////////
 // Unmodified matrix element
@@ -94,7 +94,7 @@ int main (int argc, char ** argv)
 for(int mu=0;mu<Nd;mu++){
 // Traceless antihermitian momentum; gaussian in lie alg
-SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu);
+SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu);
 PokeIndex<LorentzIndex>(mom,mommu,mu);

View File

@@ -58,7 +58,7 @@ int main (int argc, char ** argv)
PokeIndex<LorentzIndex>(P, P_mu, mu); PokeIndex<LorentzIndex>(P, P_mu, mu);
} }
SU3::HotConfiguration(pRNG,U); SU<Nc>::HotConfiguration(pRNG,U);
ConjugateGradient<LatticeGaugeField> CG(1.0e-8, 10000); ConjugateGradient<LatticeGaugeField> CG(1.0e-8, 10000);
@@ -95,7 +95,7 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage << "Update the U " << std::endl; std::cout << GridLogMessage << "Update the U " << std::endl;
for(int mu=0;mu<Nd;mu++){ for(int mu=0;mu<Nd;mu++){
// Traceless antihermitian momentum; gaussian in lie algebra // Traceless antihermitian momentum; gaussian in lie algebra
SU3::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); SU<Nc>::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu);
auto Umu = PeekIndex<LorentzIndex>(U, mu); auto Umu = PeekIndex<LorentzIndex>(U, mu);
PokeIndex<LorentzIndex>(mom,mommu,mu); PokeIndex<LorentzIndex>(mom,mommu,mu);
Umu = expMat(mommu, dt, 12) * Umu; Umu = expMat(mommu, dt, 12) * Umu;

View File

@@ -60,7 +60,7 @@ int main (int argc, char ** argv)
LatticeGaugeField U(UGrid); LatticeGaugeField U(UGrid);
SU3::HotConfiguration(RNG4,U); SU<Nc>::HotConfiguration(RNG4,U);
//////////////////////////////////// ////////////////////////////////////
// Unmodified matrix element // Unmodified matrix element
@@ -96,7 +96,7 @@ int main (int argc, char ** argv)
for(int mu=0;mu<Nd;mu++){ for(int mu=0;mu<Nd;mu++){
SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
PokeIndex<LorentzIndex>(mom,mommu,mu); PokeIndex<LorentzIndex>(mom,mommu,mu);

View File

@@ -72,7 +72,7 @@ int main (int argc, char** argv)
   LatticeFermion MphiPrime (FGrid);
   LatticeGaugeField U(UGrid);
-  SU3::HotConfiguration(RNG4,U);
+  SU<Nc>::HotConfiguration(RNG4,U);
   ////////////////////////////////////
   // Unmodified matrix element
@@ -107,7 +107,7 @@ int main (int argc, char** argv)
   for(int mu=0; mu<Nd; mu++){
-    SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
+    SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
   PokeIndex<LorentzIndex>(mom, mommu, mu);

@@ -76,7 +76,7 @@ int main (int argc, char** argv)
   FermionField MphiPrime (FGrid);
   LatticeGaugeField U(UGrid);
-  SU3::HotConfiguration(RNG4,U);
+  SU<Nc>::HotConfiguration(RNG4,U);
   ////////////////////////////////////
   // Unmodified matrix element
@@ -112,7 +112,7 @@ int main (int argc, char** argv)
   for(int mu=0; mu<Nd; mu++){
-    SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
+    SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
   PokeIndex<LorentzIndex>(mom, mommu, mu);
   autoView( U_v , U, CpuRead);

@@ -62,7 +62,7 @@ int main (int argc, char ** argv)
   LatticeGaugeField U(UGrid);
-  SU3::HotConfiguration(RNG4,U);
+  SU<Nc>::HotConfiguration(RNG4,U);
   ////////////////////////////////////
   // Unmodified matrix element
@@ -96,7 +96,7 @@ int main (int argc, char ** argv)
   for(int mu=0;mu<Nd;mu++){
-    SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
+    SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
   PokeIndex<LorentzIndex>(mom,mommu,mu);

@@ -54,7 +54,7 @@ int main (int argc, char ** argv)
   LatticeGaugeField U(&Grid);
-  SU3::HotConfiguration(pRNG,U);
+  SU<Nc>::HotConfiguration(pRNG,U);
   double beta = 1.0;
   double c1 = -0.331;
@@ -82,7 +82,7 @@ int main (int argc, char ** argv)
   for(int mu=0;mu<Nd;mu++){
-    SU3::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); // Traceless antihermitian momentum; gaussian in lie alg
+    SU<Nc>::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu); // Traceless antihermitian momentum; gaussian in lie alg
   PokeIndex<LorentzIndex>(mom,mommu,mu);

@@ -61,7 +61,7 @@ int main (int argc, char ** argv)
   LatticeGaugeField U(&Grid);
   //SU2::HotConfiguration(pRNG,U);
-  SU3::ColdConfiguration(pRNG,U);
+  SU<Nc>::ColdConfiguration(pRNG,U);
   ////////////////////////////////////
   // Unmodified matrix element
@@ -98,7 +98,7 @@ int main (int argc, char ** argv)
   for(int mu=0;mu<Nd;mu++){
   // Traceless antihermitian momentum; gaussian in lie alg
-  SU3::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu);
+  SU<Nc>::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu);
   Hmom -= real(sum(trace(mommu*mommu)));

@@ -62,8 +62,8 @@ int main(int argc, char **argv)
   LatticeGaugeField U(&Grid);
-  SU3::HotConfiguration(pRNG, U);
-  //SU3::ColdConfiguration(pRNG, U);// Clover term Zero()
+  SU<Nc>::HotConfiguration(pRNG, U);
+  //SU<Nc>::ColdConfiguration(pRNG, U);// Clover term Zero()
   ////////////////////////////////////
   // Unmodified matrix element
@@ -101,7 +101,7 @@ int main(int argc, char **argv)
   for (int mu = 0; mu < Nd; mu++)
   {
   // Traceless antihermitian momentum; gaussian in lie alg
-  SU3::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu);
+  SU<Nc>::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu);
   Hmom -= real(sum(trace(mommu * mommu)));
   PokeIndex<LorentzIndex>(mom, mommu, mu);
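
The two hunks above also accumulate the momentum part of the Hamiltonian; a minimal sketch of that accumulation under the same assumptions as the earlier sketches (for antihermitian P_mu, -Re tr(P_mu^2) is positive, so Hmom collects a positive kinetic term):

  RealD Hmom = 0.0;
  for (int mu = 0; mu < Nd; mu++) {
    // Traceless antihermitian momentum, Gaussian in the Lie algebra
    SU<Nc>::GaussianFundamentalLieAlgebraMatrix(pRNG, mommu);
    Hmom -= real(sum(trace(mommu * mommu)));   // -Re sum_x tr(P_mu^2), summed over the lattice
    PokeIndex<LorentzIndex>(mom, mommu, mu);
  }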

@@ -59,7 +59,7 @@ int main (int argc, char ** argv)
   LatticeGaugeField U(UGrid);
-  SU3::HotConfiguration(RNG4,U);
+  SU<Nc>::HotConfiguration(RNG4,U);
   ////////////////////////////////////
   // Unmodified matrix element
@@ -109,7 +109,7 @@ int main (int argc, char ** argv)
   for(int mu=0;mu<Nd;mu++){
-    SU3::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
+    SU<Nc>::GaussianFundamentalLieAlgebraMatrix(RNG4, mommu); // Traceless antihermitian momentum; gaussian in lie alg
   PokeIndex<LorentzIndex>(mom,mommu,mu);

@@ -293,7 +293,7 @@ int main (int argc, char ** argv) {
   {
   std::vector<int> seeds4({1,2,3,4});
   GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
-  SU3::HotConfiguration(RNG4, Umu);
+  SU<Nc>::HotConfiguration(RNG4, Umu);
   }
   std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt() << " Ls: " << Ls << std::endl;

@@ -54,7 +54,7 @@ int main (int argc, char ** argv)
   GridParallelRNG RNG5rb(FrbGrid); RNG5.SeedFixedIntegers(seeds5);
   LatticeGaugeField Umu(UGrid);
-  SU3::HotConfiguration(RNG4, Umu);
+  SU<Nc>::HotConfiguration(RNG4, Umu);
   std::vector<LatticeColourMatrix> U(4,UGrid);
   for(int mu=0;mu<Nd;mu++){

@@ -61,7 +61,7 @@ int main(int argc, char** argv) {
   RNG5.SeedFixedIntegers(seeds5);
   LatticeGaugeField Umu(UGrid);
-  SU3::HotConfiguration(RNG4, Umu);
+  SU<Nc>::HotConfiguration(RNG4, Umu);
   /*
   std::vector<LatticeColourMatrix> U(4, UGrid);

@@ -280,7 +280,7 @@ void make_gauge(GaugeField &Umu, Grid::LatticePropagator &q1,Grid::LatticePropag
   Grid::GridCartesian *UGrid = (Grid::GridCartesian *)Umu.Grid();
   Grid::GridParallelRNG RNG4(UGrid);
   RNG4.SeedFixedIntegers(seeds4);
-  Grid::SU3::HotConfiguration(RNG4, Umu);
+  Grid::SU<Nc>::HotConfiguration(RNG4, Umu);
   // Propagator
   Grid::gaussian(RNG4, q1);

@@ -277,7 +277,7 @@ double calc_grid_p(Grid::LatticeGaugeField & Umu)
   Grid::GridCartesian * UGrid = (Grid::GridCartesian *) Umu.Grid();
   Grid::GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
-  Grid::SU3::HotConfiguration(RNG4,Umu);
+  Grid::SU<Nc>::HotConfiguration(RNG4,Umu);
   Grid::LatticeColourMatrix tmp(UGrid);
   tmp = Grid::zero;
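
The last two hunks apply the same change inside standalone helper functions that fill a caller-supplied gauge field; a minimal sketch of that pattern, assuming the code sits outside the Grid namespace and reuses the fixed seeds visible elsewhere in this diff:

  std::vector<int> seeds4({1, 2, 3, 4});                              // fixed seeds, as in the hunks
  Grid::GridCartesian  *UGrid = (Grid::GridCartesian *)Umu.Grid();    // reuse the grid of the supplied field
  Grid::GridParallelRNG RNG4(UGrid);
  RNG4.SeedFixedIntegers(seeds4);
  Grid::SU<Nc>::HotConfiguration(RNG4, Umu);                          // random SU(Nc) start, Nc set at configure time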

Some files were not shown because too many files have changed in this diff.