1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-18 15:57:05 +01:00

Compare commits

..

36 Commits

Author SHA1 Message Date
b2493d6d25 Switching Block lanczos precision to explicitly single
Adding sample run script and input file
2022-03-08 10:18:36 -08:00
8fd16686dc Checking block lanczos
deleted some diag outputs
2021-09-05 23:04:41 -04:00
23b9c6b5f5 Merge branch 'develop' of https://github.com/paboyle/Grid into feature/block_lanczos 2021-09-03 17:38:10 -04:00
b06526bc1e Comment update 2021-08-30 21:15:39 -04:00
3044419111 Some sample code 2021-08-30 20:32:11 -04:00
114920b8de Some example clean up 2021-08-25 12:24:17 +01:00
0d588b95f4 Bug fix to Example_Laplacian test 2021-08-23 23:14:26 +01:00
5b3c530aa7 Return value 2021-08-23 15:30:45 +01:00
c6a5499c8b Fail on non-apple 2021-08-22 18:40:55 +01:00
ec9c3fe77a Remove the file 2021-08-22 18:28:39 +01:00
6135ad530e Extra examples / solutions 2021-08-22 18:25:07 +01:00
40098424c7 Examples 2021-08-22 14:17:12 +01:00
7163b31a26 Examples 2021-08-20 01:15:23 +01:00
ffbdd91e0e Apple happiness 2021-08-20 01:15:00 +01:00
5d29e175d8 Typo fix 2021-08-10 18:25:43 +01:00
1eda4d8e0b Merge branch 'develop' of https://github.com/paboyle/Grid into develop 2021-08-10 05:41:18 -07:00
50181f16e5 Level 0 IPC set up 2021-08-10 05:35:15 -07:00
75030637cc Improved comms benchmark, same as benchmark_comms_host_device 2021-08-10 05:16:30 -07:00
fe5aaf7677 Make comms benchmark same as Benchmark_comms_host_device 2021-08-09 04:06:30 -07:00
80ac2a73ca Check is wrong (HtoD / DtoH) 2021-08-05 18:33:20 -04:00
29a22ae603 Simpler SYCL setup 2021-06-22 17:57:20 +00:00
403bff1a47 Force reqd subgroup size fo SYCL 2021-06-22 17:56:10 +00:00
6cd9224dd7 SYCL comms buffer allocate 2021-06-16 17:10:55 +00:00
4bf8196ff1 Merge branch 'develop' of https://www.github.com/paboyle/Grid into develop 2021-06-15 21:45:36 +00:00
4c5440fb06 const happy for sycl 2021-06-15 21:45:07 +00:00
43943008bf Merge branch 'develop' of https://github.com/paboyle/Grid into feature/block_lanczos 2020-10-01 15:35:29 -04:00
8ee26f9112 Checking in before pulling develop 2020-10-01 15:35:03 -04:00
f91e3af97f Checking in before trying to reduce memory footprint 2020-08-08 22:11:14 -04:00
43298ef681 Make precision switchable for cublas routines 2020-07-22 14:49:32 -04:00
7e70df27e4 Confirmed double precision working 2020-07-21 00:48:46 -04:00
c55d657736 Merge branch 'dev-BlockLanczosOpt' of https://github.com/yongchull/Grid into feature/block_lanczos 2020-07-20 16:36:46 -04:00
b89b1280d5 use gemm twice to complete the Gram Schmidt 2020-03-31 05:39:31 -04:00
ac7090e6d3 block Lanczos cublas buffer is set at the inital step; buffer width is fixed to the block size then cublas Zgemm is called multiple times 2020-03-30 22:25:50 -04:00
02edbe624f first working version of Gram Schmidt using cublas gemm; explicit data type and site vector size has to be removed 2020-03-30 18:36:21 -04:00
9266b89ad8 fix rngs issue; block Lanczos is working 2020-03-25 15:45:50 -04:00
2db7e6f8ab merge manually Block Lanczos files from Chulwoo's update (last state = commit 731a05 + untracked files) to develop branch; namespace QCD is removed; FIXME: multiple starting vectors result in nan after initial orthogonalization 2020-03-24 01:03:24 -04:00
34 changed files with 3402 additions and 530 deletions

3
.gitignore vendored
View File

@ -10,6 +10,8 @@
*~
*#
*.sublime-*
.ctags
tags
# Precompiled Headers #
#######################
@ -88,7 +90,6 @@ Thumbs.db
# build directory #
###################
build*/*
Documentation/_build
# IDE related files #
#####################

View File

@ -240,12 +240,14 @@ public:
Field T0(grid); T0 = in;
Field T1(grid);
Field T2(grid);
Field Tout(grid);
Field y(grid);
Field *Tnm = &T0;
Field *Tn = &T1;
Field *Tnp = &T2;
std::cout << GridLogMessage << "Chebyshev() starts"<<std::endl;
// Tn=T1 = (xscale M + mscale)in
RealD xscale = 2.0/(hi-lo);
RealD mscale = -(hi+lo)/(hi-lo);
@ -254,7 +256,7 @@ public:
// sum = .5 c[0] T0 + c[1] T1
// out = ()*T0 + Coeffs[1]*T1;
axpby(out,0.5*Coeffs[0],Coeffs[1],T0,T1);
axpby(Tout,0.5*Coeffs[0],Coeffs[1],T0,T1);
for(int n=2;n<order;n++){
Linop.HermOp(*Tn,y);
@ -275,7 +277,7 @@ public:
axpby(y,xscale,mscale,y,(*Tn));
axpby(*Tnp,2.0,-1.0,y,(*Tnm));
if ( Coeffs[n] != 0.0) {
axpy(out,Coeffs[n],*Tnp,out);
axpy(Tout,Coeffs[n],*Tnp,Tout);
}
#endif
// Cycle pointers to avoid copies
@ -285,6 +287,8 @@ public:
Tnp =swizzle;
}
out = Tout;
std::cout << GridLogMessage << "Chebyshev() ends"<<std::endl;
}
};
@ -377,24 +381,26 @@ public:
Field T0(grid); T0 = in;
Field T1(grid);
Field T2(grid);
Field Tout(grid);
Field y(grid);
Field *Tnm = &T0;
Field *Tn = &T1;
Field *Tnp = &T2;
std::cout << GridLogMessage << "ChebyshevLanczos() starts"<<std::endl;
// Tn=T1 = (xscale M )*in
AminusMuSq(Linop,T0,T1);
// sum = .5 c[0] T0 + c[1] T1
out = (0.5*Coeffs[0])*T0 + Coeffs[1]*T1;
Tout = (0.5*Coeffs[0])*T0 + Coeffs[1]*T1;
for(int n=2;n<order;n++){
AminusMuSq(Linop,*Tn,y);
*Tnp=2.0*y-(*Tnm);
out=out+Coeffs[n]* (*Tnp);
Tout=Tout+Coeffs[n]* (*Tnp);
// Cycle pointers to avoid copies
Field *swizzle = Tnm;
@ -403,6 +409,8 @@ public:
Tnp =swizzle;
}
out=Tout;
std::cout << GridLogMessage << "ChebyshevLanczos() ends"<<std::endl;
}
};
NAMESPACE_END(Grid);

File diff suppressed because it is too large Load Diff

View File

@ -44,7 +44,7 @@ void MemoryManager::AcceleratorFree (void *ptr,size_t bytes)
if ( __freeme ) {
acceleratorFreeDevice(__freeme);
total_device-=bytes;
// PrintBytes();
// PrintBytes();
}
}
void *MemoryManager::SharedAllocate(size_t bytes)
@ -53,8 +53,8 @@ void *MemoryManager::SharedAllocate(size_t bytes)
if ( ptr == (void *) NULL ) {
ptr = (void *) acceleratorAllocShared(bytes);
total_shared+=bytes;
// std::cout <<"AcceleratorAllocate: allocated Shared pointer "<<std::hex<<ptr<<std::dec<<std::endl;
// PrintBytes();
std::cout <<GridLogMessage<<"AcceleratorAllocate: allocated Shared pointer "<<std::hex<<ptr<<std::dec<<std::endl;
// PrintBytes();
}
return ptr;
}
@ -74,6 +74,7 @@ void *MemoryManager::CpuAllocate(size_t bytes)
if ( ptr == (void *) NULL ) {
ptr = (void *) acceleratorAllocShared(bytes);
total_host+=bytes;
// std::cout << GridLogMessage<< "MemoryManager:: CpuAllocate total_host= "<<total_host<<" "<< ptr << std::endl;
}
return ptr;
}
@ -83,6 +84,7 @@ void MemoryManager::CpuFree (void *_ptr,size_t bytes)
void *__freeme = Insert(_ptr,bytes,Cpu);
if ( __freeme ) {
acceleratorFreeShared(__freeme);
// std::cout << GridLogMessage<< "MemoryManager:: CpuFree total_host= "<<total_host<<" "<< __freeme << std::endl;
total_host-=bytes;
}
}

View File

@ -35,6 +35,9 @@ Author: Christoph Lehner <christoph@lhnr.de>
#endif
#ifdef GRID_HIP
#include <hip/hip_runtime_api.h>
#endif
#ifdef GRID_SYCl
#endif
NAMESPACE_BEGIN(Grid);
@ -70,6 +73,7 @@ void GlobalSharedMemory::Init(Grid_MPI_Comm comm)
WorldNodes = WorldSize/WorldShmSize;
assert( (WorldNodes * WorldShmSize) == WorldSize );
// FIXME: Check all WorldShmSize are the same ?
/////////////////////////////////////////////////////////////////////
@ -446,7 +450,47 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
////////////////////////////////////////////////////////////////////////////////////////////
// Hugetlbfs mapping intended
////////////////////////////////////////////////////////////////////////////////////////////
#if defined(GRID_CUDA) ||defined(GRID_HIP)
#if defined(GRID_CUDA) ||defined(GRID_HIP) || defined(GRID_SYCL)
//if defined(GRID_SYCL)
#if 0
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
void * ShmCommBuf ;
assert(_ShmSetup==1);
assert(_ShmAlloc==0);
//////////////////////////////////////////////////////////////////////////////////////////////////////////
// allocate the pointer array for shared windows for our group
//////////////////////////////////////////////////////////////////////////////////////////////////////////
MPI_Barrier(WorldShmComm);
WorldShmCommBufs.resize(WorldShmSize);
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// Each MPI rank should allocate our own buffer
///////////////////////////////////////////////////////////////////////////////////////////////////////////
ShmCommBuf = acceleratorAllocDevice(bytes);
if (ShmCommBuf == (void *)NULL ) {
std::cerr << " SharedMemoryMPI.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl;
exit(EXIT_FAILURE);
}
std::cout << WorldRank << header " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes
<< "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl;
SharedMemoryZero(ShmCommBuf,bytes);
assert(WorldShmSize == 1);
for(int r=0;r<WorldShmSize;r++){
WorldShmCommBufs[r] = ShmCommBuf;
}
_ShmAllocBytes=bytes;
_ShmAlloc=1;
}
#endif
#if defined(GRID_CUDA) ||defined(GRID_HIP) ||defined(GRID_SYCL)
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
{
void * ShmCommBuf ;
@ -469,8 +513,16 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// Each MPI rank should allocate our own buffer
///////////////////////////////////////////////////////////////////////////////////////////////////////////
#ifdef GRID_SYCL_LEVEL_ZERO_IPC
auto zeDevice = cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_device());
auto zeContext= cl::sycl::get_native<cl::sycl::backend::level_zero>(theGridAccelerator->get_context());
ze_device_mem_alloc_desc_t zeDesc = {};
zeMemAllocDevice(zeContext,&zeDesc,bytes,2*1024*1024,zeDevice,&ShmCommBuf);
std::cout << WorldRank << header " SharedMemoryMPI.cc zeMemAllocDevice "<< bytes
<< "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl;
#else
ShmCommBuf = acceleratorAllocDevice(bytes);
#endif
if (ShmCommBuf == (void *)NULL ) {
std::cerr << " SharedMemoryMPI.cc acceleratorAllocDevice failed NULL pointer for " << bytes<<" bytes " << std::endl;
exit(EXIT_FAILURE);
@ -480,8 +532,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
std::cout << WorldRank << header " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes
<< "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl;
}
SharedMemoryZero(ShmCommBuf,bytes);
// SharedMemoryZero(ShmCommBuf,bytes);
std::cout<< "Setting up IPC"<<std::endl;
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// Loop over ranks/gpu's on our node
///////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -491,6 +543,23 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
//////////////////////////////////////////////////
// If it is me, pass around the IPC access key
//////////////////////////////////////////////////
#ifdef GRID_SYCL_LEVEL_ZERO_IPC
ze_ipc_mem_handle_t handle;
if ( r==WorldShmRank ) {
auto err = zeMemGetIpcHandle(zeContext,ShmCommBuf,&handle);
if ( err != ZE_RESULT_SUCCESS ) {
std::cerr << "SharedMemoryMPI.cc zeMemGetIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
exit(EXIT_FAILURE);
} else {
std::cerr << "SharedMemoryMPI.cc zeMemGetIpcHandle succeeded for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
}
std::cerr<<"Allocated IpcHandle rank "<<r<<" (hex) ";
for(int c=0;c<ZE_MAX_IPC_HANDLE_SIZE;c++){
std::cerr<<std::hex<<(uint32_t)((uint8_t)handle.data[c])<<std::dec;
}
std::cerr<<std::endl;
}
#endif
#ifdef GRID_CUDA
cudaIpcMemHandle_t handle;
if ( r==WorldShmRank ) {
@ -527,6 +596,25 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
// If I am not the source, overwrite thisBuf with remote buffer
///////////////////////////////////////////////////////////////
void * thisBuf = ShmCommBuf;
#ifdef GRID_SYCL_LEVEL_ZERO_IPC
if ( r!=WorldShmRank ) {
thisBuf = nullptr;
std::cerr<<"Using IpcHandle rank "<<r<<" ";
for(int c=0;c<ZE_MAX_IPC_HANDLE_SIZE;c++){
std::cerr<<std::hex<<(uint32_t)((uint8_t)handle.data[c])<<std::dec;
}
std::cerr<<std::endl;
auto err = zeMemOpenIpcHandle(zeContext,zeDevice,handle,0,&thisBuf);
if ( err != ZE_RESULT_SUCCESS ) {
std::cerr << "SharedMemoryMPI.cc "<<zeContext<<" "<<zeDevice<<std::endl;
std::cerr << "SharedMemoryMPI.cc zeMemOpenIpcHandle failed for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
exit(EXIT_FAILURE);
} else {
std::cerr << "SharedMemoryMPI.cc zeMemOpenIpcHandle succeeded for rank "<<r<<" "<<std::hex<<err<<std::dec<<std::endl;
}
assert(thisBuf!=nullptr);
}
#endif
#ifdef GRID_CUDA
if ( r!=WorldShmRank ) {
auto err = cudaIpcOpenMemHandle(&thisBuf,handle,cudaIpcMemLazyEnablePeerAccess);
@ -557,6 +645,8 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
_ShmAllocBytes=bytes;
_ShmAlloc=1;
}
#endif
#else
#ifdef GRID_MPI3_SHMMMAP
void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
@ -727,16 +817,16 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
/////////////////////////////////////////////////////////////////////////
void GlobalSharedMemory::SharedMemoryZero(void *dest,size_t bytes)
{
#ifdef GRID_CUDA
cudaMemset(dest,0,bytes);
#if defined(GRID_CUDA) || defined(GRID_HIP) || defined(GRID_SYCL)
acceleratorMemSet(dest,0,bytes);
#else
bzero(dest,bytes);
#endif
}
void GlobalSharedMemory::SharedMemoryCopy(void *dest,void *src,size_t bytes)
{
#ifdef GRID_CUDA
cudaMemcpy(dest,src,bytes,cudaMemcpyDefault);
#if defined(GRID_CUDA) || defined(GRID_HIP) || defined(GRID_SYCL)
acceleratorCopyToDevice(src,dest,bytes);
#else
bcopy(src,dest,bytes);
#endif
@ -800,7 +890,7 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
}
#endif
SharedMemoryTest();
//SharedMemoryTest();
}
//////////////////////////////////////////////////////////////////
// On node barrier

View File

@ -198,7 +198,7 @@ public:
std::cerr << " nersc_csum " <<std::hex<< nersc_csum << " " << header.checksum<< std::dec<< std::endl;
exit(0);
}
assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 );
assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-1 );
assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 );
assert(nersc_csum == header.checksum );

View File

@ -1,35 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/serialisation/BaseIO.h
Copyright (C) 2015
Author: Michael Marshall <michael.marshall@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/GridCore.h>
NAMESPACE_BEGIN(Grid)
std::uint64_t EigenIO::EigenResizeCounter(0);
NAMESPACE_END(Grid)

View File

@ -9,7 +9,6 @@
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
Author: Michael Marshall <michael.marshall@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -31,7 +30,6 @@ Author: Michael Marshall <michael.marshall@ed.ac.uk>
#ifndef GRID_SERIALISATION_ABSTRACT_READER_H
#define GRID_SERIALISATION_ABSTRACT_READER_H
#include <atomic>
#include <type_traits>
#include <Grid/tensors/Tensors.h>
#include <Grid/serialisation/VectorUtils.h>
@ -112,10 +110,6 @@ namespace Grid {
template <typename ET>
inline typename std::enable_if<is_tensor_of_container<ET>::value, typename Traits<ET>::scalar_type *>::type
getFirstScalar(ET &eigenTensor) { return eigenTensor.data()->begin(); }
// Counter for resized EigenTensors (poor man's substitute for allocator)
// Defined in BinaryIO.cc
extern std::uint64_t EigenResizeCounter;
}
// Abstract writer/reader classes ////////////////////////////////////////////
@ -503,14 +497,8 @@ namespace Grid {
typename std::enable_if<EigenIO::is_tensor_variable<ETensor>::value, void>::type
Reader<T>::Reshape(ETensor &t, const std::array<typename ETensor::Index, ETensor::NumDimensions> &dims )
{
#ifdef GRID_OMP
// The memory counter is the reason this must be done from the primary thread
assert(omp_in_parallel()==0 && "Deserialisation which resizes Eigen tensor must happen from primary thread");
#endif
EigenIO::EigenResizeCounter -= static_cast<uint64_t>(t.size()) * sizeof(typename ETensor::Scalar);
//t.reshape( dims );
t.resize( dims );
EigenIO::EigenResizeCounter += static_cast<uint64_t>(t.size()) * sizeof(typename ETensor::Scalar);
}
template <typename T>

View File

@ -1,34 +1,3 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./Grid/serialisation/VectorUtils.h
Copyright (C) 2015
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Peter Boyle <paboyle@ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
Author: Michael Marshall <michael.marshall@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace Grid;

View File

@ -1,34 +1,3 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./Grid/serialisation/VectorUtils.h
Copyright (C) 2015
Author: Peter Boyle <paboyle@ed.ac.uk>
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
Author: Michael Marshall <michael.marshall@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_SERIALISATION_HDF5_H
#define GRID_SERIALISATION_HDF5_H
@ -65,13 +34,11 @@ namespace Grid
template <typename U>
void writeDefault(const std::string &s, const U &x);
template <typename U>
void writeRagged(const std::string &s, const std::vector<U> &x);
template <typename U>
typename std::enable_if<is_flattenable<std::vector<U>>::value>::type
typename std::enable_if<element<std::vector<U>>::is_number, void>::type
writeDefault(const std::string &s, const std::vector<U> &x);
template <typename U>
typename std::enable_if<!is_flattenable<std::vector<U>>::value>::type
writeDefault(const std::string &s, const std::vector<U> &x) { writeRagged(s, x); }
typename std::enable_if<!element<std::vector<U>>::is_number, void>::type
writeDefault(const std::string &s, const std::vector<U> &x);
template <typename U>
void writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements);
H5NS::Group & getGroup(void);
@ -97,13 +64,11 @@ namespace Grid
template <typename U>
void readDefault(const std::string &s, U &output);
template <typename U>
void readRagged(const std::string &s, std::vector<U> &x);
template <typename U>
typename std::enable_if<is_flattenable<std::vector<U>>::value>::type
typename std::enable_if<element<std::vector<U>>::is_number, void>::type
readDefault(const std::string &s, std::vector<U> &x);
template <typename U>
typename std::enable_if<!is_flattenable<std::vector<U>>::value>::type
readDefault(const std::string &s, std::vector<U> &x) { readRagged(s, x); }
typename std::enable_if<!element<std::vector<U>>::is_number, void>::type
readDefault(const std::string &s, std::vector<U> &x);
template <typename U>
void readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim);
H5NS::Group & getGroup(void);
@ -211,30 +176,24 @@ namespace Grid
}
template <typename U>
typename std::enable_if<is_flattenable<std::vector<U>>::value>::type
typename std::enable_if<element<std::vector<U>>::is_number, void>::type
Hdf5Writer::writeDefault(const std::string &s, const std::vector<U> &x)
{
if (isRegularShape(x))
{
// alias to element type
using Scalar = typename is_flattenable<std::vector<U>>::type;
// flatten the vector and getting dimensions
Flatten<std::vector<U>> flat(x);
std::vector<size_t> dim;
const auto &flatx = flat.getFlatVector();
for (auto &d: flat.getDim())
dim.push_back(d);
writeMultiDim<Scalar>(s, dim, &flatx[0], flatx.size());
}
else
{
writeRagged(s, x);
}
// alias to element type
typedef typename element<std::vector<U>>::type Element;
// flatten the vector and getting dimensions
Flatten<std::vector<U>> flat(x);
std::vector<size_t> dim;
const auto &flatx = flat.getFlatVector();
for (auto &d: flat.getDim())
dim.push_back(d);
writeMultiDim<Element>(s, dim, &flatx[0], flatx.size());
}
template <typename U>
void Hdf5Writer::writeRagged(const std::string &s, const std::vector<U> &x)
typename std::enable_if<!element<std::vector<U>>::is_number, void>::type
Hdf5Writer::writeDefault(const std::string &s, const std::vector<U> &x)
{
push(s);
writeSingleAttribute(x.size(), HDF5_GRID_GUARD "vector_size",
@ -270,7 +229,7 @@ namespace Grid
void Hdf5Reader::readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim)
{
// alias to element type
using Scalar = typename is_flattenable<std::vector<U>>::type;
typedef typename element<std::vector<U>>::type Element;
// read the dimensions
H5NS::DataSpace dataSpace;
@ -301,44 +260,37 @@ namespace Grid
H5NS::DataSet dataSet;
dataSet = group_.openDataSet(s);
dataSet.read(buf.data(), Hdf5Type<Scalar>::type());
dataSet.read(buf.data(), Hdf5Type<Element>::type());
}
else
{
H5NS::Attribute attribute;
attribute = group_.openAttribute(s);
attribute.read(Hdf5Type<Scalar>::type(), buf.data());
attribute.read(Hdf5Type<Element>::type(), buf.data());
}
}
template <typename U>
typename std::enable_if<is_flattenable<std::vector<U>>::value>::type
typename std::enable_if<element<std::vector<U>>::is_number, void>::type
Hdf5Reader::readDefault(const std::string &s, std::vector<U> &x)
{
if (H5Lexists (group_.getId(), s.c_str(), H5P_DEFAULT) > 0
&& H5Aexists_by_name(group_.getId(), s.c_str(), HDF5_GRID_GUARD "vector_size", H5P_DEFAULT ) > 0)
{
readRagged(s, x);
}
else
{
// alias to element type
using Scalar = typename is_flattenable<std::vector<U>>::type;
// alias to element type
typedef typename element<std::vector<U>>::type Element;
std::vector<size_t> dim;
std::vector<Scalar> buf;
readMultiDim( s, buf, dim );
std::vector<size_t> dim;
std::vector<Element> buf;
readMultiDim( s, buf, dim );
// reconstruct the multidimensional vector
Reconstruct<std::vector<U>> r(buf, dim);
x = r.getVector();
}
// reconstruct the multidimensional vector
Reconstruct<std::vector<U>> r(buf, dim);
x = r.getVector();
}
template <typename U>
void Hdf5Reader::readRagged(const std::string &s, std::vector<U> &x)
typename std::enable_if<!element<std::vector<U>>::is_number, void>::type
Hdf5Reader::readDefault(const std::string &s, std::vector<U> &x)
{
uint64_t size;

View File

@ -118,13 +118,13 @@ static inline std::string SerialisableClassName(void) {return std::string(#cname
static constexpr bool isEnum = false; \
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_MEMBER,__VA_ARGS__))\
template <typename T>\
static inline void write(::Grid::Writer<T> &WR,const std::string &s, const cname &obj){ \
static inline void write(Writer<T> &WR,const std::string &s, const cname &obj){ \
push(WR,s);\
GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_WRITE_MEMBER,__VA_ARGS__)) \
pop(WR);\
}\
template <typename T>\
static inline void read(::Grid::Reader<T> &RD,const std::string &s, cname &obj){ \
static inline void read(Reader<T> &RD,const std::string &s, cname &obj){ \
if (!push(RD,s))\
{\
std::cout << ::Grid::GridLogWarning << "IO: Cannot open node '" << s << "'" << std::endl; \

View File

@ -9,8 +9,7 @@
Author: Antonin Portelli <antonin.portelli@me.com>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Michael Marshall <michael.marshall@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
@ -237,36 +236,21 @@ namespace Grid {
}
}
// is_flattenable<T>::value is true if T is a std::vector<> which can be flattened //////////////////////
template <typename T, typename V = void>
struct is_flattenable : std::false_type
{
using type = T;
using grid_type = T;
static constexpr int vecRank = 0;
static constexpr bool isGridTensor = false;
static constexpr bool children_flattenable = std::is_arithmetic<T>::value or is_complex<T>::value;
};
// Vector element trait //////////////////////////////////////////////////////
template <typename T>
struct is_flattenable<T, typename std::enable_if<isGridTensor<T>::value>::type> : std::false_type
struct element
{
using type = typename GridTypeMapper<T>::scalar_type;
using grid_type = T;
static constexpr int vecRank = 0;
static constexpr bool isGridTensor = true;
static constexpr bool children_flattenable = true;
typedef T type;
static constexpr bool is_number = false;
};
template <typename T>
struct is_flattenable<std::vector<T>, typename std::enable_if<is_flattenable<T>::children_flattenable>::type>
: std::true_type
struct element<std::vector<T>>
{
using type = typename is_flattenable<T>::type;
using grid_type = typename is_flattenable<T>::grid_type;
static constexpr bool isGridTensor = is_flattenable<T>::isGridTensor;
static constexpr int vecRank = is_flattenable<T>::vecRank + 1;
static constexpr bool children_flattenable = true;
typedef typename element<T>::type type;
static constexpr bool is_number = std::is_arithmetic<T>::value
or is_complex<T>::value
or element<T>::is_number;
};
// Vector flattening utility class ////////////////////////////////////////////
@ -275,30 +259,23 @@ namespace Grid {
class Flatten
{
public:
using Scalar = typename is_flattenable<V>::type;
static constexpr bool isGridTensor = is_flattenable<V>::isGridTensor;
typedef typename element<V>::type Element;
public:
explicit Flatten(const V &vector);
const V & getVector(void) const { return vector_; }
const std::vector<Scalar> & getFlatVector(void) const { return flatVector_; }
const std::vector<size_t> & getDim(void) const { return dim_; }
explicit Flatten(const V &vector);
const V & getVector(void);
const std::vector<Element> & getFlatVector(void);
const std::vector<size_t> & getDim(void);
private:
template <typename W> typename std::enable_if<!is_flattenable<W>::value && !is_flattenable<W>::isGridTensor>::type
accumulate(const W &e);
template <typename W> typename std::enable_if<!is_flattenable<W>::value && is_flattenable<W>::isGridTensor>::type
accumulate(const W &e);
template <typename W> typename std::enable_if< is_flattenable<W>::value>::type
accumulate(const W &v);
template <typename W> typename std::enable_if<!is_flattenable<W>::value && !is_flattenable<W>::isGridTensor>::type
accumulateDim(const W &e) {} // Innermost is a scalar - do nothing
template <typename W> typename std::enable_if<!is_flattenable<W>::value && is_flattenable<W>::isGridTensor>::type
accumulateDim(const W &e);
template <typename W> typename std::enable_if< is_flattenable<W>::value>::type
accumulateDim(const W &v);
void accumulate(const Element &e);
template <typename W>
void accumulate(const W &v);
void accumulateDim(const Element &e);
template <typename W>
void accumulateDim(const W &v);
private:
const V &vector_;
std::vector<Scalar> flatVector_;
std::vector<size_t> dim_;
const V &vector_;
std::vector<Element> flatVector_;
std::vector<size_t> dim_;
};
// Class to reconstruct a multidimensional std::vector
@ -306,57 +283,38 @@ namespace Grid {
class Reconstruct
{
public:
using Scalar = typename is_flattenable<V>::type;
static constexpr bool isGridTensor = is_flattenable<V>::isGridTensor;
typedef typename element<V>::type Element;
public:
Reconstruct(const std::vector<Scalar> &flatVector,
Reconstruct(const std::vector<Element> &flatVector,
const std::vector<size_t> &dim);
const V & getVector(void) const { return vector_; }
const std::vector<Scalar> & getFlatVector(void) const { return flatVector_; }
const std::vector<size_t> & getDim(void) const { return dim_; }
const V & getVector(void);
const std::vector<Element> & getFlatVector(void);
const std::vector<size_t> & getDim(void);
private:
template <typename W> typename std::enable_if<!is_flattenable<W>::value && !is_flattenable<W>::isGridTensor>::type
fill(W &v);
template <typename W> typename std::enable_if<!is_flattenable<W>::value && is_flattenable<W>::isGridTensor>::type
fill(W &v);
template <typename W> typename std::enable_if< is_flattenable<W>::value>::type
fill(W &v);
template <typename W> typename std::enable_if< is_flattenable<W>::value && is_flattenable<W>::vecRank==1>::type
resize(W &v, const unsigned int dim);
template <typename W> typename std::enable_if< is_flattenable<W>::value && (is_flattenable<W>::vecRank>1)>::type
resize(W &v, const unsigned int dim);
template <typename W> typename std::enable_if<!is_flattenable<W>::isGridTensor>::type
checkInnermost(const W &e) {} // Innermost is a scalar - do nothing
template <typename W> typename std::enable_if< is_flattenable<W>::isGridTensor>::type
checkInnermost(const W &e);
void fill(std::vector<Element> &v);
template <typename W>
void fill(W &v);
void resize(std::vector<Element> &v, const unsigned int dim);
template <typename W>
void resize(W &v, const unsigned int dim);
private:
V vector_;
const std::vector<Scalar> &flatVector_;
std::vector<size_t> dim_;
size_t ind_{0};
unsigned int dimInd_{0};
V vector_;
const std::vector<Element> &flatVector_;
std::vector<size_t> dim_;
size_t ind_{0};
unsigned int dimInd_{0};
};
// Flatten class template implementation
template <typename V>
template <typename W> typename std::enable_if<!is_flattenable<W>::value && !is_flattenable<W>::isGridTensor>::type
Flatten<V>::accumulate(const W &e)
void Flatten<V>::accumulate(const Element &e)
{
flatVector_.push_back(e);
}
template <typename V>
template <typename W> typename std::enable_if<!is_flattenable<W>::value && is_flattenable<W>::isGridTensor>::type
Flatten<V>::accumulate(const W &e)
{
for (const Scalar &x: e) {
flatVector_.push_back(x);
}
}
template <typename V>
template <typename W> typename std::enable_if<is_flattenable<W>::value>::type
Flatten<V>::accumulate(const W &v)
template <typename W>
void Flatten<V>::accumulate(const W &v)
{
for (auto &e: v)
{
@ -365,17 +323,11 @@ namespace Grid {
}
template <typename V>
template <typename W> typename std::enable_if<!is_flattenable<W>::value && is_flattenable<W>::isGridTensor>::type
Flatten<V>::accumulateDim(const W &e)
{
using Traits = GridTypeMapper<typename is_flattenable<W>::grid_type>;
for (int rank=0; rank < Traits::Rank; ++rank)
dim_.push_back(Traits::Dimension(rank));
}
void Flatten<V>::accumulateDim(const Element &e) {};
template <typename V>
template <typename W> typename std::enable_if<is_flattenable<W>::value>::type
Flatten<V>::accumulateDim(const W &v)
template <typename W>
void Flatten<V>::accumulateDim(const W &v)
{
dim_.push_back(v.size());
accumulateDim(v[0]);
@ -385,36 +337,42 @@ namespace Grid {
Flatten<V>::Flatten(const V &vector)
: vector_(vector)
{
accumulateDim(vector_);
std::size_t TotalSize{ dim_[0] };
for (int i = 1; i < dim_.size(); ++i) {
TotalSize *= dim_[i];
}
flatVector_.reserve(TotalSize);
accumulate(vector_);
accumulateDim(vector_);
}
template <typename V>
const V & Flatten<V>::getVector(void)
{
return vector_;
}
template <typename V>
const std::vector<typename Flatten<V>::Element> &
Flatten<V>::getFlatVector(void)
{
return flatVector_;
}
template <typename V>
const std::vector<size_t> & Flatten<V>::getDim(void)
{
return dim_;
}
// Reconstruct class template implementation
template <typename V>
template <typename W> typename std::enable_if<!is_flattenable<W>::value && !is_flattenable<W>::isGridTensor>::type
Reconstruct<V>::fill(W &v)
{
v = flatVector_[ind_++];
}
template <typename V>
template <typename W> typename std::enable_if<!is_flattenable<W>::value && is_flattenable<W>::isGridTensor>::type
Reconstruct<V>::fill(W &v)
void Reconstruct<V>::fill(std::vector<Element> &v)
{
for (auto &e: v)
{
e = flatVector_[ind_++];
}
}
template <typename V>
template <typename W> typename std::enable_if<is_flattenable<W>::value>::type
Reconstruct<V>::fill(W &v)
template <typename W>
void Reconstruct<V>::fill(W &v)
{
for (auto &e: v)
{
@ -423,15 +381,14 @@ namespace Grid {
}
template <typename V>
template <typename W> typename std::enable_if<is_flattenable<W>::value && is_flattenable<W>::vecRank==1>::type
Reconstruct<V>::resize(W &v, const unsigned int dim)
void Reconstruct<V>::resize(std::vector<Element> &v, const unsigned int dim)
{
v.resize(dim_[dim]);
}
template <typename V>
template <typename W> typename std::enable_if<is_flattenable<W>::value && (is_flattenable<W>::vecRank>1)>::type
Reconstruct<V>::resize(W &v, const unsigned int dim)
template <typename W>
void Reconstruct<V>::resize(W &v, const unsigned int dim)
{
v.resize(dim_[dim]);
for (auto &e: v)
@ -441,31 +398,34 @@ namespace Grid {
}
template <typename V>
template <typename W> typename std::enable_if<is_flattenable<W>::isGridTensor>::type
Reconstruct<V>::checkInnermost(const W &)
{
using Traits = GridTypeMapper<typename is_flattenable<W>::grid_type>;
const int gridRank{Traits::Rank};
const int dimRank{static_cast<int>(dim_.size())};
assert(dimRank >= gridRank && "Tensor rank too low for Grid tensor");
for (int i=0; i<gridRank; ++i) {
assert(dim_[dimRank - gridRank + i] == Traits::Dimension(i) && "Tensor dimension doesn't match Grid tensor");
}
dim_.resize(dimRank - gridRank);
}
template <typename V>
Reconstruct<V>::Reconstruct(const std::vector<Scalar> &flatVector,
Reconstruct<V>::Reconstruct(const std::vector<Element> &flatVector,
const std::vector<size_t> &dim)
: flatVector_(flatVector)
, dim_(dim)
{
checkInnermost(vector_);
assert(dim_.size() == is_flattenable<V>::vecRank && "Tensor rank doesn't match nested std::vector rank");
resize(vector_, 0);
fill(vector_);
}
template <typename V>
const V & Reconstruct<V>::getVector(void)
{
return vector_;
}
template <typename V>
const std::vector<typename Reconstruct<V>::Element> &
Reconstruct<V>::getFlatVector(void)
{
return flatVector_;
}
template <typename V>
const std::vector<size_t> & Reconstruct<V>::getDim(void)
{
return dim_;
}
// Vector IO utilities ///////////////////////////////////////////////////////
// helper function to read space-separated values
template <typename T>
@ -499,64 +459,6 @@ namespace Grid {
return os;
}
// In general, scalar types are considered "flattenable" (regularly shaped)
template <typename T>
bool isRegularShapeHelper(const std::vector<T> &, std::vector<std::size_t> &, int, bool)
{
return true;
}
template <typename T>
bool isRegularShapeHelper(const std::vector<std::vector<T>> &v, std::vector<std::size_t> &Dims, int Depth, bool bFirst)
{
if( bFirst)
{
assert( Dims.size() == Depth && "Bug: Delete this message after testing" );
Dims.push_back(v[0].size());
if (!Dims[Depth])
return false;
}
else
{
assert( Dims.size() >= Depth + 1 && "Bug: Delete this message after testing" );
}
for (std::size_t i = 0; i < v.size(); ++i)
{
if (v[i].size() != Dims[Depth] || !isRegularShapeHelper(v[i], Dims, Depth + 1, bFirst && i==0))
{
return false;
}
}
return true;
}
template <typename T>
bool isRegularShape(const T &t) { return true; }
template <typename T>
bool isRegularShape(const std::vector<T> &v) { return !v.empty(); }
// Return non-zero if all dimensions of this std::vector<std::vector<T>> are regularly shaped
template <typename T>
bool isRegularShape(const std::vector<std::vector<T>> &v)
{
if (v.empty() || v[0].empty())
return false;
// Make sure all of my rows are the same size
std::vector<std::size_t> Dims;
Dims.reserve(is_flattenable<T>::vecRank);
Dims.push_back(v.size());
Dims.push_back(v[0].size());
for (std::size_t i = 0; i < Dims[0]; ++i)
{
if (v[i].size() != Dims[1] || !isRegularShapeHelper(v[i], Dims, 2, i==0))
{
return false;
}
}
return true;
}
}
// helper function to read space-separated values

View File

@ -417,7 +417,7 @@ public:
stream << "{";
for (int j = 0; j < N; j++) {
stream << o._internal[i][j];
if (j < N - 1) stream << ",";
if (i < N - 1) stream << ",";
}
stream << "}";
if (i != N - 1) stream << "\n\t\t";

View File

@ -1,5 +1,5 @@
/*************************************************************************************
n
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/tensors/Tensor_extract_merge.h
@ -153,7 +153,7 @@ void insertLane(int lane, vobj & __restrict__ vec,const typename vobj::scalar_ob
// Extract to a bunch of scalar object pointers of different scalar type, with offset. Useful for precision change
////////////////////////////////////////////////////////////////////////
template<class vobj, class sobj> accelerator
void extract(const vobj &vec,ExtractPointerArray<sobj> &extracted, int offset)
void extract(const vobj &vec,const ExtractPointerArray<sobj> &extracted, int offset)
{
typedef typename GridTypeMapper<sobj>::scalar_type sobj_scalar_type;
typedef typename GridTypeMapper<vobj>::scalar_type scalar_type;
@ -181,7 +181,7 @@ void extract(const vobj &vec,ExtractPointerArray<sobj> &extracted, int offset)
// Merge bunch of scalar object pointers of different scalar type, with offset. Useful for precision change
////////////////////////////////////////////////////////////////////////
template<class vobj, class sobj> accelerator
void merge(vobj &vec,ExtractPointerArray<sobj> &extracted, int offset)
void merge(vobj &vec,const ExtractPointerArray<sobj> &extracted, int offset)
{
typedef typename GridTypeMapper<sobj>::scalar_type sobj_scalar_type;
typedef typename GridTypeMapper<vobj>::scalar_type scalar_type;

View File

@ -171,7 +171,6 @@ void acceleratorInit(void)
#ifdef GRID_SYCL
cl::sycl::queue *theGridAccelerator;
void acceleratorInit(void)
{
int nDevices = 1;
@ -179,6 +178,10 @@ void acceleratorInit(void)
cl::sycl::device selectedDevice { selector };
theGridAccelerator = new sycl::queue (selectedDevice);
#ifdef GRID_SYCL_LEVEL_ZERO_IPC
zeInit(0);
#endif
char * localRankStr = NULL;
int rank = 0, world_rank=0;
#define ENV_LOCAL_RANK_OMPI "OMPI_COMM_WORLD_LOCAL_RANK"

View File

@ -39,6 +39,10 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#ifdef HAVE_MM_MALLOC_H
#include <mm_malloc.h>
#endif
#ifdef __APPLE__
// no memalign
inline void *memalign(size_t align, size_t bytes) { return malloc(bytes); }
#endif
NAMESPACE_BEGIN(Grid);
@ -194,6 +198,9 @@ inline void *acceleratorAllocShared(size_t bytes)
ptr = (void *) NULL;
printf(" cudaMallocManaged failed for %d %s \n",bytes,cudaGetErrorString(err));
}
size_t free,total;
cudaMemGetInfo(&free,&total);
std::cout<<"cudaMemGetInfo "<<free<<" / "<<total<<std::endl;
return ptr;
};
inline void *acceleratorAllocDevice(size_t bytes)
@ -204,6 +211,9 @@ inline void *acceleratorAllocDevice(size_t bytes)
ptr = (void *) NULL;
printf(" cudaMalloc failed for %d %s \n",bytes,cudaGetErrorString(err));
}
size_t free,total;
cudaMemGetInfo(&free,&total);
std::cout<<"cudaMemGetInfo "<<free<<" / "<<total<<std::endl;
return ptr;
};
inline void acceleratorFreeShared(void *ptr){ cudaFree(ptr);};
@ -233,6 +243,13 @@ inline int acceleratorIsCommunicable(void *ptr)
NAMESPACE_END(Grid);
#include <CL/sycl.hpp>
#include <CL/sycl/usm.hpp>
#define GRID_SYCL_LEVEL_ZERO_IPC
#ifdef GRID_SYCL_LEVEL_ZERO_IPC
#include <level_zero/ze_api.h>
#include <CL/sycl/backend/level_zero.hpp>
#endif
NAMESPACE_BEGIN(Grid);
extern cl::sycl::queue *theGridAccelerator;
@ -257,11 +274,14 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) {
unsigned long nt=acceleratorThreads(); \
unsigned long unum1 = num1; \
unsigned long unum2 = num2; \
if(nt < 8)nt=8; \
cl::sycl::range<3> local {nt,1,nsimd}; \
cl::sycl::range<3> global{unum1,unum2,nsimd}; \
cgh.parallel_for<class dslash>( \
cl::sycl::nd_range<3>(global,local), \
[=] (cl::sycl::nd_item<3> item) /*mutable*/ { \
[=] (cl::sycl::nd_item<3> item) /*mutable*/ \
[[intel::reqd_sub_group_size(8)]] \
{ \
auto iter1 = item.get_global_id(0); \
auto iter2 = item.get_global_id(1); \
auto lane = item.get_global_id(2); \
@ -409,6 +429,8 @@ inline void acceleratorMemSet(void *base,int value,size_t bytes) { hipMemset(bas
#undef GRID_SIMT
#define accelerator
#define accelerator_inline strong_inline
#define accelerator_for(iterator,num,nsimd, ... ) thread_for(iterator, num, { __VA_ARGS__ });

View File

@ -56,6 +56,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
static int
feenableexcept (unsigned int excepts)
{
#if 0
// Fails on Apple M1
static fenv_t fenv;
unsigned int new_excepts = excepts & FE_ALL_EXCEPT;
unsigned int old_excepts; // previous masks
@ -70,6 +72,8 @@ feenableexcept (unsigned int excepts)
iold_excepts = (int) old_excepts;
return ( fesetenv (&fenv) ? -1 : iold_excepts );
#endif
return 0;
}
#endif
@ -163,6 +167,13 @@ void GridCmdOptionInt(std::string &str,int & val)
return;
}
// ypj [add]
void GridCmdOptionFloat(std::string &str,double & val)
{
std::stringstream ss(str);
ss>>val;
return;
}
void GridParseLayout(char **argv,int argc,
Coordinate &latt_c,

View File

@ -57,7 +57,8 @@ void GridCmdOptionCSL(std::string str,std::vector<std::string> & vec);
template<class VectorInt>
void GridCmdOptionIntVector(const std::string &str,VectorInt & vec);
void GridCmdOptionInt(std::string &str,int & val);
// ypj [add]
void GridCmdOptionFloat(std::string &str,double & val);
void GridParseLayout(char **argv,int argc,
std::vector<int> &latt,

View File

@ -1,5 +1,5 @@
# additional include paths necessary to compile the C++ library
SUBDIRS = Grid HMC benchmarks tests
SUBDIRS = Grid HMC benchmarks tests examples
include $(top_srcdir)/doxygen.inc

View File

@ -133,34 +133,30 @@ public:
std::vector<HalfSpinColourVectorD *> xbuf(8);
std::vector<HalfSpinColourVectorD *> rbuf(8);
Grid.ShmBufferFreeAll();
//Grid.ShmBufferFreeAll();
uint64_t bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
for(int d=0;d<8;d++){
xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
xbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
rbuf[d] = (HalfSpinColourVectorD *)acceleratorAllocDevice(bytes);
// bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
// bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
}
int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
int ncomm;
double dbytes;
std::vector<double> times(Nloop);
for(int i=0;i<Nloop;i++){
double start=usecond();
for(int dir=0;dir<8;dir++) {
int mu =dir % 4;
if (mpi_layout[mu]>1 ) {
dbytes=0;
ncomm=0;
std::vector<double> times(Nloop);
for(int i=0;i<Nloop;i++){
thread_for(dir,8,{
double tbytes;
int mu =dir % 4;
if (mpi_layout[mu]>1 ) {
dbytes=0;
double start=usecond();
int xmit_to_rank;
int recv_from_rank;
if ( dir == mu ) {
int comm_proc=1;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
@ -168,40 +164,38 @@ public:
int comm_proc = mpi_layout[mu]-1;
Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
}
tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
(void *)&rbuf[dir][0], recv_from_rank,
bytes,dir);
thread_critical {
ncomm++;
dbytes+=tbytes;
}
Grid.SendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
(void *)&rbuf[dir][0], recv_from_rank,
bytes);
dbytes+=bytes;
double stop=usecond();
t_time[i] = stop-start; // microseconds
}
});
Grid.Barrier();
double stop=usecond();
t_time[i] = stop-start; // microseconds
timestat.statistics(t_time);
dbytes=dbytes*ppn;
double xbytes = dbytes*0.5;
double bidibytes = dbytes;
std::cout<<GridLogMessage << lat<<"\t"<<Ls<<"\t "
<< bytes << " \t "
<<xbytes/timestat.mean<<" \t "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " \t "
<<xbytes/timestat.max <<" "<< xbytes/timestat.min
<< "\t\t"<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
<< bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
}
}
timestat.statistics(t_time);
dbytes=dbytes*ppn;
double xbytes = dbytes*0.5;
// double rbytes = dbytes*0.5;
double bidibytes = dbytes;
std::cout<<GridLogMessage << lat<<"\t"<<Ls<<"\t "
<< bytes << " \t "
<<xbytes/timestat.mean<<" \t "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " \t "
<<xbytes/timestat.max <<" "<< xbytes/timestat.min
<< "\t\t"<< bidibytes/timestat.mean<< " " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " "
<< bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl;
}
}
for(int d=0;d<8;d++){
acceleratorFreeDevice(xbuf[d]);
acceleratorFreeDevice(rbuf[d]);
}
}
}
return;
}
static void Memory(void)
@ -281,7 +275,6 @@ public:
uint64_t lmax=32;
#define NLOOP (1000*lmax*lmax*lmax*lmax/lat/lat/lat/lat)
GridSerialRNG sRNG; sRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
for(int lat=8;lat<=lmax;lat+=8){
@ -445,7 +438,7 @@ public:
// 1344= 3*(2*8+6)*2*8 + 8*3*2*2 + 3*4*2*8
// 1344 = Nc* (6+(Nc-1)*8)*2*Nd + Nd*Nc*2*2 + Nd*Nc*Ns*2
// double flops=(1344.0*volume)/2;
#if 1
#if 0
double fps = Nc* (6+(Nc-1)*8)*Ns*Nd + Nd*Nc*Ns + Nd*Nc*Ns*2;
#else
double fps = Nc* (6+(Nc-1)*8)*Ns*Nd + 2*Nd*Nc*Ns + 2*Nd*Nc*Ns*2;
@ -716,12 +709,12 @@ int main (int argc, char ** argv)
if ( do_su4 ) {
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
std::cout<<GridLogMessage << " Memory benchmark " <<std::endl;
std::cout<<GridLogMessage << " SU(4) benchmark " <<std::endl;
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
Benchmark::SU4();
}
if ( do_comms && (NN>1) ) {
if ( do_comms ) {
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;
std::cout<<GridLogMessage << " Communications benchmark " <<std::endl;
std::cout<<GridLogMessage << "=================================================================================="<<std::endl;

View File

@ -815,6 +815,7 @@ AC_CONFIG_FILES(tests/smearing/Makefile)
AC_CONFIG_FILES(tests/qdpxx/Makefile)
AC_CONFIG_FILES(tests/testu01/Makefile)
AC_CONFIG_FILES(benchmarks/Makefile)
AC_CONFIG_FILES(examples/Makefile)
AC_OUTPUT
echo ""

Binary file not shown.

View File

@ -1787,7 +1787,7 @@ Hdf5Writer Hdf5Reader HDF5
Write interfaces, similar to the XML facilities in QDP++ are presented. However,
the serialisation routines are automatically generated by the macro, and a virtual
reader and writer interface enables writing to any of a number of formats.
reader adn writer interface enables writing to any of a number of formats.
**Example**::
@ -1814,91 +1814,6 @@ reader and writer interface enables writing to any of a number of formats.
}
Eigen tensor support -- added 2019H1
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The Serialisation library was expanded in 2019 to support de/serialisation of
Eigen tensors. De/serialisation of existing types was not changed. Data files
without Eigen tensors remain compatible with earlier versions of Grid and other readers.
Conversely, data files containing serialised Eigen tensors is a breaking change.
Eigen tensor serialisation support was added to BaseIO, which was modified to provide a Traits class
to recognise Eigen tensors with elements that are either: primitive scalars (arithmetic and complex types);
or Grid tensors.
**Traits determining de/serialisable scalars**::
// Is this an Eigen tensor
template<typename T> struct is_tensor : std::integral_constant<bool,
std::is_base_of<Eigen::TensorBase<T, Eigen::ReadOnlyAccessors>, T>::value> {};
// Is this an Eigen tensor of a supported scalar
template<typename T, typename V = void> struct is_tensor_of_scalar : public std::false_type {};
template<typename T> struct is_tensor_of_scalar<T, typename std::enable_if<is_tensor<T>::value && is_scalar<typename T::Scalar>::value>::type> : public std::true_type {};
// Is this an Eigen tensor of a supported container
template<typename T, typename V = void> struct is_tensor_of_container : public std::false_type {};
template<typename T> struct is_tensor_of_container<T, typename std::enable_if<is_tensor<T>::value && isGridTensor<typename T::Scalar>::value>::type> : public std::true_type {};
Eigen tensors are regular, multidimensional objects, and each Reader/Writer
was extended to support this new datatype. Where the Eigen tensor contains
a Grid tensor, the dimensions of the data written are the dimensions of the
Eigen tensor plus the dimensions of the underlying Grid scalar. Dimensions
of size 1 are preserved.
**New Reader/Writer methods for multi-dimensional data**::
template <typename U>
void readMultiDim(const std::string &s, std::vector<U> &buf, std::vector<size_t> &dim);
template <typename U>
void writeMultiDim(const std::string &s, const std::vector<size_t> & Dimensions, const U * pDataRowMajor, size_t NumElements);
On readback, the Eigen tensor rank must match the data being read, but the tensor
dimensions will be resized if necessary. Resizing is not possible for Eigen::TensorMap<T>
because these tensors use a buffer provided at construction, and this buffer cannot be changed.
Deserialisation failures cause Grid to assert.
HDF5 Optimisations -- added June 2021
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Grid serialisation is intended to be light, deterministic and provide a layer of abstraction over
multiple file formats. HDF5 excels at handling multi-dimensional data, and the Grid HDF5Reader/HDF5Writer exploits this.
When serialising nested ``std::vector<T>``, where ``T`` is an arithmetic or complex type,
the Hdf5Writer writes the data as an Hdf5 DataSet object.
However, nested ``std::vector<std::vector<...T>>`` might be "ragged", i.e. not necessarily regular. E.g. a 3d nested
``std::vector`` might contain 2 rows, the first being a 2x2 block and the second row being a 1 x 2 block.
A bug existed whereby this was not checked on write, so nested, ragged vectors
were written as a regular dataset, with a buffer under/overrun and jumbled contents.
Clearly this was not used in production, as the bug went undetected until now. Fixing this bug
is an opportunity to further optimise the HDF5 file format.
The goals of this change are to:
* Make changes to the Hdf5 file format only -- i.e. do not impact other file formats
* Implement file format changes in such a way that they are transparent to the Grid reader
* Correct the bug for ragged vectors of numeric / complex types
* Extend the support of nested std::vector<T> to arbitrarily nested Grid tensors
The trait class ``element`` has been redefined to ``is_flattenable``, which is a trait class for
potentially "flattenable" objects. These are (possibly nested) ``std::vector<T>`` where ``T`` is
an arithmetic, complex or Grid tensor type. Flattenable objects are tested on write
(with the function ``isRegularShape``) to see whether they actually are regular.
Flattenable, regular objects are written to a multidimensional HDF5 DataSet.
Otherwise, an Hdf5 sub group is created with the object "name", and each element of the outer dimension is
recursively written to as object "name_n", where n is a 0-indexed number.
On readback (by Grid)), the presence of a subgroup containing the attribute ``Grid_vector_size`` triggers a
"ragged read", otherwise a read from a DataSet is attempted.
Data parallel field IO
-----------------------

View File

@ -0,0 +1,396 @@
#include <Grid/Grid.h>
using namespace Grid;
/*
/////////////////////////////////////////////////////////////////////////////////////////////
// Grid/algorithms/SparseMatrix.h: Interface defining what I expect of a general sparse matrix, such as a Fermion action
/////////////////////////////////////////////////////////////////////////////////////////////
template<class Field> class SparseMatrixBase {
public:
virtual GridBase *Grid(void) =0;
virtual void M (const Field &in, Field &out)=0;
virtual void Mdag (const Field &in, Field &out)=0;
virtual void MdagM(const Field &in, Field &out) {
Field tmp (in.Grid());
M(in,tmp);
Mdag(tmp,out);
}
virtual void Mdiag (const Field &in, Field &out)=0;
virtual void Mdir (const Field &in, Field &out,int dir, int disp)=0;
virtual void MdirAll (const Field &in, std::vector<Field> &out)=0;
};
*/
const std::vector<int> directions ({Xdir,Ydir,Zdir,Xdir,Ydir,Zdir});
const std::vector<int> displacements({1,1,1,-1,-1,-1});
template<class Field> class FreeLaplacianCshift : public SparseMatrixBase<Field>
{
public:
GridBase *grid;
FreeLaplacianCshift(GridBase *_grid)
{
grid=_grid;
};
virtual GridBase *Grid(void) { return grid; };
virtual void M (const Field &in, Field &out)
{
out = Zero();
for(int mu=0;mu<Nd-1;mu++) {
out = out + Cshift(in,mu,1) + Cshift(in,mu,-1) - 2.0 * in;
}
};
virtual void Mdag (const Field &in, Field &out) { M(in,out);}; // Laplacian is hermitian
virtual void Mdiag (const Field &in, Field &out) {assert(0);}; // Unimplemented need only for multigrid
virtual void Mdir (const Field &in, Field &out,int dir, int disp){assert(0);}; // Unimplemented need only for multigrid
virtual void MdirAll (const Field &in, std::vector<Field> &out) {assert(0);}; // Unimplemented need only for multigrid
};
template<class Gimpl,class Field> class CovariantLaplacianCshift : public SparseMatrixBase<Field>
{
public:
INHERIT_GIMPL_TYPES(Gimpl);
GridBase *grid;
GaugeField U;
CovariantLaplacianCshift(GaugeField &_U) :
grid(_U.Grid()),
U(_U) { };
virtual GridBase *Grid(void) { return grid; };
virtual void M (const Field &in, Field &out)
{
out=Zero();
for(int mu=0;mu<Nd-1;mu++) {
GaugeLinkField Umu = PeekIndex<LorentzIndex>(U, mu); // NB: Inefficent
out = out + Gimpl::CovShiftForward(Umu,mu,in);
out = out + Gimpl::CovShiftBackward(Umu,mu,in);
out = out - 2.0*in;
}
};
virtual void Mdag (const Field &in, Field &out) { M(in,out);}; // Laplacian is hermitian
virtual void Mdiag (const Field &in, Field &out) {assert(0);}; // Unimplemented need only for multigrid
virtual void Mdir (const Field &in, Field &out,int dir, int disp){assert(0);}; // Unimplemented need only for multigrid
virtual void MdirAll (const Field &in, std::vector<Field> &out) {assert(0);}; // Unimplemented need only for multigrid
};
#define LEG_LOAD(Dir) \
SE = st.GetEntry(ptype, Dir, ss); \
if (SE->_is_local ) { \
int perm= SE->_permute; \
chi = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \
} else { \
chi = coalescedRead(buf[SE->_offset],lane); \
} \
acceleratorSynchronise();
template<class Field> class FreeLaplacianStencil : public SparseMatrixBase<Field>
{
public:
typedef typename Field::vector_object siteObject;
typedef CartesianStencil<siteObject, siteObject, int> StencilImpl;
GridBase *grid;
StencilImpl Stencil;
SimpleCompressor<siteObject> Compressor;
FreeLaplacianStencil(GridBase *_grid)
: Stencil (_grid,6,Even,directions,displacements,0), grid(_grid)
{ };
virtual GridBase *Grid(void) { return grid; };
virtual void M (const Field &_in, Field &_out)
{
///////////////////////////////////////////////
// Halo exchange for this geometry of stencil
///////////////////////////////////////////////
Stencil.HaloExchange(_in, Compressor);
///////////////////////////////////
// Arithmetic expressions
///////////////////////////////////
// Views; device friendly/accessible pointers
auto st = Stencil.View(AcceleratorRead);
auto buf = st.CommBuf();
autoView( in , _in , AcceleratorRead);
autoView( out , _out , AcceleratorWrite);
typedef typename Field::vector_object vobj;
typedef decltype(coalescedRead(in[0])) calcObj;
const int Nsimd = vobj::Nsimd();
const uint64_t NN = grid->oSites();
accelerator_for( ss, NN, Nsimd, {
StencilEntry *SE;
const int lane=acceleratorSIMTlane(Nsimd);
calcObj chi;
calcObj res;
int ptype;
res = coalescedRead(in[ss])*(-6.0);
LEG_LOAD(0); res = res + chi;
LEG_LOAD(1); res = res + chi;
LEG_LOAD(2); res = res + chi;
LEG_LOAD(3); res = res + chi;
LEG_LOAD(4); res = res + chi;
LEG_LOAD(5); res = res + chi;
coalescedWrite(out[ss], res,lane);
});
};
virtual void Mdag (const Field &in, Field &out) { M(in,out);}; // Laplacian is hermitian
virtual void Mdiag (const Field &in, Field &out) {assert(0);}; // Unimplemented need only for multigrid
virtual void Mdir (const Field &in, Field &out,int dir, int disp){assert(0);}; // Unimplemented need only for multigrid
virtual void MdirAll (const Field &in, std::vector<Field> &out) {assert(0);}; // Unimplemented need only for multigrid
};
template<class Gimpl,class Field> class CovariantLaplacianStencil : public SparseMatrixBase<Field>
{
public:
INHERIT_GIMPL_TYPES(Gimpl);
typedef typename Field::vector_object siteObject;
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Nc> >, Nds>;
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
typedef CartesianStencil<siteObject, siteObject, int> StencilImpl;
GridBase *grid;
StencilImpl Stencil;
SimpleCompressor<siteObject> Compressor;
DoubledGaugeField Uds;
CovariantLaplacianStencil(GaugeField &Umu)
:
grid(Umu.Grid()),
Stencil (grid,6,Even,directions,displacements,0),
Uds(grid)
{
for (int mu = 0; mu < Nd; mu++) {
auto U = PeekIndex<LorentzIndex>(Umu, mu);
PokeIndex<LorentzIndex>(Uds, U, mu );
U = adj(Cshift(U, mu, -1));
PokeIndex<LorentzIndex>(Uds, U, mu + 4);
}
};
virtual GridBase *Grid(void) { return grid; };
virtual void M (const Field &_in, Field &_out)
{
///////////////////////////////////////////////
// Halo exchange for this geometry of stencil
///////////////////////////////////////////////
Stencil.HaloExchange(_in, Compressor);
///////////////////////////////////
// Arithmetic expressions
///////////////////////////////////
auto st = Stencil.View(AcceleratorRead);
auto buf = st.CommBuf();
autoView( in , _in , AcceleratorRead);
autoView( out , _out , AcceleratorWrite);
autoView( U , Uds , AcceleratorRead);
typedef typename Field::vector_object vobj;
typedef decltype(coalescedRead(in[0])) calcObj;
typedef decltype(coalescedRead(U[0](0))) calcLink;
const int Nsimd = vobj::Nsimd();
const uint64_t NN = grid->oSites();
accelerator_for( ss, NN, Nsimd, {
StencilEntry *SE;
const int lane=acceleratorSIMTlane(Nsimd);
calcObj chi;
calcObj res;
calcObj Uchi;
calcLink UU;
int ptype;
res = coalescedRead(in[ss])*(-6.0);
#define LEG_LOAD_MULT(leg,polarisation) \
UU = coalescedRead(U[ss](polarisation)); \
LEG_LOAD(leg); \
mult(&Uchi(), &UU, &chi()); \
res = res + Uchi;
LEG_LOAD_MULT(0,Xp);
LEG_LOAD_MULT(1,Yp);
LEG_LOAD_MULT(2,Zp);
LEG_LOAD_MULT(3,Xm);
LEG_LOAD_MULT(4,Ym);
LEG_LOAD_MULT(5,Zm);
coalescedWrite(out[ss], res,lane);
});
};
virtual void Mdag (const Field &in, Field &out) { M(in,out);}; // Laplacian is hermitian
virtual void Mdiag (const Field &in, Field &out) {assert(0);}; // Unimplemented need only for multigrid
virtual void Mdir (const Field &in, Field &out,int dir, int disp){assert(0);}; // Unimplemented need only for multigrid
virtual void MdirAll (const Field &in, std::vector<Field> &out) {assert(0);}; // Unimplemented need only for multigrid
};
#undef LEG_LOAD_MULT
#undef LEG_LOAD
int main(int argc, char ** argv)
{
Grid_init(&argc, &argv);
typedef LatticeColourVector Field;
auto latt_size = GridDefaultLatt();
auto simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
auto mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridParallelRNG RNG(&Grid); RNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
FreeLaplacianCshift<Field> FLcs(&Grid);
FreeLaplacianStencil<Field> FLst(&Grid);
LatticeGaugeField U(&Grid);
SU<Nc>::ColdConfiguration(RNG,U);
std::cout << " Gauge field has norm " <<norm2(U)<<std::endl;
CovariantLaplacianCshift <PeriodicGimplR,Field> CLcs(U);
CovariantLaplacianStencil<PeriodicGimplR,Field> CLst(U);
Field in(&Grid); gaussian(RNG,in);
Field out_FLcs(&Grid);
Field out_FLst(&Grid);
Field out_CLcs(&Grid);
Field out_CLst(&Grid);
Field diff(&Grid);
////////////////////////////////////////////////////////
// First test: in free field these should all agree
////////////////////////////////////////////////////////
FLcs.M(in,out_FLcs);
FLst.M(in,out_FLst);
CLcs.M(in,out_CLcs);
CLst.M(in,out_CLst);
std:: cout << "******************************************************************" <<std::endl;
std:: cout << " Test A: consistency of four different Laplacian implementations " <<std::endl;
std:: cout << "******************************************************************" <<std::endl;
std:: cout << " Input test vector " <<norm2(in)<<std::endl;
std:: cout << "--------------------------------------------------------" <<std::endl;
std:: cout << " Free cshift output vector " <<norm2(out_FLcs)<<std::endl;
std:: cout << " Free stencil output vector " <<norm2(out_FLst)<<std::endl;
std:: cout << " Cov cshift output vector " <<norm2(out_CLcs)<<std::endl;
std:: cout << " Cov stencil output vector " <<norm2(out_CLst)<<std::endl;
std:: cout << "--------------------------------------------------------" <<std::endl;
diff = out_FLcs - out_FLst;
std:: cout << " Difference between free Cshift Laplacian and free Stencil Laplacian = " <<norm2(diff)<<std::endl;
diff = out_FLcs - out_CLcs;
std:: cout << " Difference between free Cshift Laplacian and covariant Cshift Laplacian = " <<norm2(diff)<<std::endl;
diff = out_FLcs - out_CLst;
std:: cout << " Difference between free Cshift Laplacian and covariant Stencil Laplacian = " <<norm2(diff)<<std::endl;
std:: cout << "--------------------------------------------------------" <<std::endl;
std:: cout << "******************************************************************" <<std::endl;
std:: cout << " Test B: gauge covariance " <<std::endl;
std:: cout << "******************************************************************" <<std::endl;
LatticeGaugeField U_GT(&Grid); // Gauge transformed field
LatticeColourMatrix g(&Grid); // local Gauge xform matrix
U_GT = U;
// Make a random xform to teh gauge field
SU<Nc>::RandomGaugeTransform(RNG,U_GT,g); // Unit gauge
Field in_GT(&Grid);
Field out_GT(&Grid);
Field out_CLcs_GT(&Grid);
Field out_CLst_GT(&Grid);
CovariantLaplacianCshift <PeriodicGimplR,Field> CLcs_GT(U_GT);
CovariantLaplacianStencil<PeriodicGimplR,Field> CLst_GT(U_GT);
in_GT = g*in;
out_GT = g*out_FLcs;
// Check M^GT_xy in_GT = g(x) M_xy g^dag(y) g(y) in = g(x) out(x)
CLcs_GT.M(in_GT,out_CLcs_GT);
CLst_GT.M(in_GT,out_CLst_GT);
diff = out_CLcs_GT - out_GT;
std:: cout << " Difference between Gauge xformed result and covariant Cshift Laplacian in xformed gauge = " <<norm2(diff)<<std::endl;
diff = out_CLst_GT - out_GT;
std:: cout << " Difference between Gauge xformed result and covariant Stencil Laplacian in xformed gauge = " <<norm2(diff)<<std::endl;
std:: cout << "--------------------------------------------------------" <<std::endl;
std:: cout << "******************************************************************" <<std::endl;
std:: cout << " Test C: compare in free Field to \"Feynman rule\" " <<std::endl;
std:: cout << "******************************************************************" <<std::endl;
std::vector<int> dim_mask({1,1,1,0}); // 3d FFT
FFT theFFT(&Grid);
Field out(&Grid);
Field F_out(&Grid);
Field F_in(&Grid);
// FFT the random input vector
theFFT.FFT_dim_mask(F_in,in,dim_mask,FFT::forward);
// Convolution theorem: multiply by Fourier representation of (discrete) Laplacian to apply diff op
LatticeComplexD lap(&Grid); lap = Zero();
LatticeComplexD kmu(&Grid);
ComplexD ci(0.0,1.0);
for(int mu=0;mu<3;mu++) {
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
LatticeCoordinate(kmu,mu);
kmu = TwoPiL * kmu;
// (e^ik_mu + e^-ik_mu - 2) = 2( cos kmu - 1) ~ 2 (1 - k_mu^2/2 -1 ) = - k_mu^2 + O(k^4)
lap = lap + 2.0*cos(kmu) - 2.0;
}
F_out = lap * F_in;
// Inverse FFT the result
theFFT.FFT_dim_mask(out,F_out,dim_mask,FFT::backward);
std::cout<<"Fourier xformed (in) "<<norm2(F_in)<<std::endl;
std::cout<<"Fourier xformed Laplacian x (in) "<<norm2(F_out)<<std::endl;
std::cout<<"Momentum space Laplacian application "<< norm2(out)<<std::endl;
std::cout<<"Stencil Laplacian application "<< norm2(out_CLcs)<<std::endl;
diff = out_CLcs - out;
std::cout<<"diff "<< norm2(diff)<<std::endl;
Grid_finalize();
}

View File

@ -0,0 +1,127 @@
#include <Grid/Grid.h>
using namespace Grid;
// Function used for Chebyshev smearing
//
Real MomentumSmearing(Real p2)
{
return (1 - 4.0*p2) * exp(-p2/4);
}
Real DistillationSmearing(Real p2)
{
if ( p2 > 0.5 ) return 0.0;
else return 1.0;
}
// Flip sign to make prop to p^2, not -p^2 relative to last example
template<class Gimpl,class Field> class CovariantLaplacianCshift : public SparseMatrixBase<Field>
{
public:
INHERIT_GIMPL_TYPES(Gimpl);
GridBase *grid;
GaugeField U;
CovariantLaplacianCshift(GaugeField &_U) :
grid(_U.Grid()),
U(_U) { };
virtual GridBase *Grid(void) { return grid; };
virtual void M (const Field &in, Field &out)
{
out=Zero();
for(int mu=0;mu<Nd-1;mu++) {
GaugeLinkField Umu = PeekIndex<LorentzIndex>(U, mu); // NB: Inefficent
out = out - Gimpl::CovShiftForward(Umu,mu,in);
out = out - Gimpl::CovShiftBackward(Umu,mu,in);
out = out + 2.0*in;
}
};
virtual void Mdag (const Field &in, Field &out) { M(in,out);}; // Laplacian is hermitian
virtual void Mdiag (const Field &in, Field &out) {assert(0);}; // Unimplemented need only for multigrid
virtual void Mdir (const Field &in, Field &out,int dir, int disp){assert(0);}; // Unimplemented need only for multigrid
virtual void MdirAll (const Field &in, std::vector<Field> &out) {assert(0);}; // Unimplemented need only for multigrid
};
int main(int argc, char ** argv)
{
Grid_init(&argc, &argv);
typedef LatticeColourVector Field;
auto latt_size = GridDefaultLatt();
auto simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
auto mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridParallelRNG RNG(&Grid); RNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
LatticeGaugeField U(&Grid);
SU<Nc>::ColdConfiguration(RNG,U);
typedef CovariantLaplacianCshift <PeriodicGimplR,Field> Laplacian_t;
Laplacian_t Laplacian(U);
ColourVector ColourKronecker;
ColourKronecker = Zero();
ColourKronecker()()(0) = 1.0;
Coordinate site({latt_size[0]/2,
latt_size[1]/2,
latt_size[2]/2,
0});
Field kronecker(&Grid);
kronecker = Zero();
pokeSite(ColourKronecker,kronecker,site);
Field psi(&Grid), chi(&Grid);
//////////////////////////////////////
// Classic Wuppertal smearing
//////////////////////////////////////
Integer Iterations = 80;
Real width = 2.0;
Real coeff = (width*width) / Real(4*Iterations);
chi=kronecker;
// chi = (1-p^2/2N)^N kronecker
for(int n = 0; n < Iterations; ++n) {
Laplacian.M(chi,psi);
chi = chi - coeff*psi;
}
std::cout << " Wuppertal smeared operator is chi = \n" << chi <<std::endl;
/////////////////////////////////////
// Chebyshev smearing
/////////////////////////////////////
RealD lo = 0.0;
RealD hi = 12.0; // Analytic free field bound
HermitianLinearOperator<Laplacian_t,Field> HermOp(Laplacian);
std::cout << " Checking spectral range of our POSITIVE definite operator \n";
PowerMethod<Field> PM;
PM(HermOp,kronecker);
// Chebyshev<Field> ChebySmear(lo,hi,20,DistillationSmearing);
Chebyshev<Field> ChebySmear(lo,hi,20,MomentumSmearing);
{
std::ofstream of("chebysmear");
ChebySmear.csv(of);
}
ChebySmear(HermOp,kronecker,chi);
std::cout << " Chebyshev smeared operator is chi = \n" << chi <<std::endl;
Grid_finalize();
}

View File

@ -0,0 +1,129 @@
#include <Grid/Grid.h>
using namespace Grid;
template<class Field>
void SimpleConjugateGradient(LinearOperatorBase<Field> &HPDop,const Field &b, Field &x)
{
RealD cp, c, alpha, d, beta, ssq, qq;
RealD Tolerance=1.0e-10;
int MaxIterations=10000;
Field p(b), mmp(b), r(b);
HPDop.HermOpAndNorm(x, mmp, d, beta);
r = b - mmp;
p = r;
cp = alpha = norm2(p);
ssq = norm2(b);
RealD rsq = Tolerance * Tolerance * ssq;
for (int k = 1; k <= MaxIterations; k++) {
c = cp;
HPDop.HermOp(p, mmp);
d = real(innerProduct(p,mmp));
alpha = c / d;
r = r - alpha *mmp;
cp = norm2(r);
beta = cp / c;
x = x + alpha* p ;
p = r + beta* p ;
std::cout << "iteration "<<k<<" cp " <<std::sqrt(cp/ssq) << std::endl;
if (cp <= rsq) {
return;
}
}
assert(0);
}
// Flip sign to make prop to p^2, not -p^2 relative to last example
template<class Gimpl,class Field> class CovariantLaplacianCshift : public SparseMatrixBase<Field>
{
public:
INHERIT_GIMPL_TYPES(Gimpl);
GridBase *grid;
GaugeField U;
RealD m2=1.0e-2;
CovariantLaplacianCshift(GaugeField &_U) :
grid(_U.Grid()),
U(_U) { };
virtual GridBase *Grid(void) { return grid; };
virtual void M (const Field &in, Field &out)
{
out=Zero();
for(int mu=0;mu<Nd-1;mu++) {
GaugeLinkField Umu = PeekIndex<LorentzIndex>(U, mu); // NB: Inefficent
out = out - Gimpl::CovShiftForward(Umu,mu,in);
out = out - Gimpl::CovShiftBackward(Umu,mu,in);
out = out + 2.0*in + m2*in;
}
};
virtual void Mdag (const Field &in, Field &out) { M(in,out);}; // Laplacian is hermitian
virtual void Mdiag (const Field &in, Field &out) {assert(0);}; // Unimplemented need only for multigrid
virtual void Mdir (const Field &in, Field &out,int dir, int disp){assert(0);}; // Unimplemented need only for multigrid
virtual void MdirAll (const Field &in, std::vector<Field> &out) {assert(0);}; // Unimplemented need only for multigrid
};
int main(int argc, char ** argv)
{
Grid_init(&argc, &argv);
typedef LatticeColourVector Field;
auto latt_size = GridDefaultLatt();
auto simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
auto mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
GridParallelRNG RNG(&Grid); RNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
LatticeGaugeField U(&Grid);
SU<Nc>::ColdConfiguration(RNG,U);
typedef CovariantLaplacianCshift <PeriodicGimplR,Field> Laplacian_t;
Laplacian_t Laplacian(U);
ColourVector ColourKronecker;
ColourKronecker = Zero();
ColourKronecker()()(0) = 1.0;
Coordinate site({0,0,0,0}); // Point source at origin
Field kronecker(&Grid);
kronecker = Zero();
pokeSite(ColourKronecker,kronecker,site);
Field psi(&Grid); psi=Zero();
HermitianLinearOperator<Laplacian_t,Field> HermOp(Laplacian);
SimpleConjugateGradient(HermOp, kronecker,psi);
Field r(&Grid);
Laplacian.M(psi,r);
r=kronecker-r;
std::cout << "True residual "<< norm2(r) <<std::endl;
// Optionally print the result vector
// std::cout << psi<<std::endl;
Grid_finalize();
}

View File

@ -0,0 +1,314 @@
/*
* Warning: This code illustrative only: not well tested, and not meant for production use
* without regression / tests being applied
*/
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
template<class Gimpl,class Field> class CovariantLaplacianCshift : public SparseMatrixBase<Field>
{
public:
INHERIT_GIMPL_TYPES(Gimpl);
GridBase *grid;
GaugeField U;
CovariantLaplacianCshift(GaugeField &_U) :
grid(_U.Grid()),
U(_U) { };
virtual GridBase *Grid(void) { return grid; };
virtual void M (const Field &in, Field &out)
{
out=Zero();
for(int mu=0;mu<Nd-1;mu++) {
GaugeLinkField Umu = PeekIndex<LorentzIndex>(U, mu); // NB: Inefficent
out = out - Gimpl::CovShiftForward(Umu,mu,in);
out = out - Gimpl::CovShiftBackward(Umu,mu,in);
out = out + 2.0*in;
}
};
virtual void Mdag (const Field &in, Field &out) { M(in,out);}; // Laplacian is hermitian
virtual void Mdiag (const Field &in, Field &out) {assert(0);}; // Unimplemented need only for multigrid
virtual void Mdir (const Field &in, Field &out,int dir, int disp){assert(0);}; // Unimplemented need only for multigrid
virtual void MdirAll (const Field &in, std::vector<Field> &out) {assert(0);}; // Unimplemented need only for multigrid
};
void MakePhase(Coordinate mom,LatticeComplex &phase)
{
GridBase *grid = phase.Grid();
auto latt_size = grid->GlobalDimensions();
ComplexD ci(0.0,1.0);
phase=Zero();
LatticeComplex coor(phase.Grid());
for(int mu=0;mu<Nd;mu++){
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
LatticeCoordinate(coor,mu);
phase = phase + (TwoPiL * mom[mu]) * coor;
}
phase = exp(phase*ci);
}
void PointSource(Coordinate &coor,LatticePropagator &source)
{
// Coordinate coor({0,0,0,0});
source=Zero();
SpinColourMatrix kronecker; kronecker=1.0;
pokeSite(kronecker,source,coor);
}
void Z2WallSource(GridParallelRNG &RNG,int tslice,LatticePropagator &source)
{
GridBase *grid = source.Grid();
LatticeComplex noise(grid);
LatticeComplex zz(grid); zz=Zero();
LatticeInteger t(grid);
RealD nrm=1.0/sqrt(2);
bernoulli(RNG, noise); // 0,1 50:50
noise = (2.*noise - Complex(1,1))*nrm;
LatticeCoordinate(t,Tdir);
noise = where(t==Integer(tslice), noise, zz);
source = 1.0;
source = source*noise;
std::cout << " Z2 wall " << norm2(source) << std::endl;
}
template<class Field>
void GaussianSmear(LatticeGaugeField &U,Field &unsmeared,Field &smeared)
{
typedef CovariantLaplacianCshift <PeriodicGimplR,Field> Laplacian_t;
Laplacian_t Laplacian(U);
Integer Iterations = 40;
Real width = 2.0;
Real coeff = (width*width) / Real(4*Iterations);
Field tmp(U.Grid());
smeared=unsmeared;
// chi = (1-p^2/2N)^N kronecker
for(int n = 0; n < Iterations; ++n) {
Laplacian.M(smeared,tmp);
smeared = smeared - coeff*tmp;
std::cout << " smear iter " << n<<" " <<norm2(smeared)<<std::endl;
}
}
void GaussianSource(Coordinate &site,LatticeGaugeField &U,LatticePropagator &source)
{
LatticePropagator tmp(source.Grid());
PointSource(site,source);
std::cout << " GaussianSource Kronecker "<< norm2(source)<<std::endl;
tmp = source;
GaussianSmear(U,tmp,source);
std::cout << " GaussianSource Smeared "<< norm2(source)<<std::endl;
}
void GaussianWallSource(GridParallelRNG &RNG,int tslice,LatticeGaugeField &U,LatticePropagator &source)
{
Z2WallSource(RNG,tslice,source);
auto tmp = source;
GaussianSmear(U,tmp,source);
}
void SequentialSource(int tslice,Coordinate &mom,LatticePropagator &spectator,LatticePropagator &source)
{
assert(mom.size()==Nd);
assert(mom[Tdir] == 0);
GridBase * grid = spectator.Grid();
LatticeInteger ts(grid);
LatticeCoordinate(ts,Tdir);
source = Zero();
source = where(ts==Integer(tslice),spectator,source); // Stick in a slice of the spectator, zero everywhere else
LatticeComplex phase(grid);
MakePhase(mom,phase);
source = source *phase;
}
template<class Action>
void Solve(Action &D,LatticePropagator &source,LatticePropagator &propagator)
{
GridBase *UGrid = D.GaugeGrid();
GridBase *FGrid = D.FermionGrid();
LatticeFermion src4 (UGrid);
LatticeFermion src5 (FGrid);
LatticeFermion result5(FGrid);
LatticeFermion result4(UGrid);
ConjugateGradient<LatticeFermion> CG(1.0e-8,100000);
SchurRedBlackDiagMooeeSolve<LatticeFermion> schur(CG);
ZeroGuesser<LatticeFermion> ZG; // Could be a DeflatedGuesser if have eigenvectors
for(int s=0;s<Nd;s++){
for(int c=0;c<Nc;c++){
PropToFerm<Action>(src4,source,s,c);
D.ImportPhysicalFermionSource(src4,src5);
result5=Zero();
schur(D,src5,result5,ZG);
std::cout<<GridLogMessage
<<"spin "<<s<<" color "<<c
<<" norm2(src5d) " <<norm2(src5)
<<" norm2(result5d) "<<norm2(result5)<<std::endl;
D.ExportPhysicalFermionSolution(result5,result4);
FermToProp<Action>(propagator,result4,s,c);
}
}
}
class MesonFile: Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(MesonFile, std::vector<std::vector<Complex> >, data);
};
void MesonTrace(std::string file,LatticePropagator &q1,LatticePropagator &q2,LatticeComplex &phase)
{
const int nchannel=4;
Gamma::Algebra Gammas[nchannel][2] = {
{Gamma::Algebra::Gamma5 ,Gamma::Algebra::Gamma5},
{Gamma::Algebra::GammaTGamma5,Gamma::Algebra::GammaTGamma5},
{Gamma::Algebra::GammaTGamma5,Gamma::Algebra::Gamma5},
{Gamma::Algebra::Gamma5 ,Gamma::Algebra::GammaTGamma5}
};
Gamma G5(Gamma::Algebra::Gamma5);
LatticeComplex meson_CF(q1.Grid());
MesonFile MF;
for(int ch=0;ch<nchannel;ch++){
Gamma Gsrc(Gammas[ch][0]);
Gamma Gsnk(Gammas[ch][1]);
meson_CF = trace(G5*adj(q1)*G5*Gsnk*q2*adj(Gsrc));
std::vector<TComplex> meson_T;
sliceSum(meson_CF,meson_T, Tdir);
int nt=meson_T.size();
std::vector<Complex> corr(nt);
for(int t=0;t<nt;t++){
corr[t] = TensorRemove(meson_T[t]); // Yes this is ugly, not figured a work around
std::cout << " channel "<<ch<<" t "<<t<<" " <<corr[t]<<std::endl;
}
MF.data.push_back(corr);
}
{
XmlWriter WR(file);
write(WR,"MesonFile",MF);
}
}
int main (int argc, char ** argv)
{
const int Ls=8;
Grid_init(&argc,&argv);
// Double precision grids
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
GridDefaultSimd(Nd,vComplex::Nsimd()),
GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
//////////////////////////////////////////////////////////////////////
// You can manage seeds however you like.
// Recommend SeedUniqueString.
//////////////////////////////////////////////////////////////////////
std::vector<int> seeds4({1,2,3,4});
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
LatticeGaugeField Umu(UGrid);
std::string config;
if( argc > 1 && argv[1][0] != '-' )
{
std::cout<<GridLogMessage <<"Loading configuration from "<<argv[1]<<std::endl;
FieldMetaData header;
NerscIO::readConfiguration(Umu, header, argv[1]);
config=argv[1];
}
else
{
std::cout<<GridLogMessage <<"Using hot configuration"<<std::endl;
SU<Nc>::ColdConfiguration(Umu);
// SU<Nc>::HotConfiguration(RNG4,Umu);
config="HotConfig";
}
std::vector<RealD> masses({ 0.03,0.04,0.45} ); // u/d, s, c ??
int nmass = masses.size();
std::vector<MobiusFermionR *> FermActs;
std::cout<<GridLogMessage <<"======================"<<std::endl;
std::cout<<GridLogMessage <<"MobiusFermion action as Scaled Shamir kernel"<<std::endl;
std::cout<<GridLogMessage <<"======================"<<std::endl;
for(auto mass: masses) {
RealD M5=1.0;
RealD b=1.5;// Scale factor b+c=2, b-c=1
RealD c=0.5;
FermActs.push_back(new MobiusFermionR(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c));
}
LatticePropagator point_source(UGrid);
LatticePropagator wall_source(UGrid);
LatticePropagator gaussian_source(UGrid);
Coordinate Origin({0,0,0,0});
PointSource (Origin,point_source);
Z2WallSource (RNG4,0,wall_source);
GaussianSource(Origin,Umu,gaussian_source);
std::vector<LatticePropagator> PointProps(nmass,UGrid);
std::vector<LatticePropagator> GaussProps(nmass,UGrid);
std::vector<LatticePropagator> Z2Props (nmass,UGrid);
for(int m=0;m<nmass;m++) {
Solve(*FermActs[m],point_source ,PointProps[m]);
Solve(*FermActs[m],gaussian_source,GaussProps[m]);
Solve(*FermActs[m],wall_source ,Z2Props[m]);
}
LatticeComplex phase(UGrid);
Coordinate mom({0,0,0,0});
MakePhase(mom,phase);
for(int m1=0 ;m1<nmass;m1++) {
for(int m2=m1;m2<nmass;m2++) {
std::stringstream ssp,ssg,ssz;
ssp<<config<< "_m" << m1 << "_m"<< m2 << "_point_meson.xml";
ssg<<config<< "_m" << m1 << "_m"<< m2 << "_smeared_meson.xml";
ssz<<config<< "_m" << m1 << "_m"<< m2 << "_wall_meson.xml";
MesonTrace(ssp.str(),PointProps[m1],PointProps[m2],phase);
MesonTrace(ssg.str(),GaussProps[m1],GaussProps[m2],phase);
MesonTrace(ssz.str(),Z2Props[m1],Z2Props[m2],phase);
}}
Grid_finalize();
}

6
examples/Makefile.am Normal file
View File

@ -0,0 +1,6 @@
SUBDIRS = .
include Make.inc

View File

@ -82,3 +82,18 @@ for f in $TESTS; do
echo >> Make.inc
done
cd ..
# examples Make.inc
cd $home/examples/
echo> Make.inc
TESTS=`ls *.cc`
TESTLIST=`echo ${TESTS} | sed s/.cc//g `
echo bin_PROGRAMS = ${TESTLIST} > Make.inc
echo >> Make.inc
for f in $TESTS; do
BNAME=`basename $f .cc`
echo ${BNAME}_SOURCES=$f >> Make.inc
echo ${BNAME}_LDADD='$(top_builddir)/Grid/libGrid.a'>> Make.inc
echo >> Make.inc
done
cd ..

View File

@ -48,9 +48,7 @@ public:
std::vector<double>, array,
std::vector<std::vector<double> >, twodimarray,
std::vector<std::vector<std::vector<std::complex<double>> > >, cmplx3darray,
SpinColourMatrix, scm,
std::vector<std::vector<std::vector<int> > >, ragged,
std::vector<std::vector<SpinColourMatrix> >, vscm
SpinColourMatrix, scm
);
myclass() {}
myclass(int i)
@ -58,10 +56,6 @@ public:
, twodimarray(3,std::vector<double>(5, 1.23456))
, cmplx3darray(3,std::vector<std::vector<std::complex<double>>>(5, std::vector<std::complex<double>>(7, std::complex<double>(1.2, 3.4))))
, ve(2, myenum::blue)
, ragged( {{{i+1},{i+2,i+3}}, // ragged
{{i+4,i+5,i+6,i+7},{i+8,i+9,i+10,i+11},{i+12,i+13,i+14,i+15}}, // block
{{i+16,i+17},{i+18,i+19,i+20}}} ) //ragged
, vscm(3, std::vector<SpinColourMatrix>(5))
{
e=myenum::red;
x=i;
@ -74,13 +68,6 @@ public:
scm()(0, 2)(1, 1) = 6.336;
scm()(2, 1)(2, 2) = 7.344;
scm()(1, 1)(2, 0) = 8.3534;
int Counter = i;
for( auto & v : vscm ) {
for( auto & j : v ) {
j = std::complex<double>(Counter, -Counter);
Counter++;
}
}
}
};

View File

@ -0,0 +1,73 @@
#Example script
DIR=/gpfs/alpine/phy157/proj-shared/phy157dwf/chulwoo/Grid/BL/build/tests/lanczos
BIN=${DIR}/Test_dwf_block_lanczos
VOL='--grid 16.16.16.32 '
GRID='--mpi 1.1.1.4 '
CONF='--gconf ckpoint_lat.IEEE64BIG.2000 '
OPT='--mass 0.01 --M5 1.8 --phase in.params --omega in.params --shm 4096'
#BL='--rbl 16.1024.128.1000.10 --split 1.1.4.4 --check_int 100 --resid 1.0e-5 --cheby_l 0.007 --cheby_u 7 --cheby_n 51'
BL='--rbl 4.128.16.100.10 --split 1.1.1.4 --check_int 25 --resid 1.0e-5 --cheby_l 0.007 --cheby_u 7 --cheby_n 51'
ARGS=${CONF}" "${OPT}" "${BL}" "${VOL}" "${GRID}
export APP="${BIN} ${ARGS}"
echo APP=${APP}
#export JS="jsrun --nrs 32 -a4 -g4 -c42 -dpacked -b packed:7 --smpiargs="-gpu" "
export JS="jsrun --nrs 1 -a4 -g4 -c42 -dpacked -b packed:10 --smpiargs="-gpu" "
$JS $APP
#sample in.param
boundary_phase 0 1 0
boundary_phase 1 1 0
boundary_phase 2 1 0
boundary_phase 3 -1 0
omega 0 0.5 0
omega 1 0.5 0
omega 2 0.5 0
omega 3 0.5 0
omega 4 0.5 0
omega 5 0.5 0
omega 6 0.5 0
omega 7 0.5 0
omega 8 0.5 0
omega 9 0.5 0
omega 10 0.5 0
omega 11 0.5 0
#output
Grid : Message : 1.717474 s : Gauge Configuration ckpoint_lat.IEEE64BIG.2000
Grid : Message : 1.717478 s : boundary_phase[0] = (1,0)
Grid : Message : 1.717497 s : boundary_phase[1] = (1,0)
Grid : Message : 1.717500 s : boundary_phase[2] = (1,0)
Grid : Message : 1.717503 s : boundary_phase[3] = (-1,0)
Grid : Message : 1.717506 s : Ls 12
Grid : Message : 1.717507 s : mass 0.01
Grid : Message : 1.717510 s : M5 1.8
Grid : Message : 1.717512 s : mob_b 1.5
Grid : Message : 1.717514 s : omega[0] = (0.5,0)
Grid : Message : 1.717517 s : omega[1] = (0.5,0)
Grid : Message : 1.717520 s : omega[2] = (0.5,0)
Grid : Message : 1.717523 s : omega[3] = (0.5,0)
Grid : Message : 1.717526 s : omega[4] = (0.5,0)
Grid : Message : 1.717529 s : omega[5] = (0.5,0)
Grid : Message : 1.717532 s : omega[6] = (0.5,0)
Grid : Message : 1.717535 s : omega[7] = (0.5,0)
Grid : Message : 1.717538 s : omega[8] = (0.5,0)
Grid : Message : 1.717541 s : omega[9] = (0.5,0)
Grid : Message : 1.717544 s : omega[10] = (0.5,0)
Grid : Message : 1.717547 s : omega[11] = (0.5,0)
Grid : Message : 1.717550 s : Nu 4
Grid : Message : 1.717551 s : Nk 128
Grid : Message : 1.717552 s : Np 16
Grid : Message : 1.717553 s : Nm 288
Grid : Message : 1.717554 s : Nstop 100
Grid : Message : 1.717555 s : Ntest 25
Grid : Message : 1.717557 s : MaxIter 10
Grid : Message : 1.717558 s : resid 1e-05
Grid : Message : 1.717560 s : Cheby Poly 0.007,7,51

View File

@ -0,0 +1,403 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_dwf_block_lanczos.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/util/Init.h>
#include <Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczos.h>
using namespace std;
using namespace Grid;
//using namespace Grid::QCD;
//typedef typename GparityDomainWallFermionR::FermionField FermionField;
typedef typename ZMobiusFermionF::FermionField FermionField;
RealD AllZero(RealD x){ return 0.;}
class CmdJobParams
{
public:
std::string gaugefile;
int Ls;
double mass;
double M5;
double mob_b;
std::vector<ComplexD> omega;
std::vector<Complex> boundary_phase;
std::vector<int> mpi_split;
LanczosType Impl;
int Nu;
int Nk;
int Np;
int Nm;
int Nstop;
int Ntest;
int MaxIter;
double resid;
double low;
double high;
int order;
CmdJobParams()
: gaugefile("Hot"),
Ls(8), mass(0.01), M5(1.8), mob_b(1.5),
Impl(LanczosType::irbl),mpi_split(4,1),
Nu(4), Nk(200), Np(200), Nstop(100), Ntest(1), MaxIter(10), resid(1.0e-8),
low(0.2), high(5.5), order(11)
{Nm=Nk+Np;};
void Parse(char **argv, int argc);
};
void CmdJobParams::Parse(char **argv,int argc)
{
std::string arg;
std::vector<int> vi;
double re,im;
int expect, idx;
std::string vstr;
std::ifstream pfile;
if( GridCmdOptionExists(argv,argv+argc,"--gconf") ){
gaugefile = GridCmdOptionPayload(argv,argv+argc,"--gconf");
}
if( GridCmdOptionExists(argv,argv+argc,"--phase") ){
arg = GridCmdOptionPayload(argv,argv+argc,"--phase");
pfile.open(arg);
assert(pfile);
expect = 0;
while( pfile >> vstr ) {
if ( vstr.compare("boundary_phase") == 0 ) {
pfile >> vstr;
GridCmdOptionInt(vstr,idx);
assert(expect==idx);
pfile >> vstr;
GridCmdOptionFloat(vstr,re);
pfile >> vstr;
GridCmdOptionFloat(vstr,im);
boundary_phase.push_back({re,im});
expect++;
}
}
pfile.close();
} else {
for (int i=0; i<4; ++i) boundary_phase.push_back({1.,0.});
}
if( GridCmdOptionExists(argv,argv+argc,"--omega") ){
arg = GridCmdOptionPayload(argv,argv+argc,"--omega");
pfile.open(arg);
assert(pfile);
Ls = 0;
while( pfile >> vstr ) {
if ( vstr.compare("omega") == 0 ) {
pfile >> vstr;
GridCmdOptionInt(vstr,idx);
assert(Ls==idx);
pfile >> vstr;
GridCmdOptionFloat(vstr,re);
pfile >> vstr;
GridCmdOptionFloat(vstr,im);
omega.push_back({re,im});
Ls++;
}
}
pfile.close();
} else {
if( GridCmdOptionExists(argv,argv+argc,"--Ls") ){
arg = GridCmdOptionPayload(argv,argv+argc,"--Ls");
GridCmdOptionInt(arg,Ls);
}
}
if( GridCmdOptionExists(argv,argv+argc,"--mass") ){
arg = GridCmdOptionPayload(argv,argv+argc,"--mass");
GridCmdOptionFloat(arg,mass);
}
if( GridCmdOptionExists(argv,argv+argc,"--M5") ){
arg = GridCmdOptionPayload(argv,argv+argc,"--M5");
GridCmdOptionFloat(arg,M5);
}
if( GridCmdOptionExists(argv,argv+argc,"--mob_b") ){
arg = GridCmdOptionPayload(argv,argv+argc,"--mob_b");
GridCmdOptionFloat(arg,mob_b);
}
if( GridCmdOptionExists(argv,argv+argc,"--irbl") ){
arg = GridCmdOptionPayload(argv,argv+argc,"--irbl");
GridCmdOptionIntVector(arg,vi);
Nu = vi[0];
Nk = vi[1];
Np = vi[2];
Nstop = vi[3];
MaxIter = vi[4];
// ypj[fixme] mode overriding message is needed.
Impl = LanczosType::irbl;
Nm = Nk+Np;
}
// block Lanczos with explicit extension of its dimensions
if( GridCmdOptionExists(argv,argv+argc,"--rbl") ){
arg = GridCmdOptionPayload(argv,argv+argc,"--rbl");
GridCmdOptionIntVector(arg,vi);
Nu = vi[0];
Nk = vi[1];
Np = vi[2]; // vector space is enlarged by adding Np vectors
Nstop = vi[3];
MaxIter = vi[4];
// ypj[fixme] mode overriding message is needed.
Impl = LanczosType::rbl;
Nm = Nk+Np*MaxIter;
}
#if 1
// block Lanczos with explicit extension of its dimensions
if( GridCmdOptionExists(argv,argv+argc,"--split") ){
arg = GridCmdOptionPayload(argv,argv+argc,"--split");
GridCmdOptionIntVector(arg,vi);
for(int i=0;i<mpi_split.size();i++)
mpi_split[i] = vi[i];
}
#endif
if( GridCmdOptionExists(argv,argv+argc,"--check_int") ){
arg = GridCmdOptionPayload(argv,argv+argc,"--check_int");
GridCmdOptionInt(arg,Ntest);
}
if( GridCmdOptionExists(argv,argv+argc,"--resid") ){
arg = GridCmdOptionPayload(argv,argv+argc,"--resid");
GridCmdOptionFloat(arg,resid);
}
if( GridCmdOptionExists(argv,argv+argc,"--cheby_l") ){
arg = GridCmdOptionPayload(argv,argv+argc,"--cheby_l");
GridCmdOptionFloat(arg,low);
}
if( GridCmdOptionExists(argv,argv+argc,"--cheby_u") ){
arg = GridCmdOptionPayload(argv,argv+argc,"--cheby_u");
GridCmdOptionFloat(arg,high);
}
if( GridCmdOptionExists(argv,argv+argc,"--cheby_n") ){
arg = GridCmdOptionPayload(argv,argv+argc,"--cheby_n");
GridCmdOptionInt(arg,order);
}
if ( CartesianCommunicator::RankWorld() == 0 ) {
std::streamsize ss = std::cout.precision();
std::cout << GridLogMessage <<" Gauge Configuration "<< gaugefile << '\n';
std::cout.precision(15);
for ( int i=0; i<4; ++i ) std::cout << GridLogMessage <<" boundary_phase["<< i << "] = " << boundary_phase[i] << '\n';
std::cout.precision(ss);
std::cout << GridLogMessage <<" Ls "<< Ls << '\n';
std::cout << GridLogMessage <<" mass "<< mass << '\n';
std::cout << GridLogMessage <<" M5 "<< M5 << '\n';
std::cout << GridLogMessage <<" mob_b "<< mob_b << '\n';
std::cout.precision(15);
for ( int i=0; i<Ls; ++i ) std::cout << GridLogMessage <<" omega["<< i << "] = " << omega[i] << '\n';
std::cout.precision(ss);
std::cout << GridLogMessage <<" Nu "<< Nu << '\n';
std::cout << GridLogMessage <<" Nk "<< Nk << '\n';
std::cout << GridLogMessage <<" Np "<< Np << '\n';
std::cout << GridLogMessage <<" Nm "<< Nm << '\n';
std::cout << GridLogMessage <<" Nstop "<< Nstop << '\n';
std::cout << GridLogMessage <<" Ntest "<< Ntest << '\n';
std::cout << GridLogMessage <<" MaxIter "<< MaxIter << '\n';
std::cout << GridLogMessage <<" resid "<< resid << '\n';
std::cout << GridLogMessage <<" Cheby Poly "<< low << "," << high << "," << order << std::endl;
}
}
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
CmdJobParams JP;
JP.Parse(argv,argc);
GridCartesian * UGridD = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(JP.Ls,UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(JP.Ls,UGrid);
std::vector<int> seeds4({1,2,3,4});
std::vector<int> seeds5({5,6,7,8});
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
// ypj [note] why seed RNG5 again? bug? In this case, run with a default seed().
GridParallelRNG RNG5rb(FrbGrid); RNG5rb.SeedFixedIntegers(seeds5);
LatticeGaugeField UmuD(UGridD);
LatticeGaugeFieldF Umu(UGrid);
std::vector<LatticeColourMatrixF> U(4,UGrid);
if ( JP.gaugefile.compare("Hot") == 0 ) {
SU3::HotConfiguration(RNG4, UmuD);
} else {
FieldMetaData header;
NerscIO::readConfiguration(UmuD,header,JP.gaugefile);
// ypj [fixme] additional checks for the loaded configuration?
}
precisionChange(Umu,UmuD);
for(int mu=0;mu<Nd;mu++){
U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
}
RealD mass = JP.mass;
RealD M5 = JP.M5;
// ypj [fixme] flexible support for a various Fermions
// RealD mob_b = JP.mob_b; // Gparity
// std::vector<ComplexD> omega; // ZMobius
// GparityMobiusFermionD ::ImplParams params;
// std::vector<int> twists({1,1,1,0});
// params.twists = twists;
// GparityMobiusFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,params);
// SchurDiagTwoOperator<GparityMobiusFermionR,FermionField> HermOp(Ddwf);
// int mrhs = JP.Nu;
int Ndir=4;
auto mpi_layout = GridDefaultMpi();
std::vector<int> mpi_split (Ndir,1);
#if 0
int tmp=mrhs, dir=0;
std::cout << GridLogMessage << "dir= "<<dir <<"tmp= "<<tmp<<"mpi_split= "<<mpi_split[dir]<<"mpi_layout= "<<mpi_split[dir]<<std::endl;
while ( tmp> 1) {
if ((mpi_split[dir]*2) <= mpi_layout[dir]){
mpi_split[dir] *=2;
tmp = tmp/2;
}
std::cout << GridLogMessage << "dir= "<<dir <<"tmp= "<<tmp<<"mpi_split= "<<mpi_split[dir]<<"mpi_layout= "<<mpi_layout[dir]<<std::endl;
dir = (dir+1)%Ndir;
}
#endif
int mrhs=1;
for(int i =0;i<Ndir;i++){
mpi_split[i] = mpi_layout[i] / JP.mpi_split[i] ;
mrhs *= JP.mpi_split[i];
}
std::cout << GridLogMessage << "mpi_layout= " << mpi_layout << std::endl;
std::cout << GridLogMessage << "mpi_split= " << mpi_split << std::endl;
std::cout << GridLogMessage << "mrhs= " << mrhs << std::endl;
// assert(JP.Nu==tmp);
/////////////////////////////////////////////
// Split into 1^4 mpi communicators
/////////////////////////////////////////////
GridCartesian * SGrid = new GridCartesian(GridDefaultLatt(),
GridDefaultSimd(Nd,vComplexF::Nsimd()),
mpi_split,
*UGrid);
GridCartesian * SFGrid = SpaceTimeGrid::makeFiveDimGrid(JP.Ls,SGrid);
GridRedBlackCartesian * SrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(SGrid);
GridRedBlackCartesian * SFrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(JP.Ls,SGrid);
LatticeGaugeFieldF s_Umu(SGrid);
Grid_split (Umu,s_Umu);
//WilsonFermionR::ImplParams params;
ZMobiusFermionF::ImplParams params;
params.overlapCommsCompute = true;
params.boundary_phases = JP.boundary_phase;
ZMobiusFermionF Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,JP.omega,1.,0.,params);
// SchurDiagTwoOperator<ZMobiusFermionR,FermionField> HermOp(Ddwf);
SchurDiagOneOperator<ZMobiusFermionF,FermionField> HermOp(Ddwf);
ZMobiusFermionF Dsplit(s_Umu,*SFGrid,*SFrbGrid,*SGrid,*SrbGrid,mass,M5,JP.omega,1.,0.,params);
// SchurDiagTwoOperator<ZMobiusFermionR,FermionField> SHermOp(Dsplit);
SchurDiagOneOperator<ZMobiusFermionF,FermionField> SHermOp(Dsplit);
//std::vector<double> Coeffs { 0.,-1.};
// ypj [note] this may not be supported by some compilers
std::vector<double> Coeffs({ 0.,-1.});
Polynomial<FermionField> PolyX(Coeffs);
//Chebyshev<FermionField> Cheb(0.2,5.5,11);
Chebyshev<FermionField> Cheb(JP.low,JP.high,JP.order);
// Cheb.csv(std::cout);
ImplicitlyRestartedBlockLanczos<FermionField> IRBL(HermOp, SHermOp,
FrbGrid,SFrbGrid,mrhs,
Cheb,
JP.Nstop, JP.Ntest,
JP.Nu, JP.Nk, JP.Nm,
JP.resid,
JP.MaxIter,
IRBLdiagonaliseWithEigen);
// IRBLdiagonaliseWithLAPACK);
IRBL.split_test=0;
std::vector<RealD> eval(JP.Nm);
std::vector<FermionField> src(JP.Nu,FrbGrid);
if (0)
{
// in case RNG is too slow
std::cout << GridLogMessage << "Using RNG5"<<std::endl;
FermionField src_tmp(FGrid);
for ( int i=0; i<JP.Nu; ++i ){
// gaussian(RNG5,src_tmp);
ComplexD rnd;
RealD re;
fillScalar(re,RNG5._gaussian[0],RNG5._generators[0]);
std::cout << i <<" / "<< JP.Nm <<" re "<< re << std::endl;
// printf("%d / %d re %e\n",i,FGrid->_processor,re);
src_tmp=re;
pickCheckerboard(Odd,src[i],src_tmp);
}
RNG5.Report();
} else {
std::cout << GridLogMessage << "Using RNG5rb"<<std::endl;
for ( int i=0; i<JP.Nu; ++i )
gaussian(RNG5rb,src[i]);
RNG5rb.Report();
}
std::vector<FermionField> evec(JP.Nm,FrbGrid);
for(int i=0;i<1;++i){
std::cout << GridLogMessage << i <<" / "<< JP.Nm <<" grid pointer "<< evec[i].Grid() << std::endl;
};
int Nconv;
IRBL.calc(eval,evec,src,Nconv,JP.Impl);
Grid_finalize();
}

27
tests/lanczos/dwf.batch Normal file
View File

@ -0,0 +1,27 @@
# run script on Cori
#!/bin/bash
#SBATCH -t 30
#SBATCH -N 16
#SBATCH -C knl
#SBATCH -A mp13
#SBATCH --ntasks-per-node=1
export OMP_PROC_BIND=true
export OMP_PLACES=threads
export OMP_NUM_THREADS=64
#BIN=benchmarks/Benchmark_ITT
#OPT='--cpu-bind=cores'
BIN=./Test_dwf_block_lanczos
rundir=.
CONF='--gconf '${rundir}'/ckpoint_lat.IEEE64BIG.5000'
VOL='--grid 16.16.16.32'
GRID='--mpi 1.2.2.4'
OPT='--mass 0.00054 --M5 1.8 --phase in.params --omega in.params --shm 4096'
BL='--rbl 4.32.32.30.7 --split 1.2.2.1 --check_int 8 --resid 1.0e-5 --cheby_l 0.0027 --cheby_u 7 --cheby_n 51'
#echo srun $OPT $BIN $gridoptions $jobparams
#srun $OPT $BIN $gridoptions $jobparams
echo srun $BIN $CONF $OPT $BL $VOL $GRID
srun $BIN $CONF $OPT $BL $VOL $GRID

17
tests/lanczos/in.params Normal file
View File

@ -0,0 +1,17 @@
boundary_phase 0 1 0
boundary_phase 1 1 0
boundary_phase 2 1 0
boundary_phase 3 -1 0
omega 0 0.375 0
omega 1 0.375 0
omega 2 0.375 0
omega 3 0.375 0
omega 4 0.375 0
omega 5 0.375 0
omega 6 0.375 0
omega 7 0.375 0
omega 8 0.375 0
omega 9 0.375 0
omega 10 0.375 0
omega 11 0.375 0