From 3cbe974eb48eb6ee97cb420b08475beeda5cc71b Mon Sep 17 00:00:00 2001 From: paboyle Date: Thu, 20 Oct 2016 16:55:21 +0100 Subject: [PATCH 01/16] Layout --- lib/tensors/Tensor_extract_merge.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lib/tensors/Tensor_extract_merge.h b/lib/tensors/Tensor_extract_merge.h index 41a431ad..a32d3785 100644 --- a/lib/tensors/Tensor_extract_merge.h +++ b/lib/tensors/Tensor_extract_merge.h @@ -250,8 +250,7 @@ void merge(vobj &vec,std::vector &extracted,int } } -template inline -void merge1(vobj &vec,std::vector &extracted,int offset) +template inline void merge1(vobj &vec,std::vector &extracted,int offset) { typedef typename vobj::scalar_type scalar_type ; typedef typename vobj::vector_type vector_type ; @@ -269,8 +268,7 @@ void merge1(vobj &vec,std::vector &extracted,int }} } -template inline -void merge2(vobj &vec,std::vector &extracted,int offset) +template inline void merge2(vobj &vec,std::vector &extracted,int offset) { typedef typename vobj::scalar_type scalar_type ; typedef typename vobj::vector_type vector_type ; From 8c043da5b7dd583c79e0b540277060e6311d27cc Mon Sep 17 00:00:00 2001 From: paboyle Date: Thu, 20 Oct 2016 16:56:05 +0100 Subject: [PATCH 02/16] SHMEM and comms allocator made different --- lib/AlignedAllocator.h | 130 ++++++++++++++++++++++++++++------------- 1 file changed, 88 insertions(+), 42 deletions(-) diff --git a/lib/AlignedAllocator.h b/lib/AlignedAllocator.h index 8f69971d..d0de1ec3 100644 --- a/lib/AlignedAllocator.h +++ b/lib/AlignedAllocator.h @@ -40,14 +40,6 @@ Author: Peter Boyle #include #endif -#ifdef GRID_COMMS_SHMEM -extern "C" { -#include -extern void * shmem_align(size_t, size_t); -extern void shmem_free(void *); -} -#endif - namespace Grid { //////////////////////////////////////////////////////////////////// @@ -65,28 +57,81 @@ public: typedef _Tp value_type; template struct rebind { typedef alignedAllocator<_Tp1> other; }; - alignedAllocator() throw() { } - alignedAllocator(const alignedAllocator&) throw() { } - template alignedAllocator(const alignedAllocator<_Tp1>&) throw() { } - ~alignedAllocator() throw() { } - pointer address(reference __x) const { return &__x; } - // const_pointer address(const_reference __x) const { return &__x; } - size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); } pointer allocate(size_type __n, const void* _p= 0) { +#ifdef HAVE_MM_MALLOC_H + _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128); +#else + _Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp)); +#endif + + _Tp tmp; +#ifdef GRID_NUMA +#pragma omp parallel for schedule(static) + for(int i=0;i<__n;i++){ + ptr[i]=tmp; + } +#endif + return ptr; + } + + void deallocate(pointer __p, size_type) { +#ifdef HAVE_MM_MALLOC_H + _mm_free((void *)__p); +#else + free((void *)__p); +#endif + } + void construct(pointer __p, const _Tp& __val) { }; + void construct(pointer __p) { }; + void destroy(pointer __p) { }; +}; +template inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; } +template inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; } + +////////////////////////////////////////////////////////////////////////////////////////// +// MPI3 : comms must use shm region +// SHMEM: comms must use symmetric heap +////////////////////////////////////////////////////////////////////////////////////////// #ifdef GRID_COMMS_SHMEM - - _Tp *ptr = (_Tp *) shmem_align(__n*sizeof(_Tp),64); - - +extern "C" { +#include +extern void * shmem_align(size_t, size_t); +extern void shmem_free(void *); +} #define PARANOID_SYMMETRIC_HEAP +#endif + +template +class commAllocator { +public: + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + typedef _Tp* pointer; + typedef const _Tp* const_pointer; + typedef _Tp& reference; + typedef const _Tp& const_reference; + typedef _Tp value_type; + + template struct rebind { typedef commAllocator<_Tp1> other; }; + commAllocator() throw() { } + commAllocator(const commAllocator&) throw() { } + template commAllocator(const commAllocator<_Tp1>&) throw() { } + ~commAllocator() throw() { } + pointer address(reference __x) const { return &__x; } + size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); } + +#ifdef GRID_COMMS_SHMEM + pointer allocate(size_type __n, const void* _p= 0) + { + _Tp *ptr = (_Tp *) shmem_align(__n*sizeof(_Tp),64); #ifdef PARANOID_SYMMETRIC_HEAP static void * bcast; static long psync[_SHMEM_REDUCE_SYNC_SIZE]; @@ -99,51 +144,52 @@ public: BACKTRACEFILE(); exit(0); } - assert( bcast == (void *) ptr); - #endif + return ptr; + } + void deallocate(pointer __p, size_type) { + shmem_free((void *)__p); + } +#elif defined(GRID_COMMS_MPI3) + pointer allocate(size_type __n, const void* _p= 0) + { +#error "implement MPI3 windowed allocate" + + } + void deallocate(pointer __p, size_type) { +#error "implement MPI3 windowed allocate" + } #else - + pointer allocate(size_type __n, const void* _p= 0) #ifdef HAVE_MM_MALLOC_H _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128); #else _Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp)); #endif - -#endif - _Tp tmp; -#ifdef GRID_NUMA -#pragma omp parallel for schedule(static) - for(int i=0;i<__n;i++){ - ptr[i]=tmp; - } -#endif return ptr; } - void deallocate(pointer __p, size_type) { -#ifdef GRID_COMMS_SHMEM - shmem_free((void *)__p); -#else #ifdef HAVE_MM_MALLOC_H _mm_free((void *)__p); #else free((void *)__p); -#endif #endif } +#endif void construct(pointer __p, const _Tp& __val) { }; void construct(pointer __p) { }; - void destroy(pointer __p) { }; }; +template inline bool operator==(const commAllocator<_Tp>&, const commAllocator<_Tp>&){ return true; } +template inline bool operator!=(const commAllocator<_Tp>&, const commAllocator<_Tp>&){ return false; } -template inline bool -operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; } - -template inline bool -operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; } +//////////////////////////////////////////////////////////////////////////////// +// Template typedefs +//////////////////////////////////////////////////////////////////////////////// +template using Vector = std::vector >; +template using commVector = std::vector >; +template using Matrix = std::vector > >; }; // namespace Grid #endif From 39f1c880b8172b0354064e4971a27e3cf3aef3d6 Mon Sep 17 00:00:00 2001 From: paboyle Date: Thu, 20 Oct 2016 16:56:40 +0100 Subject: [PATCH 03/16] mpi3 --- configure.ac | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/configure.ac b/configure.ac index 7bcdc49f..d6340a71 100644 --- a/configure.ac +++ b/configure.ac @@ -244,6 +244,9 @@ case ${ac_COMMS} in mpi) AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] ) ;; + mpi3) + AC_DEFINE([GRID_COMMS_MPI3],[1],[GRID_COMMS_MPI3] ) + ;; shmem) AC_DEFINE([GRID_COMMS_SHMEM],[1],[GRID_COMMS_SHMEM] ) ;; @@ -253,6 +256,7 @@ case ${ac_COMMS} in esac AM_CONDITIONAL(BUILD_COMMS_SHMEM,[ test "X${ac_COMMS}X" == "XshmemX" ]) AM_CONDITIONAL(BUILD_COMMS_MPI,[ test "X${ac_COMMS}X" == "XmpiX" || test "X${ac_COMMS}X" == "Xmpi-autoX" ]) +AM_CONDITIONAL(BUILD_COMMS_MPI3,[ test "X${ac_COMMS}X" == "Xmpi3X"] ) AM_CONDITIONAL(BUILD_COMMS_NONE,[ test "X${ac_COMMS}X" == "XnoneX" ]) ############### RNG selection From 4955672fc369baa88c83708838b8aef5502f8399 Mon Sep 17 00:00:00 2001 From: paboyle Date: Thu, 20 Oct 2016 16:57:00 +0100 Subject: [PATCH 04/16] MPI3 --- lib/Cshift.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/Cshift.h b/lib/Cshift.h index bbc56cae..4eeb6cea 100644 --- a/lib/Cshift.h +++ b/lib/Cshift.h @@ -38,6 +38,10 @@ Author: Peter Boyle #include #endif +#ifdef GRID_COMMS_MPI3 +#include +#endif + #ifdef GRID_COMMS_SHMEM #include // uses same implementation of communicator #endif From cbcfea466f75f185a171ffe529b9a14b7938aff3 Mon Sep 17 00:00:00 2001 From: paboyle Date: Thu, 20 Oct 2016 16:57:14 +0100 Subject: [PATCH 05/16] MPI3 --- lib/Makefile.am | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/Makefile.am b/lib/Makefile.am index bc752af5..35e76cc8 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -3,6 +3,10 @@ if BUILD_COMMS_MPI extra_sources+=communicator/Communicator_mpi.cc endif +if BUILD_COMMS_MPI3 + extra_sources+=communicator/Communicator_mpi3.cc +endif + if BUILD_COMMS_SHMEM extra_sources+=communicator/Communicator_shmem.cc endif From c7cccaaa69e7888db33fe8c8c82612da43dee15c Mon Sep 17 00:00:00 2001 From: paboyle Date: Thu, 20 Oct 2016 16:57:31 +0100 Subject: [PATCH 06/16] Comm vector for shmem --- lib/Stencil.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/Stencil.h b/lib/Stencil.h index db608fe7..65de9945 100644 --- a/lib/Stencil.h +++ b/lib/Stencil.h @@ -71,7 +71,7 @@ namespace Grid { template void -Gather_plane_simple_table_compute (const Lattice &rhs,std::vector > &buffer,int dimension,int plane,int cbmask,compressor &compress, int off,std::vector >& table) +Gather_plane_simple_table_compute (const Lattice &rhs,commVector &buffer,int dimension,int plane,int cbmask,compressor &compress, int off,std::vector >& table) { table.resize(0); int rd = rhs._grid->_rdimensions[dimension]; @@ -109,7 +109,7 @@ Gather_plane_simple_table_compute (const Lattice &rhs,std::vector void -Gather_plane_simple_table (std::vector >& table,const Lattice &rhs,std::vector > &buffer, +Gather_plane_simple_table (std::vector >& table,const Lattice &rhs,commVector &buffer, compressor &compress, int off,int so) { PARALLEL_FOR_LOOP @@ -119,7 +119,7 @@ PARALLEL_FOR_LOOP } template void -Gather_plane_simple_stencil (const Lattice &rhs,std::vector > &buffer,int dimension,int plane,int cbmask,compressor &compress, int off, +Gather_plane_simple_stencil (const Lattice &rhs,commVector &buffer,int dimension,int plane,int cbmask,compressor &compress, int off, double &t_table ,double & t_data ) { std::vector > table; @@ -347,10 +347,10 @@ Gather_plane_simple_stencil (const Lattice &rhs,std::vector > u_simd_send_buf; - std::vector > u_simd_recv_buf; - Vector u_send_buf; - Vector comm_buf; + std::vector > u_simd_send_buf; + std::vector > u_simd_recv_buf; + commVector u_send_buf; + commVector comm_buf; int u_comm_offset; int _unified_buffer_size; From 5fe2b85cbd951e2a0137e8ce291f0556bbb091ea Mon Sep 17 00:00:00 2001 From: paboyle Date: Thu, 20 Oct 2016 16:58:01 +0100 Subject: [PATCH 07/16] MPI3 and shared memory support --- lib/communicator/Communicator_base.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/lib/communicator/Communicator_base.h b/lib/communicator/Communicator_base.h index 94d277e9..9b5ae8cb 100644 --- a/lib/communicator/Communicator_base.h +++ b/lib/communicator/Communicator_base.h @@ -34,6 +34,9 @@ Author: Peter Boyle #ifdef GRID_COMMS_MPI #include #endif +#ifdef GRID_COMMS_MPI3 +#include +#endif #ifdef GRID_COMMS_SHMEM #include #endif @@ -52,6 +55,29 @@ class CartesianCommunicator { #ifdef GRID_COMMS_MPI MPI_Comm communicator; typedef MPI_Request CommsRequest_t; +#elif GRID_COMMS_MPI3 + MPI_Comm communicator; + typedef MPI_Request CommsRequest_t; + + const int MAXLOG2RANKSPERNODE = 16; // 65536 ranks per node adequate for now + + std::vector WorldDims; + std::vector GroupDims; + std::vector ShmDims; + + std::vector GroupCoor; + std::vector ShmCoor; + std::vector WorldCoor; + + int GroupRank; + int ShmRank; + int WorldRank; + + int GroupSize; + int ShmSize; + int WorldSize; + + std::vector LexicographicToWorldRank; #else typedef int CommsRequest_t; #endif From 9b39f35ae6c3942f21541745cb50b79ec8077981 Mon Sep 17 00:00:00 2001 From: paboyle Date: Thu, 20 Oct 2016 16:58:53 +0100 Subject: [PATCH 08/16] commVector different for SHMEM compat --- lib/cshift/Cshift_common.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/cshift/Cshift_common.h b/lib/cshift/Cshift_common.h index b0e9b798..2b146daa 100644 --- a/lib/cshift/Cshift_common.h +++ b/lib/cshift/Cshift_common.h @@ -45,7 +45,7 @@ public: // Gather for when there is no need to SIMD split with compression /////////////////////////////////////////////////////////////////// template void -Gather_plane_simple (const Lattice &rhs,std::vector > &buffer,int dimension,int plane,int cbmask,compressor &compress, int off=0) +Gather_plane_simple (const Lattice &rhs,commVector &buffer,int dimension,int plane,int cbmask,compressor &compress, int off=0) { int rd = rhs._grid->_rdimensions[dimension]; @@ -114,6 +114,7 @@ PARALLEL_NESTED_LOOP2 int o = n*n1; int offset = b+n*n2; cobj temp =compress(rhs._odata[so+o+b]); + extract(temp,pointers,offset); } @@ -121,6 +122,7 @@ PARALLEL_NESTED_LOOP2 } else { assert(0); //Fixme think this is buggy + for(int n=0;n_slice_stride[dimension]; @@ -139,7 +141,7 @@ PARALLEL_NESTED_LOOP2 ////////////////////////////////////////////////////// // Gather for when there is no need to SIMD split ////////////////////////////////////////////////////// -template void Gather_plane_simple (const Lattice &rhs,std::vector > &buffer, int dimension,int plane,int cbmask) +template void Gather_plane_simple (const Lattice &rhs,commVector &buffer, int dimension,int plane,int cbmask) { SimpleCompressor dontcompress; Gather_plane_simple (rhs,buffer,dimension,plane,cbmask,dontcompress); @@ -157,7 +159,7 @@ template void Gather_plane_extract(const Lattice &rhs,std::vec ////////////////////////////////////////////////////// // Scatter for when there is no need to SIMD split ////////////////////////////////////////////////////// -template void Scatter_plane_simple (Lattice &rhs,std::vector > &buffer, int dimension,int plane,int cbmask) +template void Scatter_plane_simple (Lattice &rhs,commVector &buffer, int dimension,int plane,int cbmask) { int rd = rhs._grid->_rdimensions[dimension]; From 4f8e636a438007a14ff95b2e59fe6297a6c38283 Mon Sep 17 00:00:00 2001 From: paboyle Date: Thu, 20 Oct 2016 16:59:16 +0100 Subject: [PATCH 09/16] commVector --- lib/cshift/Cshift_mpi.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/cshift/Cshift_mpi.h b/lib/cshift/Cshift_mpi.h index 704fda34..b3c07cd6 100644 --- a/lib/cshift/Cshift_mpi.h +++ b/lib/cshift/Cshift_mpi.h @@ -119,8 +119,8 @@ template void Cshift_comms(Lattice &ret,const Lattice &r assert(shift_slice_nblock[dimension]*rhs._grid->_slice_block[dimension]; - std::vector > send_buf(buffer_size); - std::vector > recv_buf(buffer_size); + commVector send_buf(buffer_size); + commVector recv_buf(buffer_size); int cb= (cbmask==0x2)? Odd : Even; int sshift= rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb); @@ -191,8 +191,8 @@ template void Cshift_comms_simd(Lattice &ret,const Lattice_slice_nblock[dimension]*grid->_slice_block[dimension]; int words = sizeof(vobj)/sizeof(vector_type); - std::vector > send_buf_extract(Nsimd,Vector(buffer_size) ); - std::vector > recv_buf_extract(Nsimd,Vector(buffer_size) ); + std::vector > send_buf_extract(Nsimd,commVector(buffer_size) ); + std::vector > recv_buf_extract(Nsimd,commVector(buffer_size) ); int bytes = buffer_size*sizeof(scalar_object); From f9d5e95d72291c54a5be219e9a3333e9296a45da Mon Sep 17 00:00:00 2001 From: paboyle Date: Thu, 20 Oct 2016 16:59:39 +0100 Subject: [PATCH 10/16] allocator template typedefs moved to AlignedAllocator --- lib/lattice/Lattice_base.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/lib/lattice/Lattice_base.h b/lib/lattice/Lattice_base.h index 3bfa5613..e04c1443 100644 --- a/lib/lattice/Lattice_base.h +++ b/lib/lattice/Lattice_base.h @@ -65,9 +65,6 @@ public: class LatticeExpressionBase {}; -template using Vector = std::vector >; // Aligned allocator?? -template using Matrix = std::vector > >; // Aligned allocator?? - template class LatticeUnaryExpression : public std::pair > , public LatticeExpressionBase { public: From b58adc6a4bbfa461acb8cdc3d90133ea04ecc35f Mon Sep 17 00:00:00 2001 From: paboyle Date: Thu, 20 Oct 2016 17:00:15 +0100 Subject: [PATCH 11/16] commVector --- lib/communicator/Communicator_mpi3.cc | 358 ++++++++++++++++++++ lib/qcd/action/fermion/WilsonKernels.cc | 6 +- lib/qcd/action/fermion/WilsonKernels.h | 22 +- lib/qcd/action/fermion/WilsonKernelsAsm.cc | 16 +- lib/qcd/action/fermion/WilsonKernelsHand.cc | 16 +- 5 files changed, 388 insertions(+), 30 deletions(-) create mode 100644 lib/communicator/Communicator_mpi3.cc diff --git a/lib/communicator/Communicator_mpi3.cc b/lib/communicator/Communicator_mpi3.cc new file mode 100644 index 00000000..11734fd9 --- /dev/null +++ b/lib/communicator/Communicator_mpi3.cc @@ -0,0 +1,358 @@ + /************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/communicator/Communicator_mpi.cc + + Copyright (C) 2015 + +Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ + /* END LEGAL */ +#include "Grid.h" +#include + +namespace Grid { + +// Global used by Init and nowhere else. How to hide? +int Rank(void) { + int pe; + MPI_Comm_rank(MPI_COMM_WORLD,&pe); + return pe; +} + // Should error check all MPI calls. +void CartesianCommunicator::Init(int *argc, char ***argv) { + int flag; + MPI_Initialized(&flag); // needed to coexist with other libs apparently + if ( !flag ) { + MPI_Init(argc,argv); + } +} + //////////////////////////////////////////////////////////////////////////////////////////////////////////// + // Want to implement some magic ... Group sub-cubes into those on same node + // + //////////////////////////////////////////////////////////////////////////////////////////////////////////// + +void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest) +{ + std::vector coor = _processor_coor; + + assert(std::abs(shift) <_processors[dim]); + + coor[dim] = (_processor_coor[dim] + shift + _processors[dim])%_processors[dim]; + Lexicographic::IndexFromCoor(coor,source,_processors); + source = LexicographicToWorldRank[source]; + + coor[dim] = (_processor_coor[dim] - shift + _processors[dim])%_processors[dim]; + Lexicographic::IndexFromCoor(coor,dest,_processors); + dest = LexicographicToWorldRank[dest]; +} +int CartesianCommunicator::RankFromProcessorCoor(std::vector &coor) +{ + int rank; + Lexicographic::IndexFromCoor(coor,rank,_processors); + rank = LexicographicToWorldRank[rank]; + return rank; +} +void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector &coor) +{ + Lexicographic::CoorFromIndex(coor,rank,_processors); + rank = LexicographicToWorldRank[rank]; +} + +CartesianCommunicator::CartesianCommunicator(const std::vector &processors) +{ + _ndimension = processors.size(); + std::cout << "Creating "<< _ndimension << " dim communicator "< world_ranks(WorldSize); + std::vector group_ranks(WorldSize); + std::vector mygroup(GroupSize); + for(int r=0;r()); + int myleader = mygroup[0]; + + std::vector leaders_1hot(WorldSize,0); + std::vector leaders_group(GroupSize,0); + leaders_1hot [ myleader ] = 1; + + /////////////////////////////////////////////////////////////////// + // global sum leaders over comm world + /////////////////////////////////////////////////////////////////// + int ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,communicator); + assert(ierr==0); + + /////////////////////////////////////////////////////////////////// + // find the group leaders world rank + /////////////////////////////////////////////////////////////////// + int group=0; + for(int l=0;l reqs(0); + SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes); + SendToRecvFromComplete(reqs); +} + +void CartesianCommunicator::SendRecvPacket(void *xmit, + void *recv, + int sender, + int receiver, + int bytes) +{ + MPI_Status stat; + assert(sender != receiver); + int tag = sender; + if ( _processor == sender ) { + MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator); + } + if ( _processor == receiver ) { + MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat); + } +} + +// Basic Halo comms primitive +void CartesianCommunicator::SendToRecvFromBegin(std::vector &list, + void *xmit, + int dest, + void *recv, + int from, + int bytes) +{ + MPI_Request xrq; + MPI_Request rrq; + int rank = _processor; + int ierr; + ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq); + ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq); + + assert(ierr==0); + + list.push_back(xrq); + list.push_back(rrq); +} +void CartesianCommunicator::SendToRecvFromComplete(std::vector &list) +{ + int nreq=list.size(); + std::vector status(nreq); + int ierr = MPI_Waitall(nreq,&list[0],&status[0]); + + assert(ierr==0); +} + +void CartesianCommunicator::Barrier(void) +{ + int ierr = MPI_Barrier(communicator); + assert(ierr==0); +} + +void CartesianCommunicator::Broadcast(int root,void* data, int bytes) +{ + int ierr=MPI_Bcast(data, + bytes, + MPI_BYTE, + root, + communicator); + assert(ierr==0); +} + +void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) +{ + int ierr= MPI_Bcast(data, + bytes, + MPI_BYTE, + root, + MPI_COMM_WORLD); + assert(ierr==0); +} + +} + diff --git a/lib/qcd/action/fermion/WilsonKernels.cc b/lib/qcd/action/fermion/WilsonKernels.cc index 49bd98c5..58e62cc8 100644 --- a/lib/qcd/action/fermion/WilsonKernels.cc +++ b/lib/qcd/action/fermion/WilsonKernels.cc @@ -45,7 +45,7 @@ WilsonKernels::WilsonKernels(const ImplParams &p) : Base(p){}; template void WilsonKernels::DiracOptGenericDhopSiteDag( StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, - std::vector > &buf, int sF, + commVector &buf, int sF, int sU, const FermionField &in, FermionField &out) { SiteHalfSpinor tmp; SiteHalfSpinor chi; @@ -222,7 +222,7 @@ void WilsonKernels::DiracOptGenericDhopSiteDag( template void WilsonKernels::DiracOptGenericDhopSite( StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, - std::vector > &buf, int sF, + commVector &buf, int sF, int sU, const FermionField &in, FermionField &out) { SiteHalfSpinor tmp; SiteHalfSpinor chi; @@ -398,7 +398,7 @@ void WilsonKernels::DiracOptGenericDhopSite( template void WilsonKernels::DiracOptDhopDir( StencilImpl &st, DoubledGaugeField &U, - std::vector > &buf, int sF, + commVector &buf, int sF, int sU, const FermionField &in, FermionField &out, int dir, int gamma) { SiteHalfSpinor tmp; SiteHalfSpinor chi; diff --git a/lib/qcd/action/fermion/WilsonKernels.h b/lib/qcd/action/fermion/WilsonKernels.h index 23c145de..66770c3c 100644 --- a/lib/qcd/action/fermion/WilsonKernels.h +++ b/lib/qcd/action/fermion/WilsonKernels.h @@ -58,7 +58,7 @@ namespace Grid { typename std::enable_if::type DiracOptDhopSite( StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, - std::vector > &buf, + commVector &buf, int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out) { #ifdef AVX512 @@ -89,7 +89,7 @@ namespace Grid { typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type DiracOptDhopSite( StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, - std::vector > &buf, + commVector &buf, int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out) { for (int site = 0; site < Ns; site++) { @@ -107,7 +107,7 @@ namespace Grid { void>::type DiracOptDhopSiteDag( StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, - std::vector > &buf, + commVector &buf, int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out) { #ifdef AVX512 @@ -139,7 +139,7 @@ namespace Grid { void>::type DiracOptDhopSiteDag( StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, - std::vector > &buf, + commVector &buf, int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out) { for (int site = 0; site < Ns; site++) { @@ -154,7 +154,7 @@ namespace Grid { void DiracOptDhopDir( StencilImpl &st, DoubledGaugeField &U, - std::vector > &buf, + commVector &buf, int sF, int sU, const FermionField &in, FermionField &out, int dirdisp, int gamma); @@ -162,34 +162,34 @@ namespace Grid { // Specialised variants void DiracOptGenericDhopSite( StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, - std::vector > &buf, + commVector &buf, int sF, int sU, const FermionField &in, FermionField &out); void DiracOptGenericDhopSiteDag( StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, - std::vector > &buf, + commVector &buf, int sF, int sU, const FermionField &in, FermionField &out); void DiracOptAsmDhopSite( StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, - std::vector > &buf, + commVector &buf, int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out); void DiracOptAsmDhopSiteDag( StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, - std::vector > &buf, + commVector &buf, int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out); void DiracOptHandDhopSite( StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, - std::vector > &buf, + commVector &buf, int sF, int sU, const FermionField &in, FermionField &out); void DiracOptHandDhopSiteDag( StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, - std::vector > &buf, + commVector &buf, int sF, int sU, const FermionField &in, FermionField &out); public: diff --git a/lib/qcd/action/fermion/WilsonKernelsAsm.cc b/lib/qcd/action/fermion/WilsonKernelsAsm.cc index b09699ef..7857e89a 100644 --- a/lib/qcd/action/fermion/WilsonKernelsAsm.cc +++ b/lib/qcd/action/fermion/WilsonKernelsAsm.cc @@ -40,14 +40,14 @@ namespace Grid { /////////////////////////////////////////////////////////// template void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, + commVector &buf, int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) { assert(0); } template void WilsonKernels::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, + commVector &buf, int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) { assert(0); @@ -86,14 +86,14 @@ namespace Grid { #undef KERNEL_DAG template<> void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, + commVector &buf, int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) #include #define KERNEL_DAG template<> void WilsonKernels::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, + commVector &buf, int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) #include @@ -111,14 +111,14 @@ namespace Grid { #undef KERNEL_DAG template<> void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, + commVector &buf, int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) #include #define KERNEL_DAG template<> void WilsonKernels::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, - std::vector > &buf, + commVector &buf, int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) #include @@ -127,10 +127,10 @@ namespace Grid { #define INSTANTIATE_ASM(A)\ template void WilsonKernels::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\ - std::vector > &buf,\ + commVector &buf,\ int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ template void WilsonKernels::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\ - std::vector > &buf,\ + commVector &buf,\ int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ diff --git a/lib/qcd/action/fermion/WilsonKernelsHand.cc b/lib/qcd/action/fermion/WilsonKernelsHand.cc index 15c8ab56..9d7eac23 100644 --- a/lib/qcd/action/fermion/WilsonKernelsHand.cc +++ b/lib/qcd/action/fermion/WilsonKernelsHand.cc @@ -313,7 +313,7 @@ namespace QCD { template void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, + commVector &buf, int ss,int sU,const FermionField &in, FermionField &out) { typedef typename Simd::scalar_type S; @@ -556,7 +556,7 @@ namespace QCD { template void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, + commVector &buf, int ss,int sU,const FermionField &in, FermionField &out) { // std::cout << "Hand op Dhop "< void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, + commVector &buf, int sF,int sU,const FermionField &in, FermionField &out) { assert(0); @@ -812,7 +812,7 @@ void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,Leb template<> void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, + commVector &buf, int sF,int sU,const FermionField &in, FermionField &out) { assert(0); @@ -820,7 +820,7 @@ void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st, template<> void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, + commVector &buf, int sF,int sU,const FermionField &in, FermionField &out) { assert(0); @@ -828,7 +828,7 @@ void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,Leb template<> void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, - std::vector > &buf, + commVector &buf, int sF,int sU,const FermionField &in, FermionField &out) { assert(0); @@ -841,10 +841,10 @@ void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st, #define INSTANTIATE_THEM(A) \ template void WilsonKernels::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\ - std::vector > &buf,\ + commVector &buf,\ int ss,int sU,const FermionField &in, FermionField &out);\ template void WilsonKernels::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\ - std::vector > &buf,\ + commVector &buf,\ int ss,int sU,const FermionField &in, FermionField &out); INSTANTIATE_THEM(WilsonImplF); From 5b5925b8e5aa454467f9b53d35dcd9f779a302b2 Mon Sep 17 00:00:00 2001 From: paboyle Date: Thu, 20 Oct 2016 17:09:40 +0100 Subject: [PATCH 12/16] Forgot to add --- lib/AlignedAllocator.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/AlignedAllocator.h b/lib/AlignedAllocator.h index d0de1ec3..89731246 100644 --- a/lib/AlignedAllocator.h +++ b/lib/AlignedAllocator.h @@ -161,7 +161,8 @@ public: #error "implement MPI3 windowed allocate" } #else - pointer allocate(size_type __n, const void* _p= 0) + pointer allocate(size_type __n, const void* _p= 0) + { #ifdef HAVE_MM_MALLOC_H _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128); #else @@ -187,9 +188,9 @@ template inline bool operator!=(const commAllocator<_Tp>&, const //////////////////////////////////////////////////////////////////////////////// // Template typedefs //////////////////////////////////////////////////////////////////////////////// -template using Vector = std::vector >; +template using Vector = std::vector >; template using commVector = std::vector >; -template using Matrix = std::vector > >; +template using Matrix = std::vector > >; }; // namespace Grid #endif From 202078eb1b9d02aa5eee05f1ffe0528801a3ef05 Mon Sep 17 00:00:00 2001 From: azusayamaguchi Date: Fri, 21 Oct 2016 09:07:20 +0100 Subject: [PATCH 13/16] Cray / OpenSHMEM ordering differs --- lib/AlignedAllocator.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/AlignedAllocator.h b/lib/AlignedAllocator.h index 89731246..8301d811 100644 --- a/lib/AlignedAllocator.h +++ b/lib/AlignedAllocator.h @@ -130,8 +130,12 @@ public: #ifdef GRID_COMMS_SHMEM pointer allocate(size_type __n, const void* _p= 0) - { + { +#ifdef CRAY _Tp *ptr = (_Tp *) shmem_align(__n*sizeof(_Tp),64); +#else + _Tp *ptr = (_Tp *) shmem_align(64,__n*sizeof(_Tp)); +#endif #ifdef PARANOID_SYMMETRIC_HEAP static void * bcast; static long psync[_SHMEM_REDUCE_SYNC_SIZE]; From 20a091c3eddfdb67a82ece6413740a93650a2f98 Mon Sep 17 00:00:00 2001 From: azusayamaguchi Date: Fri, 21 Oct 2016 09:08:36 +0100 Subject: [PATCH 14/16] Intel vs. Clang intrinsics differences absorbed --- lib/simd/Grid_avx512.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/lib/simd/Grid_avx512.h b/lib/simd/Grid_avx512.h index 821898d9..d4fd446c 100644 --- a/lib/simd/Grid_avx512.h +++ b/lib/simd/Grid_avx512.h @@ -41,6 +41,22 @@ Author: paboyle namespace Grid{ namespace Optimization { + + template + union uconv { + __m512 f; + vtype v; + }; + + union u512f { + __m512 v; + float f[8]; + }; + + union u512d { + __m512 v; + double f[4]; + }; struct Vsplat{ //Complex float @@ -361,8 +377,9 @@ namespace Optimization { // Some Template specialization // Hack for CLANG until mm512_reduce_add_ps etc... are implemented in GCC and Clang releases -#undef GNU_CLANG_COMPILER -#ifdef GNU_CLANG_COMPILER + +#ifndef __INTEL_COMPILER +#warning "Slow reduction due to incomplete reduce intrinsics" //Complex float Reduce template<> inline Grid::ComplexF Reduce::operator()(__m512 in){ From 63d219498ba4a036d74c8520e7455c94543cff49 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Fri, 21 Oct 2016 13:10:13 +0100 Subject: [PATCH 15/16] first (dirty) implementation of Feynman stoctachtic EM field --- lib/qcd/action/gauge/Photon.h | 344 ++++++++++++++++++++++------------ 1 file changed, 226 insertions(+), 118 deletions(-) diff --git a/lib/qcd/action/gauge/Photon.h b/lib/qcd/action/gauge/Photon.h index 29d40f8b..ea6f21b9 100644 --- a/lib/qcd/action/gauge/Photon.h +++ b/lib/qcd/action/gauge/Photon.h @@ -1,126 +1,234 @@ - /************************************************************************************* - - Grid physics library, www.github.com/paboyle/Grid - - Source file: ./lib/qcd/action/gauge/Photon.h - - Copyright (C) 2015 - -Author: Peter Boyle - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - See the full license in the file "LICENSE" in the top level distribution directory - *************************************************************************************/ - /* END LEGAL */ +/************************************************************************************* + + Grid physics library, www.github.com/paboyle/Grid + + Source file: ./lib/qcd/action/gauge/Photon.h + + Copyright (C) 2015 + + Author: Peter Boyle + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + See the full license in the file "LICENSE" in the top level distribution directory + *************************************************************************************/ +/* END LEGAL */ #ifndef QCD_PHOTON_ACTION_H #define QCD_PHOTON_ACTION_H namespace Grid{ - namespace QCD{ +namespace QCD{ + + template + class Photon + { + public: + INHERIT_GIMPL_TYPES(Gimpl); + enum class Gauge {Feynman, Coulomb, Landau}; + enum class ZmScheme {QedL, QedTL}; + public: + Photon(Gauge gauge, ZmScheme zmScheme); + virtual ~Photon(void) = default; + void FreePropagator(const GaugeField &in, GaugeField &out); + void MomentumSpacePropagator(const GaugeField &in, GaugeField &out); + void StochasticField(GaugeField &out, GridParallelRNG &rng); + private: + void kHatSquared(GaugeLinkField &out); + void zmSub(GaugeLinkField &out); + private: + Gauge gauge_; + ZmScheme zmScheme_; + }; - template - class Photon { - - public: - - INHERIT_GIMPL_TYPES(Gimpl); - - enum PhotonType { FEYNMAN_L, FEYNMAN_TL }; - - PhotonType GaugeBC; - - Photon(PhotonType _GaugeBC) : GaugeBC(_GaugeBC){}; - - void FreePropagator (const GaugeField &in,GaugeField &out) { - FFT theFFT((GridCartesian *) in._grid); - - GaugeField in_k(in._grid); - GaugeField prop_k(in._grid); - - theFFT.FFT_all_dim(in_k,in,FFT::forward); - MomentumSpacePropagator(prop_k,in_k); - theFFT.FFT_all_dim(out,prop_k,FFT::backward); - } - void MomentumSpacePropagator(GaugeField &out,const GaugeField &in) { - if ( GaugeBC == FEYNMAN_L ) { - FeynmanGaugeMomentumSpacePropagator_L(out,in); - } else if ( GaugeBC == FEYNMAN_TL ) { - FeynmanGaugeMomentumSpacePropagator_TL(out,in); - } else { // No coulomb yet - assert(0); - } - } - void FeynmanGaugeMomentumSpacePropagator_L(GaugeField &out, const GaugeField &in) { - - FeynmanGaugeMomentumSpacePropagator_TL(out,in); - - GridBase *grid = out._grid; - LatticeInteger coor(grid); - GaugeField zz(grid); zz=zero; - - // xyzt - for(int d = 0; d < grid->_ndimension-1;d++){ - LatticeCoordinate(coor,d); - out = where(coor==Integer(0),zz,out); - } - } - - void FeynmanGaugeMomentumSpacePropagator_TL(GaugeField &out, const GaugeField &in) { - - // what type LatticeComplex - GridBase *grid = out._grid; - int nd = grid->_ndimension; - - typedef typename GaugeField::vector_type vector_type; - typedef typename GaugeField::scalar_type ScalComplex; - typedef Lattice > LatComplex; + template + Photon::Photon(Gauge gauge, ZmScheme zmScheme) + : gauge_(gauge), zmScheme_(zmScheme) + {} + + template + void Photon::FreePropagator (const GaugeField &in,GaugeField &out) + { + FFT theFFT(in._grid); - std::vector latt_size = grid->_fdimensions; - - LatComplex denom(grid); denom= zero; - LatComplex one(grid); one = ScalComplex(1.0,0.0); - LatComplex kmu(grid); - - ScalComplex ci(0.0,1.0); - // momphase = n * 2pi / L - for(int mu=0;mu zero_mode(nd,0); - TComplexD Tone = ComplexD(1.0,0.0); - TComplexD Tzero= ComplexD(0.0,0.0); - - pokeSite(Tone,denom,zero_mode); - - denom= one/denom; - - pokeSite(Tzero,denom,zero_mode); - - out = zero; - out = in*denom; - }; - - }; - + GaugeField in_k(in._grid); + GaugeField prop_k(in._grid); + + theFFT.FFT_all_dim(in_k,in,FFT::forward); + MomentumSpacePropagator(prop_k,in_k); + theFFT.FFT_all_dim(out,prop_k,FFT::backward); + } + + template + void Photon::kHatSquared(GaugeLinkField &out) + { + GridBase *grid = out._grid; + GaugeLinkField kmu(grid); + const unsigned int nd = grid->_ndimension; + std::vector &l = grid->_fdimensions; + + out = zero; + for(int mu = 0; mu < nd; mu++) + { + RealD twoPiL = M_PI*2./l[mu]; + + LatticeCoordinate(kmu,mu); + kmu = 2.*sin(.5*twoPiL*kmu); + out = out + kmu*kmu; + } + } + + template + void Photon::zmSub(GaugeLinkField &out) + { + GridBase *grid = out._grid; + const unsigned int nd = grid->_ndimension; + + switch (zmScheme_) + { + case ZmScheme::QedTL: + { + std::vector zm(nd,0); + TComplexD Tzero = ComplexD(0.0,0.0); + + pokeSite(Tzero, out, zm); + + break; + } + case ZmScheme::QedL: + { + LatticeInteger spNrm(grid), coor(grid); + GaugeLinkField z(grid); + + spNrm = zero; + for(int d = 0; d < grid->_ndimension - 1; d++) + { + LatticeCoordinate(coor,d); + spNrm = spNrm + coor*coor; + } + out = where(spNrm == 0, 0.*out, out); + + break; + } + default: + break; + } + } + + template + void Photon::MomentumSpacePropagator(const GaugeField &in, + GaugeField &out) + { + GridBase *grid = out._grid; + const unsigned int nd = grid->_ndimension; + LatticeComplex k2Inv(grid), one(grid); + + one = ComplexD(1.0,0.0); + kHatSquared(k2Inv); + + std::vector zm(nd,0); + TComplexD Tone = ComplexD(1.0,0.0); + TComplexD Tzero= ComplexD(0.0,0.0); + + pokeSite(Tone, k2Inv, zm); + k2Inv = one/k2Inv; + pokeSite(Tzero, k2Inv,zm); + + out = in*k2Inv; + } + + template + void Photon::StochasticField(GaugeField &out, GridParallelRNG &rng) + { + auto *grid = out._grid; + const unsigned int nd = grid->_ndimension; + GaugeLinkField k2(grid), r(grid); + GaugeField aTilde(grid); + FFT fft(grid); + + kHatSquared(k2); + zmSub(k2); + for(int mu = 0; mu < nd; mu++) + { + gaussian(rng, r); + r = k2*r; + pokeLorentz(aTilde, r, mu); + } + fft.FFT_all_dim(out, aTilde, FFT::backward); + } +// template +// void Photon::FeynmanGaugeMomentumSpacePropagator_L(GaugeField &out, +// const GaugeField &in) +// { +// +// FeynmanGaugeMomentumSpacePropagator_TL(out,in); +// +// GridBase *grid = out._grid; +// LatticeInteger coor(grid); +// GaugeField zz(grid); zz=zero; +// +// // xyzt +// for(int d = 0; d < grid->_ndimension-1;d++){ +// LatticeCoordinate(coor,d); +// out = where(coor==Integer(0),zz,out); +// } +// } +// +// template +// void Photon::FeynmanGaugeMomentumSpacePropagator_TL(GaugeField &out, +// const GaugeField &in) +// { +// +// // what type LatticeComplex +// GridBase *grid = out._grid; +// int nd = grid->_ndimension; +// +// typedef typename GaugeField::vector_type vector_type; +// typedef typename GaugeField::scalar_type ScalComplex; +// typedef Lattice > LatComplex; +// +// std::vector latt_size = grid->_fdimensions; +// +// LatComplex denom(grid); denom= zero; +// LatComplex one(grid); one = ScalComplex(1.0,0.0); +// LatComplex kmu(grid); +// +// ScalComplex ci(0.0,1.0); +// // momphase = n * 2pi / L +// for(int mu=0;mu zero_mode(nd,0); +// TComplexD Tone = ComplexD(1.0,0.0); +// TComplexD Tzero= ComplexD(0.0,0.0); +// +// pokeSite(Tone,denom,zero_mode); +// +// denom= one/denom; +// +// pokeSite(Tzero,denom,zero_mode); +// +// out = zero; +// out = in*denom; +// }; + }} #endif From 462921e5491fbef99c93cfc460819890b0a63b66 Mon Sep 17 00:00:00 2001 From: Antonin Portelli Date: Fri, 21 Oct 2016 14:41:08 +0100 Subject: [PATCH 16/16] QED: fix stochastic field --- lib/qcd/action/gauge/Photon.h | 37 +++++++++++++++++------------------ 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/lib/qcd/action/gauge/Photon.h b/lib/qcd/action/gauge/Photon.h index ea6f21b9..aac72898 100644 --- a/lib/qcd/action/gauge/Photon.h +++ b/lib/qcd/action/gauge/Photon.h @@ -45,7 +45,7 @@ namespace QCD{ void MomentumSpacePropagator(const GaugeField &in, GaugeField &out); void StochasticField(GaugeField &out, GridParallelRNG &rng); private: - void kHatSquared(GaugeLinkField &out); + void invKHatSquared(GaugeLinkField &out); void zmSub(GaugeLinkField &out); private: Gauge gauge_; @@ -71,13 +71,17 @@ namespace QCD{ } template - void Photon::kHatSquared(GaugeLinkField &out) + void Photon::invKHatSquared(GaugeLinkField &out) { GridBase *grid = out._grid; - GaugeLinkField kmu(grid); + GaugeLinkField kmu(grid), one(grid); const unsigned int nd = grid->_ndimension; std::vector &l = grid->_fdimensions; + std::vector zm(nd,0); + TComplexD Tone = ComplexD(1.0,0.0); + TComplexD Tzero= ComplexD(0.0,0.0); + one = ComplexD(1.0,0.0); out = zero; for(int mu = 0; mu < nd; mu++) { @@ -87,6 +91,9 @@ namespace QCD{ kmu = 2.*sin(.5*twoPiL*kmu); out = out + kmu*kmu; } + pokeSite(Tone, out, zm); + out = one/out; + pokeSite(Tzero, out,zm); } template @@ -131,19 +138,10 @@ namespace QCD{ GaugeField &out) { GridBase *grid = out._grid; - const unsigned int nd = grid->_ndimension; - LatticeComplex k2Inv(grid), one(grid); + LatticeComplex k2Inv(grid); - one = ComplexD(1.0,0.0); - kHatSquared(k2Inv); - - std::vector zm(nd,0); - TComplexD Tone = ComplexD(1.0,0.0); - TComplexD Tzero= ComplexD(0.0,0.0); - - pokeSite(Tone, k2Inv, zm); - k2Inv = one/k2Inv; - pokeSite(Tzero, k2Inv,zm); + invKHatSquared(k2Inv); + zmSub(k2Inv); out = in*k2Inv; } @@ -153,16 +151,17 @@ namespace QCD{ { auto *grid = out._grid; const unsigned int nd = grid->_ndimension; - GaugeLinkField k2(grid), r(grid); + GaugeLinkField sqrtK2Inv(grid), r(grid); GaugeField aTilde(grid); FFT fft(grid); - kHatSquared(k2); - zmSub(k2); + invKHatSquared(sqrtK2Inv); + sqrtK2Inv = sqrt(real(sqrtK2Inv)); + zmSub(sqrtK2Inv); for(int mu = 0; mu < nd; mu++) { gaussian(rng, r); - r = k2*r; + r = sqrtK2Inv*r; pokeLorentz(aTilde, r, mu); } fft.FFT_all_dim(out, aTilde, FFT::backward);