1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 15:55:37 +00:00

Merge branch 'feature/feynman-rules' into feature/qed-fvol

This commit is contained in:
Antonin Portelli 2016-10-21 14:42:18 +01:00
commit ab31ad006a
17 changed files with 781 additions and 213 deletions

View File

@ -244,6 +244,9 @@ case ${ac_COMMS} in
mpi) mpi)
AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] ) AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] )
;; ;;
mpi3)
AC_DEFINE([GRID_COMMS_MPI3],[1],[GRID_COMMS_MPI3] )
;;
shmem) shmem)
AC_DEFINE([GRID_COMMS_SHMEM],[1],[GRID_COMMS_SHMEM] ) AC_DEFINE([GRID_COMMS_SHMEM],[1],[GRID_COMMS_SHMEM] )
;; ;;
@ -253,6 +256,7 @@ case ${ac_COMMS} in
esac esac
AM_CONDITIONAL(BUILD_COMMS_SHMEM,[ test "X${ac_COMMS}X" == "XshmemX" ]) AM_CONDITIONAL(BUILD_COMMS_SHMEM,[ test "X${ac_COMMS}X" == "XshmemX" ])
AM_CONDITIONAL(BUILD_COMMS_MPI,[ test "X${ac_COMMS}X" == "XmpiX" || test "X${ac_COMMS}X" == "Xmpi-autoX" ]) AM_CONDITIONAL(BUILD_COMMS_MPI,[ test "X${ac_COMMS}X" == "XmpiX" || test "X${ac_COMMS}X" == "Xmpi-autoX" ])
AM_CONDITIONAL(BUILD_COMMS_MPI3,[ test "X${ac_COMMS}X" == "Xmpi3X"] )
AM_CONDITIONAL(BUILD_COMMS_NONE,[ test "X${ac_COMMS}X" == "XnoneX" ]) AM_CONDITIONAL(BUILD_COMMS_NONE,[ test "X${ac_COMMS}X" == "XnoneX" ])
############### RNG selection ############### RNG selection

View File

@ -40,14 +40,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <mm_malloc.h> #include <mm_malloc.h>
#endif #endif
#ifdef GRID_COMMS_SHMEM
extern "C" {
#include <mpp/shmem.h>
extern void * shmem_align(size_t, size_t);
extern void shmem_free(void *);
}
#endif
namespace Grid { namespace Grid {
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
@ -65,28 +57,85 @@ public:
typedef _Tp value_type; typedef _Tp value_type;
template<typename _Tp1> struct rebind { typedef alignedAllocator<_Tp1> other; }; template<typename _Tp1> struct rebind { typedef alignedAllocator<_Tp1> other; };
alignedAllocator() throw() { } alignedAllocator() throw() { }
alignedAllocator(const alignedAllocator&) throw() { } alignedAllocator(const alignedAllocator&) throw() { }
template<typename _Tp1> alignedAllocator(const alignedAllocator<_Tp1>&) throw() { } template<typename _Tp1> alignedAllocator(const alignedAllocator<_Tp1>&) throw() { }
~alignedAllocator() throw() { } ~alignedAllocator() throw() { }
pointer address(reference __x) const { return &__x; } pointer address(reference __x) const { return &__x; }
// const_pointer address(const_reference __x) const { return &__x; }
size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); } size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); }
pointer allocate(size_type __n, const void* _p= 0) pointer allocate(size_type __n, const void* _p= 0)
{ {
#ifdef HAVE_MM_MALLOC_H
_Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128);
#else
_Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp));
#endif
_Tp tmp;
#ifdef GRID_NUMA
#pragma omp parallel for schedule(static)
for(int i=0;i<__n;i++){
ptr[i]=tmp;
}
#endif
return ptr;
}
void deallocate(pointer __p, size_type) {
#ifdef HAVE_MM_MALLOC_H
_mm_free((void *)__p);
#else
free((void *)__p);
#endif
}
void construct(pointer __p, const _Tp& __val) { };
void construct(pointer __p) { };
void destroy(pointer __p) { };
};
template<typename _Tp> inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; }
template<typename _Tp> inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
//////////////////////////////////////////////////////////////////////////////////////////
// MPI3 : comms must use shm region
// SHMEM: comms must use symmetric heap
//////////////////////////////////////////////////////////////////////////////////////////
#ifdef GRID_COMMS_SHMEM #ifdef GRID_COMMS_SHMEM
extern "C" {
_Tp *ptr = (_Tp *) shmem_align(__n*sizeof(_Tp),64); #include <mpp/shmem.h>
extern void * shmem_align(size_t, size_t);
extern void shmem_free(void *);
}
#define PARANOID_SYMMETRIC_HEAP #define PARANOID_SYMMETRIC_HEAP
#endif
template<typename _Tp>
class commAllocator {
public:
typedef std::size_t size_type;
typedef std::ptrdiff_t difference_type;
typedef _Tp* pointer;
typedef const _Tp* const_pointer;
typedef _Tp& reference;
typedef const _Tp& const_reference;
typedef _Tp value_type;
template<typename _Tp1> struct rebind { typedef commAllocator<_Tp1> other; };
commAllocator() throw() { }
commAllocator(const commAllocator&) throw() { }
template<typename _Tp1> commAllocator(const commAllocator<_Tp1>&) throw() { }
~commAllocator() throw() { }
pointer address(reference __x) const { return &__x; }
size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); }
#ifdef GRID_COMMS_SHMEM
pointer allocate(size_type __n, const void* _p= 0)
{
#ifdef CRAY
_Tp *ptr = (_Tp *) shmem_align(__n*sizeof(_Tp),64);
#else
_Tp *ptr = (_Tp *) shmem_align(64,__n*sizeof(_Tp));
#endif
#ifdef PARANOID_SYMMETRIC_HEAP #ifdef PARANOID_SYMMETRIC_HEAP
static void * bcast; static void * bcast;
static long psync[_SHMEM_REDUCE_SYNC_SIZE]; static long psync[_SHMEM_REDUCE_SYNC_SIZE];
@ -99,51 +148,53 @@ public:
BACKTRACEFILE(); BACKTRACEFILE();
exit(0); exit(0);
} }
assert( bcast == (void *) ptr); assert( bcast == (void *) ptr);
#endif #endif
return ptr;
}
void deallocate(pointer __p, size_type) {
shmem_free((void *)__p);
}
#elif defined(GRID_COMMS_MPI3)
pointer allocate(size_type __n, const void* _p= 0)
{
#error "implement MPI3 windowed allocate"
}
void deallocate(pointer __p, size_type) {
#error "implement MPI3 windowed allocate"
}
#else #else
pointer allocate(size_type __n, const void* _p= 0)
{
#ifdef HAVE_MM_MALLOC_H #ifdef HAVE_MM_MALLOC_H
_Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128); _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128);
#else #else
_Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp)); _Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp));
#endif #endif
#endif
_Tp tmp;
#ifdef GRID_NUMA
#pragma omp parallel for schedule(static)
for(int i=0;i<__n;i++){
ptr[i]=tmp;
}
#endif
return ptr; return ptr;
} }
void deallocate(pointer __p, size_type) { void deallocate(pointer __p, size_type) {
#ifdef GRID_COMMS_SHMEM
shmem_free((void *)__p);
#else
#ifdef HAVE_MM_MALLOC_H #ifdef HAVE_MM_MALLOC_H
_mm_free((void *)__p); _mm_free((void *)__p);
#else #else
free((void *)__p); free((void *)__p);
#endif
#endif #endif
} }
#endif
void construct(pointer __p, const _Tp& __val) { }; void construct(pointer __p, const _Tp& __val) { };
void construct(pointer __p) { }; void construct(pointer __p) { };
void destroy(pointer __p) { }; void destroy(pointer __p) { };
}; };
template<typename _Tp> inline bool operator==(const commAllocator<_Tp>&, const commAllocator<_Tp>&){ return true; }
template<typename _Tp> inline bool operator!=(const commAllocator<_Tp>&, const commAllocator<_Tp>&){ return false; }
template<typename _Tp> inline bool ////////////////////////////////////////////////////////////////////////////////
operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; } // Template typedefs
////////////////////////////////////////////////////////////////////////////////
template<typename _Tp> inline bool template<class T> using Vector = std::vector<T,alignedAllocator<T> >;
operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; } template<class T> using commVector = std::vector<T,commAllocator<T> >;
template<class T> using Matrix = std::vector<std::vector<T,alignedAllocator<T> > >;
}; // namespace Grid }; // namespace Grid
#endif #endif

View File

@ -38,6 +38,10 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/cshift/Cshift_mpi.h> #include <Grid/cshift/Cshift_mpi.h>
#endif #endif
#ifdef GRID_COMMS_MPI3
#include <Grid/cshift/Cshift_mpi.h>
#endif
#ifdef GRID_COMMS_SHMEM #ifdef GRID_COMMS_SHMEM
#include <Grid/cshift/Cshift_mpi.h> // uses same implementation of communicator #include <Grid/cshift/Cshift_mpi.h> // uses same implementation of communicator
#endif #endif

View File

@ -3,6 +3,10 @@ if BUILD_COMMS_MPI
extra_sources+=communicator/Communicator_mpi.cc extra_sources+=communicator/Communicator_mpi.cc
endif endif
if BUILD_COMMS_MPI3
extra_sources+=communicator/Communicator_mpi3.cc
endif
if BUILD_COMMS_SHMEM if BUILD_COMMS_SHMEM
extra_sources+=communicator/Communicator_shmem.cc extra_sources+=communicator/Communicator_shmem.cc
endif endif

View File

@ -71,7 +71,7 @@
namespace Grid { namespace Grid {
template<class vobj,class cobj,class compressor> void template<class vobj,class cobj,class compressor> void
Gather_plane_simple_table_compute (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<cobj> > &buffer,int dimension,int plane,int cbmask,compressor &compress, int off,std::vector<std::pair<int,int> >& table) Gather_plane_simple_table_compute (const Lattice<vobj> &rhs,commVector<cobj> &buffer,int dimension,int plane,int cbmask,compressor &compress, int off,std::vector<std::pair<int,int> >& table)
{ {
table.resize(0); table.resize(0);
int rd = rhs._grid->_rdimensions[dimension]; int rd = rhs._grid->_rdimensions[dimension];
@ -109,7 +109,7 @@ Gather_plane_simple_table_compute (const Lattice<vobj> &rhs,std::vector<cobj,ali
} }
template<class vobj,class cobj,class compressor> void template<class vobj,class cobj,class compressor> void
Gather_plane_simple_table (std::vector<std::pair<int,int> >& table,const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<cobj> > &buffer, Gather_plane_simple_table (std::vector<std::pair<int,int> >& table,const Lattice<vobj> &rhs,commVector<cobj> &buffer,
compressor &compress, int off,int so) compressor &compress, int off,int so)
{ {
PARALLEL_FOR_LOOP PARALLEL_FOR_LOOP
@ -119,7 +119,7 @@ PARALLEL_FOR_LOOP
} }
template<class vobj,class cobj,class compressor> void template<class vobj,class cobj,class compressor> void
Gather_plane_simple_stencil (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<cobj> > &buffer,int dimension,int plane,int cbmask,compressor &compress, int off, Gather_plane_simple_stencil (const Lattice<vobj> &rhs,commVector<cobj> &buffer,int dimension,int plane,int cbmask,compressor &compress, int off,
double &t_table ,double & t_data ) double &t_table ,double & t_data )
{ {
std::vector<std::pair<int,int> > table; std::vector<std::pair<int,int> > table;
@ -347,10 +347,10 @@ Gather_plane_simple_stencil (const Lattice<vobj> &rhs,std::vector<cobj,alignedAl
} }
// Comms buffers // Comms buffers
std::vector<Vector<scalar_object> > u_simd_send_buf; std::vector<commVector<scalar_object> > u_simd_send_buf;
std::vector<Vector<scalar_object> > u_simd_recv_buf; std::vector<commVector<scalar_object> > u_simd_recv_buf;
Vector<cobj> u_send_buf; commVector<cobj> u_send_buf;
Vector<cobj> comm_buf; commVector<cobj> comm_buf;
int u_comm_offset; int u_comm_offset;
int _unified_buffer_size; int _unified_buffer_size;

View File

@ -34,6 +34,9 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#ifdef GRID_COMMS_MPI #ifdef GRID_COMMS_MPI
#include <mpi.h> #include <mpi.h>
#endif #endif
#ifdef GRID_COMMS_MPI3
#include <mpi.h>
#endif
#ifdef GRID_COMMS_SHMEM #ifdef GRID_COMMS_SHMEM
#include <mpp/shmem.h> #include <mpp/shmem.h>
#endif #endif
@ -52,6 +55,29 @@ class CartesianCommunicator {
#ifdef GRID_COMMS_MPI #ifdef GRID_COMMS_MPI
MPI_Comm communicator; MPI_Comm communicator;
typedef MPI_Request CommsRequest_t; typedef MPI_Request CommsRequest_t;
#elif GRID_COMMS_MPI3
MPI_Comm communicator;
typedef MPI_Request CommsRequest_t;
const int MAXLOG2RANKSPERNODE = 16; // 65536 ranks per node adequate for now
std::vector<int> WorldDims;
std::vector<int> GroupDims;
std::vector<int> ShmDims;
std::vector<int> GroupCoor;
std::vector<int> ShmCoor;
std::vector<int> WorldCoor;
int GroupRank;
int ShmRank;
int WorldRank;
int GroupSize;
int ShmSize;
int WorldSize;
std::vector<int> LexicographicToWorldRank;
#else #else
typedef int CommsRequest_t; typedef int CommsRequest_t;
#endif #endif

View File

@ -0,0 +1,358 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/communicator/Communicator_mpi.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include "Grid.h"
#include <mpi.h>
namespace Grid {
// Global used by Init and nowhere else. How to hide?
int Rank(void) {
int pe;
MPI_Comm_rank(MPI_COMM_WORLD,&pe);
return pe;
}
// Should error check all MPI calls.
void CartesianCommunicator::Init(int *argc, char ***argv) {
int flag;
MPI_Initialized(&flag); // needed to coexist with other libs apparently
if ( !flag ) {
MPI_Init(argc,argv);
}
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Want to implement some magic ... Group sub-cubes into those on same node
//
////////////////////////////////////////////////////////////////////////////////////////////////////////////
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
{
std::vector<int> coor = _processor_coor;
assert(std::abs(shift) <_processors[dim]);
coor[dim] = (_processor_coor[dim] + shift + _processors[dim])%_processors[dim];
Lexicographic::IndexFromCoor(coor,source,_processors);
source = LexicographicToWorldRank[source];
coor[dim] = (_processor_coor[dim] - shift + _processors[dim])%_processors[dim];
Lexicographic::IndexFromCoor(coor,dest,_processors);
dest = LexicographicToWorldRank[dest];
}
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
{
int rank;
Lexicographic::IndexFromCoor(coor,rank,_processors);
rank = LexicographicToWorldRank[rank];
return rank;
}
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
{
Lexicographic::CoorFromIndex(coor,rank,_processors);
rank = LexicographicToWorldRank[rank];
}
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
{
_ndimension = processors.size();
std::cout << "Creating "<< _ndimension << " dim communicator "<<std::endl;
for(int d =0;d<_ndimension;d++){
std::cout << processors[d]<<" ";
};
std::cout << std::endl;
WorldDims = processors;
communicator = MPI_COMM_WORLD;
MPI_Comm shmcomm;
MPI_Comm_split_type(communicator, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&shmcomm);
MPI_Comm_rank(communicator,&WorldRank);
MPI_Comm_size(communicator,&WorldSize);
MPI_Comm_rank(shmcomm ,&ShmRank);
MPI_Comm_size(shmcomm ,&ShmSize);
GroupSize = WorldSize/ShmSize;
std::cout<< "Ranks per node "<< ShmSize << std::endl;
std::cout<< "Nodes "<< GroupSize << std::endl;
std::cout<< "Ranks "<< WorldSize << std::endl;
////////////////////////////////////////////////////////////////
// Assert power of two shm_size.
////////////////////////////////////////////////////////////////
int log2size = -1;
for(int i=0;i<=MAXLOG2RANKSPERNODE;i++){
if ( (0x1<<i) == ShmSize ) {
log2size = i;
break;
}
}
assert(log2size != -1);
////////////////////////////////////////////////////////////////
// Identify subblock of ranks on node spreading across dims
// in a maximally symmetrical way
////////////////////////////////////////////////////////////////
int dim = 0;
ShmDims.resize(_ndimension,1);
GroupDims.resize(_ndimension);
ShmCoor.resize(_ndimension);
GroupCoor.resize(_ndimension);
WorldCoor.resize(_ndimension);
for(int l2=0;l2<log2size;l2++){
while ( WorldDims[dim] / ShmDims[dim] <= 1 ) dim=(dim+1)%_ndimension;
ShmDims[dim]*=2;
dim=(dim+1)%_ndimension;
}
std::cout << "Shm group dims "<<std::endl;
for(int d =0;d<_ndimension;d++){
std::cout << ShmDims[d]<<" ";
};
std::cout << std::endl;
////////////////////////////////////////////////////////////////
// Establish torus of processes and nodes with sub-blockings
////////////////////////////////////////////////////////////////
for(int d=0;d<_ndimension;d++){
GroupDims[d] = WorldDims[d]/ShmDims[d];
}
std::cout << "Group dims "<<std::endl;
for(int d =0;d<_ndimension;d++){
std::cout << GroupDims[d]<<" ";
};
std::cout << std::endl;
MPI_Group WorldGroup, ShmGroup;
MPI_Comm_group (communicator, &WorldGroup);
MPI_Comm_group (shmcomm, &ShmGroup);
std::vector<int> world_ranks(WorldSize);
std::vector<int> group_ranks(WorldSize);
std::vector<int> mygroup(GroupSize);
for(int r=0;r<WorldSize;r++) world_ranks[r]=r;
MPI_Group_translate_ranks (WorldGroup,WorldSize,&world_ranks[0],ShmGroup, &group_ranks[0]);
////////////////////////////////////////////////////////////////
// Check processor counts match
////////////////////////////////////////////////////////////////
_Nprocessors=1;
_processors = processors;
_processor_coor.resize(_ndimension);
for(int i=0;i<_ndimension;i++){
std::cout << " p " << _processors[i]<<std::endl;
_Nprocessors*=_processors[i];
}
std::cout << " World " <<WorldSize <<" Nproc "<<_Nprocessors<<std::endl;
assert(WorldSize==_Nprocessors);
///////////////////////////////////////////////////////////////////
// Identify who is in my group and noninate the leader
///////////////////////////////////////////////////////////////////
int g=0;
for(int rank=0;rank<WorldSize;rank++){
if(group_ranks[rank]!=MPI_UNDEFINED){
mygroup[g] = rank;
}
}
std::sort(mygroup.begin(),mygroup.end(),std::greater<int>());
int myleader = mygroup[0];
std::vector<int> leaders_1hot(WorldSize,0);
std::vector<int> leaders_group(GroupSize,0);
leaders_1hot [ myleader ] = 1;
///////////////////////////////////////////////////////////////////
// global sum leaders over comm world
///////////////////////////////////////////////////////////////////
int ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,communicator);
assert(ierr==0);
///////////////////////////////////////////////////////////////////
// find the group leaders world rank
///////////////////////////////////////////////////////////////////
int group=0;
for(int l=0;l<WorldSize;l++){
if(leaders_1hot[l]){
leaders_group[group++] = l;
}
}
///////////////////////////////////////////////////////////////////
// Identify the rank of the group in which I (and my leader) live
///////////////////////////////////////////////////////////////////
GroupRank=-1;
for(int g=0;g<GroupSize;g++){
if (myleader == leaders_group[g]){
GroupRank=g;
}
}
assert(GroupRank!=-1);
////////////////////////////////////////////////////////////////
// Establish mapping between lexico physics coord and WorldRank
//
////////////////////////////////////////////////////////////////
LexicographicToWorldRank.resize(WorldSize,0);
Lexicographic::CoorFromIndex(GroupCoor,GroupRank,GroupDims);
Lexicographic::CoorFromIndex(ShmCoor,ShmRank,ShmDims);
for(int d=0;d<_ndimension;d++){
WorldCoor[d] = GroupCoor[d]*ShmDims[d]+ShmCoor[d];
}
_processor_coor = WorldCoor;
int lexico;
Lexicographic::IndexFromCoor(WorldCoor,lexico,WorldDims);
LexicographicToWorldRank[lexico]=WorldRank;
_processor = lexico;
///////////////////////////////////////////////////////////////////
// global sum Lexico to World mapping
///////////////////////////////////////////////////////////////////
ierr=MPI_Allreduce(MPI_IN_PLACE,&LexicographicToWorldRank[0],WorldSize,MPI_INT,MPI_SUM,communicator);
assert(ierr==0);
};
void CartesianCommunicator::GlobalSum(uint32_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalSum(uint64_t &u){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalSum(float &f){
int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalSumVector(float *f,int N)
{
int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalSum(double &d)
{
int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator);
assert(ierr==0);
}
void CartesianCommunicator::GlobalSumVector(double *d,int N)
{
int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator);
assert(ierr==0);
}
// Basic Halo comms primitive
void CartesianCommunicator::SendToRecvFrom(void *xmit,
int dest,
void *recv,
int from,
int bytes)
{
std::vector<CommsRequest_t> reqs(0);
SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
SendToRecvFromComplete(reqs);
}
void CartesianCommunicator::SendRecvPacket(void *xmit,
void *recv,
int sender,
int receiver,
int bytes)
{
MPI_Status stat;
assert(sender != receiver);
int tag = sender;
if ( _processor == sender ) {
MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator);
}
if ( _processor == receiver ) {
MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat);
}
}
// Basic Halo comms primitive
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
void *xmit,
int dest,
void *recv,
int from,
int bytes)
{
MPI_Request xrq;
MPI_Request rrq;
int rank = _processor;
int ierr;
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
assert(ierr==0);
list.push_back(xrq);
list.push_back(rrq);
}
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
{
int nreq=list.size();
std::vector<MPI_Status> status(nreq);
int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
assert(ierr==0);
}
void CartesianCommunicator::Barrier(void)
{
int ierr = MPI_Barrier(communicator);
assert(ierr==0);
}
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
{
int ierr=MPI_Bcast(data,
bytes,
MPI_BYTE,
root,
communicator);
assert(ierr==0);
}
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
{
int ierr= MPI_Bcast(data,
bytes,
MPI_BYTE,
root,
MPI_COMM_WORLD);
assert(ierr==0);
}
}

View File

@ -45,7 +45,7 @@ public:
// Gather for when there is no need to SIMD split with compression // Gather for when there is no need to SIMD split with compression
/////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////
template<class vobj,class cobj,class compressor> void template<class vobj,class cobj,class compressor> void
Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<cobj> > &buffer,int dimension,int plane,int cbmask,compressor &compress, int off=0) Gather_plane_simple (const Lattice<vobj> &rhs,commVector<cobj> &buffer,int dimension,int plane,int cbmask,compressor &compress, int off=0)
{ {
int rd = rhs._grid->_rdimensions[dimension]; int rd = rhs._grid->_rdimensions[dimension];
@ -114,6 +114,7 @@ PARALLEL_NESTED_LOOP2
int o = n*n1; int o = n*n1;
int offset = b+n*n2; int offset = b+n*n2;
cobj temp =compress(rhs._odata[so+o+b]); cobj temp =compress(rhs._odata[so+o+b]);
extract<cobj>(temp,pointers,offset); extract<cobj>(temp,pointers,offset);
} }
@ -121,6 +122,7 @@ PARALLEL_NESTED_LOOP2
} else { } else {
assert(0); //Fixme think this is buggy assert(0); //Fixme think this is buggy
for(int n=0;n<e1;n++){ for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){ for(int b=0;b<e2;b++){
int o=n*rhs._grid->_slice_stride[dimension]; int o=n*rhs._grid->_slice_stride[dimension];
@ -139,7 +141,7 @@ PARALLEL_NESTED_LOOP2
////////////////////////////////////////////////////// //////////////////////////////////////////////////////
// Gather for when there is no need to SIMD split // Gather for when there is no need to SIMD split
////////////////////////////////////////////////////// //////////////////////////////////////////////////////
template<class vobj> void Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<vobj,alignedAllocator<vobj> > &buffer, int dimension,int plane,int cbmask) template<class vobj> void Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer, int dimension,int plane,int cbmask)
{ {
SimpleCompressor<vobj> dontcompress; SimpleCompressor<vobj> dontcompress;
Gather_plane_simple (rhs,buffer,dimension,plane,cbmask,dontcompress); Gather_plane_simple (rhs,buffer,dimension,plane,cbmask,dontcompress);
@ -157,7 +159,7 @@ template<class vobj> void Gather_plane_extract(const Lattice<vobj> &rhs,std::vec
////////////////////////////////////////////////////// //////////////////////////////////////////////////////
// Scatter for when there is no need to SIMD split // Scatter for when there is no need to SIMD split
////////////////////////////////////////////////////// //////////////////////////////////////////////////////
template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,std::vector<vobj,alignedAllocator<vobj> > &buffer, int dimension,int plane,int cbmask) template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vobj> &buffer, int dimension,int plane,int cbmask)
{ {
int rd = rhs._grid->_rdimensions[dimension]; int rd = rhs._grid->_rdimensions[dimension];

View File

@ -119,8 +119,8 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
assert(shift<fd); assert(shift<fd);
int buffer_size = rhs._grid->_slice_nblock[dimension]*rhs._grid->_slice_block[dimension]; int buffer_size = rhs._grid->_slice_nblock[dimension]*rhs._grid->_slice_block[dimension];
std::vector<vobj,alignedAllocator<vobj> > send_buf(buffer_size); commVector<vobj> send_buf(buffer_size);
std::vector<vobj,alignedAllocator<vobj> > recv_buf(buffer_size); commVector<vobj> recv_buf(buffer_size);
int cb= (cbmask==0x2)? Odd : Even; int cb= (cbmask==0x2)? Odd : Even;
int sshift= rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb); int sshift= rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
@ -191,8 +191,8 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension]; int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension];
int words = sizeof(vobj)/sizeof(vector_type); int words = sizeof(vobj)/sizeof(vector_type);
std::vector<Vector<scalar_object> > send_buf_extract(Nsimd,Vector<scalar_object>(buffer_size) ); std::vector<commVector<scalar_object> > send_buf_extract(Nsimd,commVector<scalar_object>(buffer_size) );
std::vector<Vector<scalar_object> > recv_buf_extract(Nsimd,Vector<scalar_object>(buffer_size) ); std::vector<commVector<scalar_object> > recv_buf_extract(Nsimd,commVector<scalar_object>(buffer_size) );
int bytes = buffer_size*sizeof(scalar_object); int bytes = buffer_size*sizeof(scalar_object);

View File

@ -65,9 +65,6 @@ public:
class LatticeExpressionBase {}; class LatticeExpressionBase {};
template<class T> using Vector = std::vector<T,alignedAllocator<T> >; // Aligned allocator??
template<class T> using Matrix = std::vector<std::vector<T,alignedAllocator<T> > >; // Aligned allocator??
template <typename Op, typename T1> template <typename Op, typename T1>
class LatticeUnaryExpression : public std::pair<Op,std::tuple<T1> > , public LatticeExpressionBase { class LatticeUnaryExpression : public std::pair<Op,std::tuple<T1> > , public LatticeExpressionBase {
public: public:

View File

@ -45,7 +45,7 @@ WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
template <class Impl> template <class Impl>
void WilsonKernels<Impl>::DiracOptGenericDhopSiteDag( void WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, int sF, commVector<SiteHalfSpinor> &buf, int sF,
int sU, const FermionField &in, FermionField &out) { int sU, const FermionField &in, FermionField &out) {
SiteHalfSpinor tmp; SiteHalfSpinor tmp;
SiteHalfSpinor chi; SiteHalfSpinor chi;
@ -222,7 +222,7 @@ void WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(
template <class Impl> template <class Impl>
void WilsonKernels<Impl>::DiracOptGenericDhopSite( void WilsonKernels<Impl>::DiracOptGenericDhopSite(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, int sF, commVector<SiteHalfSpinor> &buf, int sF,
int sU, const FermionField &in, FermionField &out) { int sU, const FermionField &in, FermionField &out) {
SiteHalfSpinor tmp; SiteHalfSpinor tmp;
SiteHalfSpinor chi; SiteHalfSpinor chi;
@ -398,7 +398,7 @@ void WilsonKernels<Impl>::DiracOptGenericDhopSite(
template <class Impl> template <class Impl>
void WilsonKernels<Impl>::DiracOptDhopDir( void WilsonKernels<Impl>::DiracOptDhopDir(
StencilImpl &st, DoubledGaugeField &U, StencilImpl &st, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, int sF, commVector<SiteHalfSpinor> &buf, int sF,
int sU, const FermionField &in, FermionField &out, int dir, int gamma) { int sU, const FermionField &in, FermionField &out, int dir, int gamma) {
SiteHalfSpinor tmp; SiteHalfSpinor tmp;
SiteHalfSpinor chi; SiteHalfSpinor chi;

View File

@ -58,7 +58,7 @@ namespace Grid {
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
DiracOptDhopSite( DiracOptDhopSite(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, commVector<SiteHalfSpinor> &buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, int sF, int sU, int Ls, int Ns, const FermionField &in,
FermionField &out) { FermionField &out) {
#ifdef AVX512 #ifdef AVX512
@ -89,7 +89,7 @@ namespace Grid {
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
DiracOptDhopSite( DiracOptDhopSite(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, commVector<SiteHalfSpinor> &buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, int sF, int sU, int Ls, int Ns, const FermionField &in,
FermionField &out) { FermionField &out) {
for (int site = 0; site < Ns; site++) { for (int site = 0; site < Ns; site++) {
@ -107,7 +107,7 @@ namespace Grid {
void>::type void>::type
DiracOptDhopSiteDag( DiracOptDhopSiteDag(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, commVector<SiteHalfSpinor> &buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, int sF, int sU, int Ls, int Ns, const FermionField &in,
FermionField &out) { FermionField &out) {
#ifdef AVX512 #ifdef AVX512
@ -139,7 +139,7 @@ namespace Grid {
void>::type void>::type
DiracOptDhopSiteDag( DiracOptDhopSiteDag(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, commVector<SiteHalfSpinor> &buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, int sF, int sU, int Ls, int Ns, const FermionField &in,
FermionField &out) { FermionField &out) {
for (int site = 0; site < Ns; site++) { for (int site = 0; site < Ns; site++) {
@ -154,7 +154,7 @@ namespace Grid {
void DiracOptDhopDir( void DiracOptDhopDir(
StencilImpl &st, DoubledGaugeField &U, StencilImpl &st, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, commVector<SiteHalfSpinor> &buf,
int sF, int sU, const FermionField &in, FermionField &out, int dirdisp, int sF, int sU, const FermionField &in, FermionField &out, int dirdisp,
int gamma); int gamma);
@ -162,34 +162,34 @@ namespace Grid {
// Specialised variants // Specialised variants
void DiracOptGenericDhopSite( void DiracOptGenericDhopSite(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, commVector<SiteHalfSpinor> &buf,
int sF, int sU, const FermionField &in, FermionField &out); int sF, int sU, const FermionField &in, FermionField &out);
void DiracOptGenericDhopSiteDag( void DiracOptGenericDhopSiteDag(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, commVector<SiteHalfSpinor> &buf,
int sF, int sU, const FermionField &in, FermionField &out); int sF, int sU, const FermionField &in, FermionField &out);
void DiracOptAsmDhopSite( void DiracOptAsmDhopSite(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, commVector<SiteHalfSpinor> &buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, int sF, int sU, int Ls, int Ns, const FermionField &in,
FermionField &out); FermionField &out);
void DiracOptAsmDhopSiteDag( void DiracOptAsmDhopSiteDag(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, commVector<SiteHalfSpinor> &buf,
int sF, int sU, int Ls, int Ns, const FermionField &in, int sF, int sU, int Ls, int Ns, const FermionField &in,
FermionField &out); FermionField &out);
void DiracOptHandDhopSite( void DiracOptHandDhopSite(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, commVector<SiteHalfSpinor> &buf,
int sF, int sU, const FermionField &in, FermionField &out); int sF, int sU, const FermionField &in, FermionField &out);
void DiracOptHandDhopSiteDag( void DiracOptHandDhopSiteDag(
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, commVector<SiteHalfSpinor> &buf,
int sF, int sU, const FermionField &in, FermionField &out); int sF, int sU, const FermionField &in, FermionField &out);
public: public:

View File

@ -40,14 +40,14 @@ namespace Grid {
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
template<class Impl> template<class Impl>
void WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, void WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, commVector<SiteHalfSpinor> &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
{ {
assert(0); assert(0);
} }
template<class Impl> template<class Impl>
void WilsonKernels<Impl >::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, void WilsonKernels<Impl >::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, commVector<SiteHalfSpinor> &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
{ {
assert(0); assert(0);
@ -86,14 +86,14 @@ namespace Grid {
#undef KERNEL_DAG #undef KERNEL_DAG
template<> template<>
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, commVector<SiteHalfSpinor> &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#define KERNEL_DAG #define KERNEL_DAG
template<> template<>
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, commVector<SiteHalfSpinor> &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/WilsonKernelsAsmBody.h>
@ -111,14 +111,14 @@ namespace Grid {
#undef KERNEL_DAG #undef KERNEL_DAG
template<> template<>
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, commVector<SiteHalfSpinor> &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/WilsonKernelsAsmBody.h>
#define KERNEL_DAG #define KERNEL_DAG
template<> template<>
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, commVector<SiteHalfSpinor> &buf,
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out) int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
#include <qcd/action/fermion/WilsonKernelsAsmBody.h> #include <qcd/action/fermion/WilsonKernelsAsmBody.h>
@ -127,10 +127,10 @@ namespace Grid {
#define INSTANTIATE_ASM(A)\ #define INSTANTIATE_ASM(A)\
template void WilsonKernels<A>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\ template void WilsonKernels<A>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,\ commVector<SiteHalfSpinor> &buf,\
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
template void WilsonKernels<A>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\ template void WilsonKernels<A>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,\ commVector<SiteHalfSpinor> &buf,\
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\ int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\

View File

@ -313,7 +313,7 @@ namespace QCD {
template<class Impl> template<class Impl>
void WilsonKernels<Impl>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, void WilsonKernels<Impl>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, commVector<SiteHalfSpinor> &buf,
int ss,int sU,const FermionField &in, FermionField &out) int ss,int sU,const FermionField &in, FermionField &out)
{ {
typedef typename Simd::scalar_type S; typedef typename Simd::scalar_type S;
@ -556,7 +556,7 @@ namespace QCD {
template<class Impl> template<class Impl>
void WilsonKernels<Impl>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, void WilsonKernels<Impl>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, commVector<SiteHalfSpinor> &buf,
int ss,int sU,const FermionField &in, FermionField &out) int ss,int sU,const FermionField &in, FermionField &out)
{ {
// std::cout << "Hand op Dhop "<<std::endl; // std::cout << "Hand op Dhop "<<std::endl;
@ -804,7 +804,7 @@ namespace QCD {
//////////////////////////////////////////////// ////////////////////////////////////////////////
template<> template<>
void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, commVector<SiteHalfSpinor> &buf,
int sF,int sU,const FermionField &in, FermionField &out) int sF,int sU,const FermionField &in, FermionField &out)
{ {
assert(0); assert(0);
@ -812,7 +812,7 @@ void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,Leb
template<> template<>
void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, commVector<SiteHalfSpinor> &buf,
int sF,int sU,const FermionField &in, FermionField &out) int sF,int sU,const FermionField &in, FermionField &out)
{ {
assert(0); assert(0);
@ -820,7 +820,7 @@ void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,
template<> template<>
void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, commVector<SiteHalfSpinor> &buf,
int sF,int sU,const FermionField &in, FermionField &out) int sF,int sU,const FermionField &in, FermionField &out)
{ {
assert(0); assert(0);
@ -828,7 +828,7 @@ void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,Leb
template<> template<>
void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U, void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf, commVector<SiteHalfSpinor> &buf,
int sF,int sU,const FermionField &in, FermionField &out) int sF,int sU,const FermionField &in, FermionField &out)
{ {
assert(0); assert(0);
@ -841,10 +841,10 @@ void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,
#define INSTANTIATE_THEM(A) \ #define INSTANTIATE_THEM(A) \
template void WilsonKernels<A>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\ template void WilsonKernels<A>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,\ commVector<SiteHalfSpinor> &buf,\
int ss,int sU,const FermionField &in, FermionField &out);\ int ss,int sU,const FermionField &in, FermionField &out);\
template void WilsonKernels<A>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\ template void WilsonKernels<A>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,\ commVector<SiteHalfSpinor> &buf,\
int ss,int sU,const FermionField &in, FermionField &out); int ss,int sU,const FermionField &in, FermionField &out);
INSTANTIATE_THEM(WilsonImplF); INSTANTIATE_THEM(WilsonImplF);

View File

@ -1,126 +1,233 @@
/************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/gauge/Photon.h Source file: ./lib/qcd/action/gauge/Photon.h
Copyright (C) 2015 Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk> Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License along You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#ifndef QCD_PHOTON_ACTION_H #ifndef QCD_PHOTON_ACTION_H
#define QCD_PHOTON_ACTION_H #define QCD_PHOTON_ACTION_H
namespace Grid{ namespace Grid{
namespace QCD{ namespace QCD{
template<class Gimpl>
class Photon
{
public:
INHERIT_GIMPL_TYPES(Gimpl);
enum class Gauge {Feynman, Coulomb, Landau};
enum class ZmScheme {QedL, QedTL};
public:
Photon(Gauge gauge, ZmScheme zmScheme);
virtual ~Photon(void) = default;
void FreePropagator(const GaugeField &in, GaugeField &out);
void MomentumSpacePropagator(const GaugeField &in, GaugeField &out);
void StochasticField(GaugeField &out, GridParallelRNG &rng);
private:
void invKHatSquared(GaugeLinkField &out);
void zmSub(GaugeLinkField &out);
private:
Gauge gauge_;
ZmScheme zmScheme_;
};
template<class Gimpl> template<class Gimpl>
class Photon { Photon<Gimpl>::Photon(Gauge gauge, ZmScheme zmScheme)
: gauge_(gauge), zmScheme_(zmScheme)
public: {}
INHERIT_GIMPL_TYPES(Gimpl); template<class Gimpl>
void Photon<Gimpl>::FreePropagator (const GaugeField &in,GaugeField &out)
enum PhotonType { FEYNMAN_L, FEYNMAN_TL }; {
FFT theFFT(in._grid);
PhotonType GaugeBC;
Photon(PhotonType _GaugeBC) : GaugeBC(_GaugeBC){};
void FreePropagator (const GaugeField &in,GaugeField &out) {
FFT theFFT((GridCartesian *) in._grid);
GaugeField in_k(in._grid);
GaugeField prop_k(in._grid);
theFFT.FFT_all_dim(in_k,in,FFT::forward);
MomentumSpacePropagator(prop_k,in_k);
theFFT.FFT_all_dim(out,prop_k,FFT::backward);
}
void MomentumSpacePropagator(GaugeField &out,const GaugeField &in) {
if ( GaugeBC == FEYNMAN_L ) {
FeynmanGaugeMomentumSpacePropagator_L(out,in);
} else if ( GaugeBC == FEYNMAN_TL ) {
FeynmanGaugeMomentumSpacePropagator_TL(out,in);
} else { // No coulomb yet
assert(0);
}
}
void FeynmanGaugeMomentumSpacePropagator_L(GaugeField &out, const GaugeField &in) {
FeynmanGaugeMomentumSpacePropagator_TL(out,in);
GridBase *grid = out._grid;
LatticeInteger coor(grid);
GaugeField zz(grid); zz=zero;
// xyzt
for(int d = 0; d < grid->_ndimension-1;d++){
LatticeCoordinate(coor,d);
out = where(coor==Integer(0),zz,out);
}
}
void FeynmanGaugeMomentumSpacePropagator_TL(GaugeField &out, const GaugeField &in) {
// what type LatticeComplex
GridBase *grid = out._grid;
int nd = grid->_ndimension;
typedef typename GaugeField::vector_type vector_type;
typedef typename GaugeField::scalar_type ScalComplex;
typedef Lattice<iSinglet<vector_type> > LatComplex;
std::vector<int> latt_size = grid->_fdimensions; GaugeField in_k(in._grid);
GaugeField prop_k(in._grid);
LatComplex denom(grid); denom= zero;
LatComplex one(grid); one = ScalComplex(1.0,0.0); theFFT.FFT_all_dim(in_k,in,FFT::forward);
LatComplex kmu(grid); MomentumSpacePropagator(prop_k,in_k);
theFFT.FFT_all_dim(out,prop_k,FFT::backward);
ScalComplex ci(0.0,1.0); }
// momphase = n * 2pi / L
for(int mu=0;mu<Nd;mu++) { template<class Gimpl>
void Photon<Gimpl>::invKHatSquared(GaugeLinkField &out)
LatticeCoordinate(kmu,mu); {
GridBase *grid = out._grid;
RealD TwoPiL = M_PI * 2.0/ latt_size[mu]; GaugeLinkField kmu(grid), one(grid);
const unsigned int nd = grid->_ndimension;
kmu = TwoPiL * kmu ; std::vector<int> &l = grid->_fdimensions;
std::vector<int> zm(nd,0);
denom = denom + 4.0*sin(kmu*0.5)*sin(kmu*0.5); // Wilson term TComplexD Tone = ComplexD(1.0,0.0);
} TComplexD Tzero= ComplexD(0.0,0.0);
std::vector<int> zero_mode(nd,0);
TComplexD Tone = ComplexD(1.0,0.0); one = ComplexD(1.0,0.0);
TComplexD Tzero= ComplexD(0.0,0.0); out = zero;
for(int mu = 0; mu < nd; mu++)
pokeSite(Tone,denom,zero_mode); {
RealD twoPiL = M_PI*2./l[mu];
denom= one/denom;
LatticeCoordinate(kmu,mu);
pokeSite(Tzero,denom,zero_mode); kmu = 2.*sin(.5*twoPiL*kmu);
out = out + kmu*kmu;
out = zero; }
out = in*denom; pokeSite(Tone, out, zm);
}; out = one/out;
pokeSite(Tzero, out,zm);
}; }
template<class Gimpl>
void Photon<Gimpl>::zmSub(GaugeLinkField &out)
{
GridBase *grid = out._grid;
const unsigned int nd = grid->_ndimension;
switch (zmScheme_)
{
case ZmScheme::QedTL:
{
std::vector<int> zm(nd,0);
TComplexD Tzero = ComplexD(0.0,0.0);
pokeSite(Tzero, out, zm);
break;
}
case ZmScheme::QedL:
{
LatticeInteger spNrm(grid), coor(grid);
GaugeLinkField z(grid);
spNrm = zero;
for(int d = 0; d < grid->_ndimension - 1; d++)
{
LatticeCoordinate(coor,d);
spNrm = spNrm + coor*coor;
}
out = where(spNrm == 0, 0.*out, out);
break;
}
default:
break;
}
}
template<class Gimpl>
void Photon<Gimpl>::MomentumSpacePropagator(const GaugeField &in,
GaugeField &out)
{
GridBase *grid = out._grid;
LatticeComplex k2Inv(grid);
invKHatSquared(k2Inv);
zmSub(k2Inv);
out = in*k2Inv;
}
template<class Gimpl>
void Photon<Gimpl>::StochasticField(GaugeField &out, GridParallelRNG &rng)
{
auto *grid = out._grid;
const unsigned int nd = grid->_ndimension;
GaugeLinkField sqrtK2Inv(grid), r(grid);
GaugeField aTilde(grid);
FFT fft(grid);
invKHatSquared(sqrtK2Inv);
sqrtK2Inv = sqrt(real(sqrtK2Inv));
zmSub(sqrtK2Inv);
for(int mu = 0; mu < nd; mu++)
{
gaussian(rng, r);
r = sqrtK2Inv*r;
pokeLorentz(aTilde, r, mu);
}
fft.FFT_all_dim(out, aTilde, FFT::backward);
}
// template<class Gimpl>
// void Photon<Gimpl>::FeynmanGaugeMomentumSpacePropagator_L(GaugeField &out,
// const GaugeField &in)
// {
//
// FeynmanGaugeMomentumSpacePropagator_TL(out,in);
//
// GridBase *grid = out._grid;
// LatticeInteger coor(grid);
// GaugeField zz(grid); zz=zero;
//
// // xyzt
// for(int d = 0; d < grid->_ndimension-1;d++){
// LatticeCoordinate(coor,d);
// out = where(coor==Integer(0),zz,out);
// }
// }
//
// template<class Gimpl>
// void Photon<Gimpl>::FeynmanGaugeMomentumSpacePropagator_TL(GaugeField &out,
// const GaugeField &in)
// {
//
// // what type LatticeComplex
// GridBase *grid = out._grid;
// int nd = grid->_ndimension;
//
// typedef typename GaugeField::vector_type vector_type;
// typedef typename GaugeField::scalar_type ScalComplex;
// typedef Lattice<iSinglet<vector_type> > LatComplex;
//
// std::vector<int> latt_size = grid->_fdimensions;
//
// LatComplex denom(grid); denom= zero;
// LatComplex one(grid); one = ScalComplex(1.0,0.0);
// LatComplex kmu(grid);
//
// ScalComplex ci(0.0,1.0);
// // momphase = n * 2pi / L
// for(int mu=0;mu<Nd;mu++) {
//
// LatticeCoordinate(kmu,mu);
//
// RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
//
// kmu = TwoPiL * kmu ;
//
// denom = denom + 4.0*sin(kmu*0.5)*sin(kmu*0.5); // Wilson term
// }
// std::vector<int> zero_mode(nd,0);
// TComplexD Tone = ComplexD(1.0,0.0);
// TComplexD Tzero= ComplexD(0.0,0.0);
//
// pokeSite(Tone,denom,zero_mode);
//
// denom= one/denom;
//
// pokeSite(Tzero,denom,zero_mode);
//
// out = zero;
// out = in*denom;
// };
}} }}
#endif #endif

View File

@ -41,6 +41,22 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
namespace Grid{ namespace Grid{
namespace Optimization { namespace Optimization {
template<class vtype>
union uconv {
__m512 f;
vtype v;
};
union u512f {
__m512 v;
float f[8];
};
union u512d {
__m512 v;
double f[4];
};
struct Vsplat{ struct Vsplat{
//Complex float //Complex float
@ -372,8 +388,9 @@ namespace Optimization {
// Some Template specialization // Some Template specialization
// Hack for CLANG until mm512_reduce_add_ps etc... are implemented in GCC and Clang releases // Hack for CLANG until mm512_reduce_add_ps etc... are implemented in GCC and Clang releases
#undef GNU_CLANG_COMPILER
#ifdef GNU_CLANG_COMPILER #ifndef __INTEL_COMPILER
#warning "Slow reduction due to incomplete reduce intrinsics"
//Complex float Reduce //Complex float Reduce
template<> template<>
inline Grid::ComplexF Reduce<Grid::ComplexF, __m512>::operator()(__m512 in){ inline Grid::ComplexF Reduce<Grid::ComplexF, __m512>::operator()(__m512 in){

View File

@ -250,8 +250,7 @@ void merge(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int
} }
} }
template<class vobj> inline template<class vobj> inline void merge1(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int offset)
void merge1(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int offset)
{ {
typedef typename vobj::scalar_type scalar_type ; typedef typename vobj::scalar_type scalar_type ;
typedef typename vobj::vector_type vector_type ; typedef typename vobj::vector_type vector_type ;
@ -269,8 +268,7 @@ void merge1(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int
}} }}
} }
template<class vobj> inline template<class vobj> inline void merge2(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int offset)
void merge2(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int offset)
{ {
typedef typename vobj::scalar_type scalar_type ; typedef typename vobj::scalar_type scalar_type ;
typedef typename vobj::vector_type vector_type ; typedef typename vobj::vector_type vector_type ;