mirror of https://github.com/paboyle/Grid.git (synced 2024-11-10 15:55:37 +00:00)

Merge branch 'feature/feynman-rules' into feature/qed-fvol

commit ab31ad006a
@@ -244,6 +244,9 @@ case ${ac_COMMS} in
     mpi)
        AC_DEFINE([GRID_COMMS_MPI],[1],[GRID_COMMS_MPI] )
        ;;
+    mpi3)
+       AC_DEFINE([GRID_COMMS_MPI3],[1],[GRID_COMMS_MPI3] )
+       ;;
     shmem)
        AC_DEFINE([GRID_COMMS_SHMEM],[1],[GRID_COMMS_SHMEM] )
        ;;
@@ -253,6 +256,7 @@ case ${ac_COMMS} in
 esac
 AM_CONDITIONAL(BUILD_COMMS_SHMEM,[ test "X${ac_COMMS}X" == "XshmemX" ])
 AM_CONDITIONAL(BUILD_COMMS_MPI,[ test "X${ac_COMMS}X" == "XmpiX" || test "X${ac_COMMS}X" == "Xmpi-autoX" ])
+AM_CONDITIONAL(BUILD_COMMS_MPI3,[ test "X${ac_COMMS}X" == "Xmpi3X"] )
 AM_CONDITIONAL(BUILD_COMMS_NONE,[ test "X${ac_COMMS}X" == "XnoneX" ])

 ############### RNG selection
@@ -40,14 +40,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 #include <mm_malloc.h>
 #endif

-#ifdef GRID_COMMS_SHMEM
-extern "C" {
-#include <mpp/shmem.h>
-extern void * shmem_align(size_t, size_t);
-extern void shmem_free(void *);
-}
-#endif
-
 namespace Grid {

 ////////////////////////////////////////////////////////////////////
@@ -65,28 +57,85 @@ public:
   typedef _Tp value_type;

   template<typename _Tp1> struct rebind { typedef alignedAllocator<_Tp1> other; };

   alignedAllocator() throw() { }

   alignedAllocator(const alignedAllocator&) throw() { }

   template<typename _Tp1> alignedAllocator(const alignedAllocator<_Tp1>&) throw() { }

   ~alignedAllocator() throw() { }

   pointer address(reference __x) const { return &__x; }
-  // const_pointer address(const_reference __x) const { return &__x; }

   size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); }

   pointer allocate(size_type __n, const void* _p= 0)
   {
+#ifdef HAVE_MM_MALLOC_H
+    _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128);
+#else
+    _Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp));
+#endif
+
+    _Tp tmp;
+#ifdef GRID_NUMA
+#pragma omp parallel for schedule(static)
+    for(int i=0;i<__n;i++){
+      ptr[i]=tmp;
+    }
+#endif
+    return ptr;
+  }
+
+  void deallocate(pointer __p, size_type) {
+#ifdef HAVE_MM_MALLOC_H
+    _mm_free((void *)__p);
+#else
+    free((void *)__p);
+#endif
+  }
+  void construct(pointer __p, const _Tp& __val) { };
+  void construct(pointer __p) { };
+  void destroy(pointer __p) { };
+};
+template<typename _Tp> inline bool operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; }
+template<typename _Tp> inline bool operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
+
+//////////////////////////////////////////////////////////////////////////////////////////
+// MPI3 : comms must use shm region
+// SHMEM: comms must use symmetric heap
+//////////////////////////////////////////////////////////////////////////////////////////
 #ifdef GRID_COMMS_SHMEM
-    _Tp *ptr = (_Tp *) shmem_align(__n*sizeof(_Tp),64);
+extern "C" {
+#include <mpp/shmem.h>
+extern void * shmem_align(size_t, size_t);
+extern void shmem_free(void *);
+}
 #define PARANOID_SYMMETRIC_HEAP
+#endif
+
+template<typename _Tp>
+class commAllocator {
+public:
+  typedef std::size_t     size_type;
+  typedef std::ptrdiff_t  difference_type;
+  typedef _Tp*       pointer;
+  typedef const _Tp* const_pointer;
+  typedef _Tp&       reference;
+  typedef const _Tp& const_reference;
+  typedef _Tp        value_type;
+
+  template<typename _Tp1> struct rebind { typedef commAllocator<_Tp1> other; };
+  commAllocator() throw() { }
+  commAllocator(const commAllocator&) throw() { }
+  template<typename _Tp1> commAllocator(const commAllocator<_Tp1>&) throw() { }
+  ~commAllocator() throw() { }
+  pointer address(reference __x) const { return &__x; }
+  size_type max_size() const throw() { return size_t(-1) / sizeof(_Tp); }
+
+#ifdef GRID_COMMS_SHMEM
+  pointer allocate(size_type __n, const void* _p= 0)
+  {
+#ifdef CRAY
+    _Tp *ptr = (_Tp *) shmem_align(__n*sizeof(_Tp),64);
+#else
+    _Tp *ptr = (_Tp *) shmem_align(64,__n*sizeof(_Tp));
+#endif
 #ifdef PARANOID_SYMMETRIC_HEAP
     static void * bcast;
     static long  psync[_SHMEM_REDUCE_SYNC_SIZE];
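For orientation, a self-contained sketch of the allocation pattern the hunk above introduces: a minimal C++ allocator handing std::vector 128-byte-aligned storage. This is not Grid's code path; posix_memalign (POSIX) stands in for the _mm_malloc/memalign choice Grid makes via HAVE_MM_MALLOC_H, and all names here are illustrative.

    #include <cstdlib>
    #include <cstdint>
    #include <new>
    #include <vector>

    // Minimal stand-in for alignedAllocator: alignment comes from the
    // allocator, not from the element type.
    template<typename T>
    struct Aligned128 {
      typedef T value_type;
      T* allocate(std::size_t n) {
        void *p = 0;
        if (posix_memalign(&p, 128, n * sizeof(T)) != 0) throw std::bad_alloc();
        return static_cast<T*>(p);
      }
      void deallocate(T* p, std::size_t) { free(p); }
    };
    template<typename T> bool operator==(const Aligned128<T>&, const Aligned128<T>&) { return true;  }
    template<typename T> bool operator!=(const Aligned128<T>&, const Aligned128<T>&) { return false; }

    int main() {
      std::vector<double, Aligned128<double> > v(1024, 0.0); // data lands on a 128-byte boundary
      return (reinterpret_cast<std::uintptr_t>(v.data()) % 128) == 0 ? 0 : 1;
    }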
@@ -99,51 +148,53 @@ public:
       BACKTRACEFILE();
       exit(0);
     }

     assert( bcast == (void *) ptr);

 #endif
+    return ptr;
+  }
+  void deallocate(pointer __p, size_type) {
+    shmem_free((void *)__p);
+  }
+#elif defined(GRID_COMMS_MPI3)
+  pointer allocate(size_type __n, const void* _p= 0)
+  {
+#error "implement MPI3 windowed allocate"
+
+  }
+  void deallocate(pointer __p, size_type) {
+#error "implement MPI3 windowed allocate"
+  }
 #else
+  pointer allocate(size_type __n, const void* _p= 0)
+  {
 #ifdef HAVE_MM_MALLOC_H
     _Tp * ptr = (_Tp *) _mm_malloc(__n*sizeof(_Tp),128);
 #else
     _Tp * ptr = (_Tp *) memalign(128,__n*sizeof(_Tp));
 #endif

-#endif
-    _Tp tmp;
-#ifdef GRID_NUMA
-#pragma omp parallel for schedule(static)
-    for(int i=0;i<__n;i++){
-      ptr[i]=tmp;
-    }
-#endif
     return ptr;
   }

   void deallocate(pointer __p, size_type) {
-#ifdef GRID_COMMS_SHMEM
-    shmem_free((void *)__p);
-#else
 #ifdef HAVE_MM_MALLOC_H
     _mm_free((void *)__p);
 #else
     free((void *)__p);
 #endif
-#endif
 #endif
   }
+#endif
   void construct(pointer __p, const _Tp& __val) { };
   void construct(pointer __p) { };

   void destroy(pointer __p) { };
 };
+template<typename _Tp> inline bool operator==(const commAllocator<_Tp>&, const commAllocator<_Tp>&){ return true; }
+template<typename _Tp> inline bool operator!=(const commAllocator<_Tp>&, const commAllocator<_Tp>&){ return false; }
+
-template<typename _Tp> inline bool
-operator==(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return true; }
-template<typename _Tp> inline bool
-operator!=(const alignedAllocator<_Tp>&, const alignedAllocator<_Tp>&){ return false; }
+////////////////////////////////////////////////////////////////////////////////
+// Template typedefs
+////////////////////////////////////////////////////////////////////////////////
+template<class T> using Vector     = std::vector<T,alignedAllocator<T> >;
+template<class T> using commVector = std::vector<T,commAllocator<T> >;
+template<class T> using Matrix     = std::vector<std::vector<T,alignedAllocator<T> > >;

 }; // namespace Grid
 #endif
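A usage sketch for the new aliases. The include is an assumption (whatever umbrella header pulls in the aliases above works); the point is only that Vector and commVector are drop-in std::vector types backed by different heaps.

    #include <Grid/Grid.h>  // assumed umbrella header exposing the aliases above

    int main(void) {
      Grid::Vector<float>     site(64, 0.f);  // aligned allocator: compute-side buffer
      Grid::commVector<float> halo(64, 0.f);  // comm allocator: symmetric heap / shm window when enabled
      Grid::Matrix<float>     m(4, Grid::Vector<float>(4, 1.f));
      return (site.size() == 64 && halo.size() == 64 && m.size() == 4) ? 0 : 1;
    }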
@@ -38,6 +38,10 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 #include <Grid/cshift/Cshift_mpi.h>
 #endif

+#ifdef GRID_COMMS_MPI3
+#include <Grid/cshift/Cshift_mpi.h>
+#endif
+
 #ifdef GRID_COMMS_SHMEM
 #include <Grid/cshift/Cshift_mpi.h> // uses same implementation of communicator
 #endif
@@ -3,6 +3,10 @@ if BUILD_COMMS_MPI
   extra_sources+=communicator/Communicator_mpi.cc
 endif

+if BUILD_COMMS_MPI3
+  extra_sources+=communicator/Communicator_mpi3.cc
+endif
+
 if BUILD_COMMS_SHMEM
   extra_sources+=communicator/Communicator_shmem.cc
 endif
@@ -71,7 +71,7 @@
 namespace Grid {

 template<class vobj,class cobj,class compressor> void
-Gather_plane_simple_table_compute (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<cobj> > &buffer,int dimension,int plane,int cbmask,compressor &compress, int off,std::vector<std::pair<int,int> >& table)
+Gather_plane_simple_table_compute (const Lattice<vobj> &rhs,commVector<cobj> &buffer,int dimension,int plane,int cbmask,compressor &compress, int off,std::vector<std::pair<int,int> >& table)
 {
   table.resize(0);
   int rd = rhs._grid->_rdimensions[dimension];
@@ -109,7 +109,7 @@ Gather_plane_simple_table_compute (const Lattice<vobj> &rhs,std::vector<cobj,ali
 }

 template<class vobj,class cobj,class compressor> void
-Gather_plane_simple_table (std::vector<std::pair<int,int> >& table,const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<cobj> > &buffer,
+Gather_plane_simple_table (std::vector<std::pair<int,int> >& table,const Lattice<vobj> &rhs,commVector<cobj> &buffer,
                            compressor &compress, int off,int so)
 {
 PARALLEL_FOR_LOOP
@@ -119,7 +119,7 @@ PARALLEL_FOR_LOOP
 }

 template<class vobj,class cobj,class compressor> void
-Gather_plane_simple_stencil (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<cobj> > &buffer,int dimension,int plane,int cbmask,compressor &compress, int off,
+Gather_plane_simple_stencil (const Lattice<vobj> &rhs,commVector<cobj> &buffer,int dimension,int plane,int cbmask,compressor &compress, int off,
                              double &t_table ,double & t_data )
 {
   std::vector<std::pair<int,int> > table;
@@ -347,10 +347,10 @@ Gather_plane_simple_stencil (const Lattice<vobj> &rhs,std::vector<cobj,alignedAl
 }

 // Comms buffers
-std::vector<Vector<scalar_object> > u_simd_send_buf;
-std::vector<Vector<scalar_object> > u_simd_recv_buf;
-Vector<cobj>                        u_send_buf;
-Vector<cobj>                        comm_buf;
+std::vector<commVector<scalar_object> > u_simd_send_buf;
+std::vector<commVector<scalar_object> > u_simd_recv_buf;
+commVector<cobj>                        u_send_buf;
+commVector<cobj>                        comm_buf;
 int u_comm_offset;
 int _unified_buffer_size;
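The table-driven gather above splits halo packing into a one-off table build and a cheap replay. A standalone sketch of the replay step (not Grid code; the field, table, and buffer names are illustrative):

    #include <cstddef>
    #include <utility>
    #include <vector>

    // Replay a precomputed (buffer offset, site index) table; Grid runs this
    // loop under PARALLEL_FOR_LOOP.
    template<class T>
    void gather_with_table(const std::vector<T> &field,
                           const std::vector<std::pair<int,int> > &table,
                           std::vector<T> &buffer, int off) {
      for (std::size_t i = 0; i < table.size(); i++)
        buffer[off + table[i].first] = field[table[i].second];
    }

    int main() {
      std::vector<double> field = {10, 11, 12, 13};
      std::vector<std::pair<int,int> > table = {{0, 2}, {1, 0}}; // a plane of two sites
      std::vector<double> buf(2);
      gather_with_table(field, table, buf, 0);
      return (buf[0] == 12 && buf[1] == 10) ? 0 : 1;
    }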
@@ -34,6 +34,9 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 #ifdef GRID_COMMS_MPI
 #include <mpi.h>
 #endif
+#ifdef GRID_COMMS_MPI3
+#include <mpi.h>
+#endif
 #ifdef GRID_COMMS_SHMEM
 #include <mpp/shmem.h>
 #endif
@@ -52,6 +55,29 @@ class CartesianCommunicator {
 #ifdef GRID_COMMS_MPI
     MPI_Comm communicator;
     typedef MPI_Request CommsRequest_t;
+#elif GRID_COMMS_MPI3
+    MPI_Comm communicator;
+    typedef MPI_Request CommsRequest_t;
+
+    const int MAXLOG2RANKSPERNODE = 16; // 65536 ranks per node adequate for now
+
+    std::vector<int> WorldDims;
+    std::vector<int> GroupDims;
+    std::vector<int> ShmDims;
+
+    std::vector<int> GroupCoor;
+    std::vector<int> ShmCoor;
+    std::vector<int> WorldCoor;
+
+    int GroupRank;
+    int ShmRank;
+    int WorldRank;
+
+    int GroupSize;
+    int ShmSize;
+    int WorldSize;
+
+    std::vector<int> LexicographicToWorldRank;
 #else
     typedef int CommsRequest_t;
 #endif
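The new coordinate fields encode a two-level decomposition: a rank's world coordinate factorises into a node (group) coordinate and an in-node (shm) coordinate. A small sketch of that arithmetic, with assumed example values:

    #include <cassert>
    #include <vector>

    int main() {
      // Illustrative values only: a 4-d grid with 4 ranks per node (2x2x1x1 block).
      std::vector<int> ShmDims   = {2, 2, 1, 1};  // ranks-per-node sub-block
      std::vector<int> GroupCoor = {1, 0, 3, 2};  // which node
      std::vector<int> ShmCoor   = {0, 1, 0, 0};  // which rank within the node
      std::vector<int> WorldCoor(4);
      for (int d = 0; d < 4; d++)
        WorldCoor[d] = GroupCoor[d] * ShmDims[d] + ShmCoor[d]; // as in the MPI3 constructor below
      assert(WorldCoor[0] == 2 && WorldCoor[1] == 1 && WorldCoor[2] == 3 && WorldCoor[3] == 2);
      return 0;
    }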
lib/communicator/Communicator_mpi3.cc (new file, 358 lines)
@@ -0,0 +1,358 @@
+/*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid
+
+    Source file: ./lib/communicator/Communicator_mpi.cc
+
+    Copyright (C) 2015
+
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+*************************************************************************************/
+/* END LEGAL */
+#include "Grid.h"
+#include <mpi.h>
+
+namespace Grid {
+
+// Global used by Init and nowhere else. How to hide?
+int Rank(void) {
+  int pe;
+  MPI_Comm_rank(MPI_COMM_WORLD,&pe);
+  return pe;
+}
+// Should error check all MPI calls.
+void CartesianCommunicator::Init(int *argc, char ***argv) {
+  int flag;
+  MPI_Initialized(&flag); // needed to coexist with other libs apparently
+  if ( !flag ) {
+    MPI_Init(argc,argv);
+  }
+}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Want to implement some magic ... Group sub-cubes into those on same node
+//
+////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
+{
+  std::vector<int> coor = _processor_coor;
+
+  assert(std::abs(shift) <_processors[dim]);
+
+  coor[dim] = (_processor_coor[dim] + shift + _processors[dim])%_processors[dim];
+  Lexicographic::IndexFromCoor(coor,source,_processors);
+  source = LexicographicToWorldRank[source];
+
+  coor[dim] = (_processor_coor[dim] - shift + _processors[dim])%_processors[dim];
+  Lexicographic::IndexFromCoor(coor,dest,_processors);
+  dest = LexicographicToWorldRank[dest];
+}
+int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
+{
+  int rank;
+  Lexicographic::IndexFromCoor(coor,rank,_processors);
+  rank = LexicographicToWorldRank[rank];
+  return rank;
+}
+void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
+{
+  Lexicographic::CoorFromIndex(coor,rank,_processors);
+  rank = LexicographicToWorldRank[rank];
+}
+
+CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
+{
+  _ndimension = processors.size();
+  std::cout << "Creating "<< _ndimension << " dim communicator "<<std::endl;
+  for(int d =0;d<_ndimension;d++){
+    std::cout << processors[d]<<" ";
+  };
+  std::cout << std::endl;
+
+  WorldDims = processors;
+
+  communicator = MPI_COMM_WORLD;
+  MPI_Comm shmcomm;
+  MPI_Comm_split_type(communicator, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&shmcomm);
+  MPI_Comm_rank(communicator,&WorldRank);
+  MPI_Comm_size(communicator,&WorldSize);
+  MPI_Comm_rank(shmcomm    ,&ShmRank);
+  MPI_Comm_size(shmcomm    ,&ShmSize);
+  GroupSize = WorldSize/ShmSize;
+
+  std::cout<< "Ranks per node "<< ShmSize << std::endl;
+  std::cout<< "Nodes          "<< GroupSize << std::endl;
+  std::cout<< "Ranks          "<< WorldSize << std::endl;
+
+  ////////////////////////////////////////////////////////////////
+  // Assert power of two shm_size.
+  ////////////////////////////////////////////////////////////////
+  int log2size = -1;
+  for(int i=0;i<=MAXLOG2RANKSPERNODE;i++){
+    if ( (0x1<<i) == ShmSize ) {
+      log2size = i;
+      break;
+    }
+  }
+  assert(log2size != -1);
+
+  ////////////////////////////////////////////////////////////////
+  // Identify subblock of ranks on node spreading across dims
+  // in a maximally symmetrical way
+  ////////////////////////////////////////////////////////////////
+  int dim = 0;
+
+  ShmDims.resize(_ndimension,1);
+  GroupDims.resize(_ndimension);
+
+  ShmCoor.resize(_ndimension);
+  GroupCoor.resize(_ndimension);
+  WorldCoor.resize(_ndimension);
+  for(int l2=0;l2<log2size;l2++){
+    while ( WorldDims[dim] / ShmDims[dim] <= 1 ) dim=(dim+1)%_ndimension;
+    ShmDims[dim]*=2;
+    dim=(dim+1)%_ndimension;
+  }
+
+  std::cout << "Shm group dims "<<std::endl;
+  for(int d =0;d<_ndimension;d++){
+    std::cout << ShmDims[d]<<" ";
+  };
+  std::cout << std::endl;
+
+  ////////////////////////////////////////////////////////////////
+  // Establish torus of processes and nodes with sub-blockings
+  ////////////////////////////////////////////////////////////////
+  for(int d=0;d<_ndimension;d++){
+    GroupDims[d] = WorldDims[d]/ShmDims[d];
+  }
+  std::cout << "Group dims "<<std::endl;
+  for(int d =0;d<_ndimension;d++){
+    std::cout << GroupDims[d]<<" ";
+  };
+  std::cout << std::endl;
+
+  MPI_Group WorldGroup, ShmGroup;
+  MPI_Comm_group (communicator, &WorldGroup);
+  MPI_Comm_group (shmcomm, &ShmGroup);
+
+  std::vector<int> world_ranks(WorldSize);
+  std::vector<int> group_ranks(WorldSize);
+  std::vector<int> mygroup(GroupSize);
+  for(int r=0;r<WorldSize;r++) world_ranks[r]=r;
+
+  MPI_Group_translate_ranks (WorldGroup,WorldSize,&world_ranks[0],ShmGroup, &group_ranks[0]);
+
+  ////////////////////////////////////////////////////////////////
+  // Check processor counts match
+  ////////////////////////////////////////////////////////////////
+  _Nprocessors=1;
+  _processors = processors;
+  _processor_coor.resize(_ndimension);
+  for(int i=0;i<_ndimension;i++){
+    std::cout << " p " << _processors[i]<<std::endl;
+    _Nprocessors*=_processors[i];
+  }
+  std::cout << " World " <<WorldSize <<" Nproc "<<_Nprocessors<<std::endl;
+  assert(WorldSize==_Nprocessors);
+
+  ///////////////////////////////////////////////////////////////////
+  // Identify who is in my group and nominate the leader
+  ///////////////////////////////////////////////////////////////////
+  int g=0;
+  for(int rank=0;rank<WorldSize;rank++){
+    if(group_ranks[rank]!=MPI_UNDEFINED){
+      mygroup[g] = rank;
+    }
+  }
+
+  std::sort(mygroup.begin(),mygroup.end(),std::greater<int>());
+  int myleader = mygroup[0];
+
+  std::vector<int> leaders_1hot(WorldSize,0);
+  std::vector<int> leaders_group(GroupSize,0);
+  leaders_1hot [ myleader ] = 1;
+
+  ///////////////////////////////////////////////////////////////////
+  // global sum leaders over comm world
+  ///////////////////////////////////////////////////////////////////
+  int ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,communicator);
+  assert(ierr==0);
+
+  ///////////////////////////////////////////////////////////////////
+  // find the group leaders world rank
+  ///////////////////////////////////////////////////////////////////
+  int group=0;
+  for(int l=0;l<WorldSize;l++){
+    if(leaders_1hot[l]){
+      leaders_group[group++] = l;
+    }
+  }
+
+  ///////////////////////////////////////////////////////////////////
+  // Identify the rank of the group in which I (and my leader) live
+  ///////////////////////////////////////////////////////////////////
+  GroupRank=-1;
+  for(int g=0;g<GroupSize;g++){
+    if (myleader == leaders_group[g]){
+      GroupRank=g;
+    }
+  }
+  assert(GroupRank!=-1);
+
+  ////////////////////////////////////////////////////////////////
+  // Establish mapping between lexico physics coord and WorldRank
+  //
+  ////////////////////////////////////////////////////////////////
+  LexicographicToWorldRank.resize(WorldSize,0);
+  Lexicographic::CoorFromIndex(GroupCoor,GroupRank,GroupDims);
+  Lexicographic::CoorFromIndex(ShmCoor,ShmRank,ShmDims);
+  for(int d=0;d<_ndimension;d++){
+    WorldCoor[d] = GroupCoor[d]*ShmDims[d]+ShmCoor[d];
+  }
+  _processor_coor = WorldCoor;
+
+  int lexico;
+  Lexicographic::IndexFromCoor(WorldCoor,lexico,WorldDims);
+  LexicographicToWorldRank[lexico]=WorldRank;
+  _processor = lexico;
+
+  ///////////////////////////////////////////////////////////////////
+  // global sum Lexico to World mapping
+  ///////////////////////////////////////////////////////////////////
+  ierr=MPI_Allreduce(MPI_IN_PLACE,&LexicographicToWorldRank[0],WorldSize,MPI_INT,MPI_SUM,communicator);
+  assert(ierr==0);
+
+};
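A minimal standalone MPI program showing the node-discovery step the constructor performs first: split MPI_COMM_WORLD by shared-memory locality and infer ranks-per-node and node count. This is a sketch, not Grid code:

    #include <mpi.h>
    #include <cstdio>

    int main(int argc, char **argv) {
      MPI_Init(&argc, &argv);
      int world_rank, world_size, shm_rank, shm_size;
      MPI_Comm shmcomm;
      MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
      MPI_Comm_size(MPI_COMM_WORLD, &world_size);
      // Every rank on the same node lands in the same shmcomm.
      MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shmcomm);
      MPI_Comm_rank(shmcomm, &shm_rank);
      MPI_Comm_size(shmcomm, &shm_size);
      if (world_rank == 0)
        std::printf("ranks %d, ranks/node %d, nodes %d\n",
                    world_size, shm_size, world_size / shm_size);
      MPI_Comm_free(&shmcomm);
      MPI_Finalize();
      return 0;
    }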
+
+void CartesianCommunicator::GlobalSum(uint32_t &u){
+  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
+  assert(ierr==0);
+}
+void CartesianCommunicator::GlobalSum(uint64_t &u){
+  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
+  assert(ierr==0);
+}
+void CartesianCommunicator::GlobalSum(float &f){
+  int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
+  assert(ierr==0);
+}
+void CartesianCommunicator::GlobalSumVector(float *f,int N)
+{
+  int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator);
+  assert(ierr==0);
+}
+void CartesianCommunicator::GlobalSum(double &d)
+{
+  int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator);
+  assert(ierr==0);
+}
+void CartesianCommunicator::GlobalSumVector(double *d,int N)
+{
+  int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator);
+  assert(ierr==0);
+}
+
+// Basic Halo comms primitive
+void CartesianCommunicator::SendToRecvFrom(void *xmit,
+                                           int dest,
+                                           void *recv,
+                                           int from,
+                                           int bytes)
+{
+  std::vector<CommsRequest_t> reqs(0);
+  SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
+  SendToRecvFromComplete(reqs);
+}
+
+void CartesianCommunicator::SendRecvPacket(void *xmit,
+                                           void *recv,
+                                           int sender,
+                                           int receiver,
+                                           int bytes)
+{
+  MPI_Status stat;
+  assert(sender != receiver);
+  int tag = sender;
+  if ( _processor == sender ) {
+    MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator);
+  }
+  if ( _processor == receiver ) {
+    MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat);
+  }
+}
+
+// Basic Halo comms primitive
+void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
+                                                void *xmit,
+                                                int dest,
+                                                void *recv,
+                                                int from,
+                                                int bytes)
+{
+  MPI_Request xrq;
+  MPI_Request rrq;
+  int rank = _processor;
+  int ierr;
+  ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
+  ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
+
+  assert(ierr==0);
+
+  list.push_back(xrq);
+  list.push_back(rrq);
+}
+void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
+{
+  int nreq=list.size();
+  std::vector<MPI_Status> status(nreq);
+  int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
+
+  assert(ierr==0);
+}
+
+void CartesianCommunicator::Barrier(void)
+{
+  int ierr = MPI_Barrier(communicator);
+  assert(ierr==0);
+}
+
+void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
+{
+  int ierr=MPI_Bcast(data,
+                     bytes,
+                     MPI_BYTE,
+                     root,
+                     communicator);
+  assert(ierr==0);
+}
+
+void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
+{
+  int ierr= MPI_Bcast(data,
+                      bytes,
+                      MPI_BYTE,
+                      root,
+                      MPI_COMM_WORLD);
+  assert(ierr==0);
+}
+
+}
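All of the GlobalSum methods above rely on the same in-place reduction idiom; a compact standalone sketch:

    #include <mpi.h>
    #include <cassert>

    // Sum N doubles across all ranks, overwriting the input buffer in place.
    void global_sum(double *d, int N, MPI_Comm comm) {
      int ierr = MPI_Allreduce(MPI_IN_PLACE, d, N, MPI_DOUBLE, MPI_SUM, comm);
      assert(ierr == MPI_SUCCESS);
    }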
@@ -45,7 +45,7 @@
 // Gather for when there is no need to SIMD split with compression
 ///////////////////////////////////////////////////////////////////
 template<class vobj,class cobj,class compressor> void
-Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<cobj,alignedAllocator<cobj> > &buffer,int dimension,int plane,int cbmask,compressor &compress, int off=0)
+Gather_plane_simple (const Lattice<vobj> &rhs,commVector<cobj> &buffer,int dimension,int plane,int cbmask,compressor &compress, int off=0)
 {
   int rd = rhs._grid->_rdimensions[dimension];

@@ -114,6 +114,7 @@ PARALLEL_NESTED_LOOP2
       int o = n*n1;
       int offset = b+n*n2;
       cobj temp =compress(rhs._odata[so+o+b]);
+
       extract<cobj>(temp,pointers,offset);

     }
@@ -121,6 +122,7 @@ PARALLEL_NESTED_LOOP2
   } else {
+
     assert(0); //Fixme think this is buggy

     for(int n=0;n<e1;n++){
       for(int b=0;b<e2;b++){
         int o=n*rhs._grid->_slice_stride[dimension];
@@ -139,7 +141,7 @@ PARALLEL_NESTED_LOOP2
 //////////////////////////////////////////////////////
 // Gather for when there is no need to SIMD split
 //////////////////////////////////////////////////////
-template<class vobj> void Gather_plane_simple (const Lattice<vobj> &rhs,std::vector<vobj,alignedAllocator<vobj> > &buffer, int dimension,int plane,int cbmask)
+template<class vobj> void Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer, int dimension,int plane,int cbmask)
 {
   SimpleCompressor<vobj> dontcompress;
   Gather_plane_simple (rhs,buffer,dimension,plane,cbmask,dontcompress);
@@ -157,7 +159,7 @@ template<class vobj> void Gather_plane_extract(const Lattice<vobj> &rhs,std::vec
 //////////////////////////////////////////////////////
 // Scatter for when there is no need to SIMD split
 //////////////////////////////////////////////////////
-template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,std::vector<vobj,alignedAllocator<vobj> > &buffer, int dimension,int plane,int cbmask)
+template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vobj> &buffer, int dimension,int plane,int cbmask)
 {
   int rd = rhs._grid->_rdimensions[dimension];
@@ -119,8 +119,8 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
   assert(shift<fd);

   int buffer_size = rhs._grid->_slice_nblock[dimension]*rhs._grid->_slice_block[dimension];
-  std::vector<vobj,alignedAllocator<vobj> > send_buf(buffer_size);
-  std::vector<vobj,alignedAllocator<vobj> > recv_buf(buffer_size);
+  commVector<vobj> send_buf(buffer_size);
+  commVector<vobj> recv_buf(buffer_size);

   int cb= (cbmask==0x2)? Odd : Even;
   int sshift= rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
@@ -191,8 +191,8 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
   int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension];
   int words = sizeof(vobj)/sizeof(vector_type);

-  std::vector<Vector<scalar_object> > send_buf_extract(Nsimd,Vector<scalar_object>(buffer_size) );
-  std::vector<Vector<scalar_object> > recv_buf_extract(Nsimd,Vector<scalar_object>(buffer_size) );
+  std::vector<commVector<scalar_object> > send_buf_extract(Nsimd,commVector<scalar_object>(buffer_size) );
+  std::vector<commVector<scalar_object> > recv_buf_extract(Nsimd,commVector<scalar_object>(buffer_size) );

   int bytes = buffer_size*sizeof(scalar_object);
@@ -65,9 +65,6 @@ public:

 class LatticeExpressionBase {};

-template<class T> using Vector = std::vector<T,alignedAllocator<T> >;               // Aligned allocator??
-template<class T> using Matrix = std::vector<std::vector<T,alignedAllocator<T> > >; // Aligned allocator??
-
 template <typename Op, typename T1>
 class LatticeUnaryExpression : public std::pair<Op,std::tuple<T1> > , public LatticeExpressionBase {
 public:
@@ -45,7 +45,7 @@ WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
 template <class Impl>
 void WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(
     StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
-    std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, int sF,
+    commVector<SiteHalfSpinor> &buf, int sF,
     int sU, const FermionField &in, FermionField &out) {
   SiteHalfSpinor tmp;
   SiteHalfSpinor chi;
@@ -222,7 +222,7 @@ void WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(
 template <class Impl>
 void WilsonKernels<Impl>::DiracOptGenericDhopSite(
     StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
-    std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, int sF,
+    commVector<SiteHalfSpinor> &buf, int sF,
     int sU, const FermionField &in, FermionField &out) {
   SiteHalfSpinor tmp;
   SiteHalfSpinor chi;
@@ -398,7 +398,7 @@ void WilsonKernels<Impl>::DiracOptGenericDhopSite(
 template <class Impl>
 void WilsonKernels<Impl>::DiracOptDhopDir(
     StencilImpl &st, DoubledGaugeField &U,
-    std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, int sF,
+    commVector<SiteHalfSpinor> &buf, int sF,
    int sU, const FermionField &in, FermionField &out, int dir, int gamma) {
   SiteHalfSpinor tmp;
   SiteHalfSpinor chi;
@@ -58,7 +58,7 @@ namespace Grid {
   typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
   DiracOptDhopSite(
       StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
-      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
+      commVector<SiteHalfSpinor> &buf,
       int sF, int sU, int Ls, int Ns, const FermionField &in,
       FermionField &out) {
 #ifdef AVX512
@@ -89,7 +89,7 @@ namespace Grid {
   typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
   DiracOptDhopSite(
       StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
-      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
+      commVector<SiteHalfSpinor> &buf,
       int sF, int sU, int Ls, int Ns, const FermionField &in,
       FermionField &out) {
     for (int site = 0; site < Ns; site++) {
@@ -107,7 +107,7 @@ namespace Grid {
                           void>::type
   DiracOptDhopSiteDag(
       StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
-      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
+      commVector<SiteHalfSpinor> &buf,
       int sF, int sU, int Ls, int Ns, const FermionField &in,
       FermionField &out) {
 #ifdef AVX512
@@ -139,7 +139,7 @@ namespace Grid {
                           void>::type
   DiracOptDhopSiteDag(
       StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
-      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
+      commVector<SiteHalfSpinor> &buf,
       int sF, int sU, int Ls, int Ns, const FermionField &in,
       FermionField &out) {
     for (int site = 0; site < Ns; site++) {
@@ -154,7 +154,7 @@ namespace Grid {

   void DiracOptDhopDir(
       StencilImpl &st, DoubledGaugeField &U,
-      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
+      commVector<SiteHalfSpinor> &buf,
       int sF, int sU, const FermionField &in, FermionField &out, int dirdisp,
       int gamma);

@@ -162,34 +162,34 @@ namespace Grid {
   // Specialised variants
   void DiracOptGenericDhopSite(
       StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
-      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
+      commVector<SiteHalfSpinor> &buf,
       int sF, int sU, const FermionField &in, FermionField &out);

   void DiracOptGenericDhopSiteDag(
       StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
-      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
+      commVector<SiteHalfSpinor> &buf,
       int sF, int sU, const FermionField &in, FermionField &out);

   void DiracOptAsmDhopSite(
       StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
-      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
+      commVector<SiteHalfSpinor> &buf,
       int sF, int sU, int Ls, int Ns, const FermionField &in,
       FermionField &out);

   void DiracOptAsmDhopSiteDag(
       StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
-      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
+      commVector<SiteHalfSpinor> &buf,
       int sF, int sU, int Ls, int Ns, const FermionField &in,
       FermionField &out);

   void DiracOptHandDhopSite(
       StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
-      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
+      commVector<SiteHalfSpinor> &buf,
       int sF, int sU, const FermionField &in, FermionField &out);

   void DiracOptHandDhopSiteDag(
       StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
-      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
+      commVector<SiteHalfSpinor> &buf,
       int sF, int sU, const FermionField &in, FermionField &out);

 public:
@@ -40,14 +40,14 @@ namespace Grid {
 ///////////////////////////////////////////////////////////
 template<class Impl>
 void WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
-                                               std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
+                                               commVector<SiteHalfSpinor> &buf,
                                                int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 {
   assert(0);
 }
 template<class Impl>
 void WilsonKernels<Impl >::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
-                                                  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
+                                                  commVector<SiteHalfSpinor> &buf,
                                                   int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 {
   assert(0);
@@ -86,14 +86,14 @@ namespace Grid {
 #undef KERNEL_DAG
 template<>
 void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
-                                                     std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
+                                                     commVector<SiteHalfSpinor> &buf,
                                                      int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 #include <qcd/action/fermion/WilsonKernelsAsmBody.h>

 #define KERNEL_DAG
 template<>
 void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
-                                                        std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
+                                                        commVector<SiteHalfSpinor> &buf,
                                                         int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 #include <qcd/action/fermion/WilsonKernelsAsmBody.h>

@@ -111,14 +111,14 @@ namespace Grid {
 #undef KERNEL_DAG
 template<>
 void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
-                                                              std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
+                                                              commVector<SiteHalfSpinor> &buf,
                                                               int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 #include <qcd/action/fermion/WilsonKernelsAsmBody.h>

 #define KERNEL_DAG
 template<>
 void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
-                                                                 std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
+                                                                 commVector<SiteHalfSpinor> &buf,
                                                                  int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 #include <qcd/action/fermion/WilsonKernelsAsmBody.h>

@@ -127,10 +127,10 @@ namespace Grid {

 #define INSTANTIATE_ASM(A)\
 template void WilsonKernels<A>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\
-  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,\
+  commVector<SiteHalfSpinor> &buf,\
   int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
 template void WilsonKernels<A>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\
-  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,\
+  commVector<SiteHalfSpinor> &buf,\
   int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\

@@ -313,7 +313,7 @@ namespace QCD {

 template<class Impl>
 void WilsonKernels<Impl>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
-                                               std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
+                                               commVector<SiteHalfSpinor> &buf,
                                                int ss,int sU,const FermionField &in, FermionField &out)
 {
   typedef typename Simd::scalar_type S;
@@ -556,7 +556,7 @@ namespace QCD {

 template<class Impl>
 void WilsonKernels<Impl>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
-                                                  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
+                                                  commVector<SiteHalfSpinor> &buf,
                                                   int ss,int sU,const FermionField &in, FermionField &out)
 {
   // std::cout << "Hand op Dhop "<<std::endl;
@@ -804,7 +804,7 @@ namespace QCD {
 ////////////////////////////////////////////////
 template<>
 void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
-                                                             std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
+                                                             commVector<SiteHalfSpinor> &buf,
                                                              int sF,int sU,const FermionField &in, FermionField &out)
 {
   assert(0);
@@ -812,7 +812,7 @@ void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,Leb

 template<>
 void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
-                                                                std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
+                                                                commVector<SiteHalfSpinor> &buf,
                                                                 int sF,int sU,const FermionField &in, FermionField &out)
 {
   assert(0);
@@ -820,7 +820,7 @@ void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,

 template<>
 void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
-                                                             std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
+                                                             commVector<SiteHalfSpinor> &buf,
                                                              int sF,int sU,const FermionField &in, FermionField &out)
 {
   assert(0);
@@ -828,7 +828,7 @@ void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,Leb

 template<>
 void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
-                                                                std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,
+                                                                commVector<SiteHalfSpinor> &buf,
                                                                 int sF,int sU,const FermionField &in, FermionField &out)
 {
   assert(0);
@@ -841,10 +841,10 @@ void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,

 #define INSTANTIATE_THEM(A) \
 template void WilsonKernels<A>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\
-                                                     std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,\
+                                                     commVector<SiteHalfSpinor> &buf,\
                                                      int ss,int sU,const FermionField &in, FermionField &out);\
 template void WilsonKernels<A>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\
-                                                        std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > &buf,\
+                                                        commVector<SiteHalfSpinor> &buf,\
                                                         int ss,int sU,const FermionField &in, FermionField &out);

 INSTANTIATE_THEM(WilsonImplF);
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/qcd/action/gauge/Photon.h
|
Source file: ./lib/qcd/action/gauge/Photon.h
|
||||||
|
|
||||||
Copyright (C) 2015
|
Copyright (C) 2015
|
||||||
|
|
||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
(at your option) any later version.
|
(at your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License along
|
You should have received a copy of the GNU General Public License along
|
||||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#ifndef QCD_PHOTON_ACTION_H
|
#ifndef QCD_PHOTON_ACTION_H
|
||||||
#define QCD_PHOTON_ACTION_H
|
#define QCD_PHOTON_ACTION_H
|
||||||
|
|
||||||
namespace Grid{
|
namespace Grid{
|
||||||
namespace QCD{
|
namespace QCD{
|
||||||
|
|
||||||
|
template<class Gimpl>
|
||||||
|
class Photon
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
INHERIT_GIMPL_TYPES(Gimpl);
|
||||||
|
enum class Gauge {Feynman, Coulomb, Landau};
|
||||||
|
enum class ZmScheme {QedL, QedTL};
|
||||||
|
public:
|
||||||
|
Photon(Gauge gauge, ZmScheme zmScheme);
|
||||||
|
virtual ~Photon(void) = default;
|
||||||
|
void FreePropagator(const GaugeField &in, GaugeField &out);
|
||||||
|
void MomentumSpacePropagator(const GaugeField &in, GaugeField &out);
|
||||||
|
void StochasticField(GaugeField &out, GridParallelRNG &rng);
|
||||||
|
private:
|
||||||
|
void invKHatSquared(GaugeLinkField &out);
|
||||||
|
void zmSub(GaugeLinkField &out);
|
||||||
|
private:
|
||||||
|
Gauge gauge_;
|
||||||
|
ZmScheme zmScheme_;
|
||||||
|
};
|
||||||
|
|
||||||
template<class Gimpl>
|
template<class Gimpl>
|
||||||
class Photon {
|
Photon<Gimpl>::Photon(Gauge gauge, ZmScheme zmScheme)
|
||||||
|
: gauge_(gauge), zmScheme_(zmScheme)
|
||||||
public:
|
{}
|
||||||
|
|
||||||
INHERIT_GIMPL_TYPES(Gimpl);
|
template<class Gimpl>
|
||||||
|
void Photon<Gimpl>::FreePropagator (const GaugeField &in,GaugeField &out)
|
||||||
enum PhotonType { FEYNMAN_L, FEYNMAN_TL };
|
{
|
||||||
|
FFT theFFT(in._grid);
|
||||||
PhotonType GaugeBC;
|
|
||||||
|
|
||||||
Photon(PhotonType _GaugeBC) : GaugeBC(_GaugeBC){};
|
|
||||||
|
|
||||||
void FreePropagator (const GaugeField &in,GaugeField &out) {
|
|
||||||
FFT theFFT((GridCartesian *) in._grid);
|
|
||||||
|
|
||||||
GaugeField in_k(in._grid);
|
|
||||||
GaugeField prop_k(in._grid);
|
|
||||||
|
|
||||||
theFFT.FFT_all_dim(in_k,in,FFT::forward);
|
|
||||||
MomentumSpacePropagator(prop_k,in_k);
|
|
||||||
theFFT.FFT_all_dim(out,prop_k,FFT::backward);
|
|
||||||
}
|
|
||||||
void MomentumSpacePropagator(GaugeField &out,const GaugeField &in) {
|
|
||||||
if ( GaugeBC == FEYNMAN_L ) {
|
|
||||||
FeynmanGaugeMomentumSpacePropagator_L(out,in);
|
|
||||||
} else if ( GaugeBC == FEYNMAN_TL ) {
|
|
||||||
FeynmanGaugeMomentumSpacePropagator_TL(out,in);
|
|
||||||
} else { // No coulomb yet
|
|
||||||
assert(0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
void FeynmanGaugeMomentumSpacePropagator_L(GaugeField &out, const GaugeField &in) {
|
|
||||||
|
|
||||||
FeynmanGaugeMomentumSpacePropagator_TL(out,in);
|
|
||||||
|
|
||||||
GridBase *grid = out._grid;
|
|
||||||
LatticeInteger coor(grid);
|
|
||||||
GaugeField zz(grid); zz=zero;
|
|
||||||
|
|
||||||
// xyzt
|
|
||||||
for(int d = 0; d < grid->_ndimension-1;d++){
|
|
||||||
LatticeCoordinate(coor,d);
|
|
||||||
out = where(coor==Integer(0),zz,out);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void FeynmanGaugeMomentumSpacePropagator_TL(GaugeField &out, const GaugeField &in) {
|
|
||||||
|
|
||||||
// what type LatticeComplex
|
|
||||||
GridBase *grid = out._grid;
|
|
||||||
int nd = grid->_ndimension;
|
|
||||||
|
|
||||||
typedef typename GaugeField::vector_type vector_type;
|
|
||||||
typedef typename GaugeField::scalar_type ScalComplex;
|
|
||||||
typedef Lattice<iSinglet<vector_type> > LatComplex;
|
|
||||||
|
|
||||||
std::vector<int> latt_size = grid->_fdimensions;
|
GaugeField in_k(in._grid);
|
||||||
|
GaugeField prop_k(in._grid);
|
||||||
LatComplex denom(grid); denom= zero;
|
|
||||||
LatComplex one(grid); one = ScalComplex(1.0,0.0);
|
theFFT.FFT_all_dim(in_k,in,FFT::forward);
|
||||||
LatComplex kmu(grid);
|
MomentumSpacePropagator(prop_k,in_k);
|
||||||
|
theFFT.FFT_all_dim(out,prop_k,FFT::backward);
|
||||||
ScalComplex ci(0.0,1.0);
|
}
|
||||||
// momphase = n * 2pi / L
|
|
||||||
for(int mu=0;mu<Nd;mu++) {
|
template<class Gimpl>
|
||||||
|
void Photon<Gimpl>::invKHatSquared(GaugeLinkField &out)
|
||||||
LatticeCoordinate(kmu,mu);
|
{
|
||||||
|
GridBase *grid = out._grid;
|
||||||
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
|
GaugeLinkField kmu(grid), one(grid);
|
||||||
|
const unsigned int nd = grid->_ndimension;
|
||||||
kmu = TwoPiL * kmu ;
|
std::vector<int> &l = grid->_fdimensions;
|
||||||
|
std::vector<int> zm(nd,0);
|
||||||
denom = denom + 4.0*sin(kmu*0.5)*sin(kmu*0.5); // Wilson term
|
TComplexD Tone = ComplexD(1.0,0.0);
|
||||||
}
|
TComplexD Tzero= ComplexD(0.0,0.0);
|
||||||
std::vector<int> zero_mode(nd,0);
|
|
||||||
TComplexD Tone = ComplexD(1.0,0.0);
|
one = ComplexD(1.0,0.0);
|
||||||
TComplexD Tzero= ComplexD(0.0,0.0);
|
out = zero;
|
||||||
|
for(int mu = 0; mu < nd; mu++)
|
||||||
pokeSite(Tone,denom,zero_mode);
|
{
|
||||||
|
RealD twoPiL = M_PI*2./l[mu];
|
||||||
denom= one/denom;
|
|
||||||
|
LatticeCoordinate(kmu,mu);
|
||||||
pokeSite(Tzero,denom,zero_mode);
|
kmu = 2.*sin(.5*twoPiL*kmu);
|
||||||
|
out = out + kmu*kmu;
|
||||||
out = zero;
|
}
|
||||||
out = in*denom;
|
pokeSite(Tone, out, zm);
|
||||||
};
|
out = one/out;
|
||||||
|
pokeSite(Tzero, out,zm);
|
||||||
};
|
}
|
||||||
|
|
||||||
|
template<class Gimpl>
|
||||||
|
void Photon<Gimpl>::zmSub(GaugeLinkField &out)
|
||||||
|
{
|
||||||
|
GridBase *grid = out._grid;
|
||||||
|
const unsigned int nd = grid->_ndimension;
|
||||||
|
|
||||||
|
switch (zmScheme_)
|
||||||
|
{
|
||||||
|
case ZmScheme::QedTL:
|
||||||
|
{
|
||||||
|
std::vector<int> zm(nd,0);
|
||||||
|
TComplexD Tzero = ComplexD(0.0,0.0);
|
||||||
|
|
||||||
|
pokeSite(Tzero, out, zm);
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case ZmScheme::QedL:
|
||||||
|
{
|
||||||
|
LatticeInteger spNrm(grid), coor(grid);
|
||||||
|
GaugeLinkField z(grid);
|
||||||
|
|
||||||
|
spNrm = zero;
|
||||||
|
for(int d = 0; d < grid->_ndimension - 1; d++)
|
||||||
|
{
|
||||||
|
LatticeCoordinate(coor,d);
|
||||||
|
spNrm = spNrm + coor*coor;
|
||||||
|
}
|
||||||
|
out = where(spNrm == 0, 0.*out, out);
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Gimpl>
|
||||||
|
void Photon<Gimpl>::MomentumSpacePropagator(const GaugeField &in,
|
||||||
|
GaugeField &out)
|
||||||
|
{
|
||||||
|
GridBase *grid = out._grid;
|
||||||
|
LatticeComplex k2Inv(grid);
|
||||||
|
|
||||||
|
invKHatSquared(k2Inv);
|
||||||
|
zmSub(k2Inv);
|
||||||
|
|
||||||
|
out = in*k2Inv;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Gimpl>
|
||||||
|
void Photon<Gimpl>::StochasticField(GaugeField &out, GridParallelRNG &rng)
|
||||||
|
{
|
||||||
|
auto *grid = out._grid;
|
||||||
|
const unsigned int nd = grid->_ndimension;
|
||||||
|
GaugeLinkField sqrtK2Inv(grid), r(grid);
|
||||||
|
GaugeField aTilde(grid);
|
||||||
|
FFT fft(grid);
|
||||||
|
|
||||||
|
invKHatSquared(sqrtK2Inv);
|
||||||
|
sqrtK2Inv = sqrt(real(sqrtK2Inv));
|
||||||
|
zmSub(sqrtK2Inv);
|
||||||
|
for(int mu = 0; mu < nd; mu++)
|
||||||
|
{
|
||||||
|
gaussian(rng, r);
|
||||||
|
r = sqrtK2Inv*r;
|
||||||
|
pokeLorentz(aTilde, r, mu);
|
||||||
|
}
|
||||||
|
fft.FFT_all_dim(out, aTilde, FFT::backward);
|
||||||
|
}
|
||||||
|
// template<class Gimpl>
|
||||||
|
// void Photon<Gimpl>::FeynmanGaugeMomentumSpacePropagator_L(GaugeField &out,
|
||||||
|
// const GaugeField &in)
|
||||||
|
// {
|
||||||
|
//
|
||||||
|
// FeynmanGaugeMomentumSpacePropagator_TL(out,in);
|
||||||
|
//
|
||||||
|
// GridBase *grid = out._grid;
|
||||||
|
// LatticeInteger coor(grid);
|
||||||
|
// GaugeField zz(grid); zz=zero;
|
||||||
|
//
|
||||||
|
// // xyzt
|
||||||
|
// for(int d = 0; d < grid->_ndimension-1;d++){
|
||||||
|
// LatticeCoordinate(coor,d);
|
||||||
|
// out = where(coor==Integer(0),zz,out);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// template<class Gimpl>
|
||||||
|
// void Photon<Gimpl>::FeynmanGaugeMomentumSpacePropagator_TL(GaugeField &out,
|
||||||
|
// const GaugeField &in)
|
||||||
|
// {
|
||||||
|
//
|
||||||
|
// // what type LatticeComplex
|
||||||
|
// GridBase *grid = out._grid;
|
||||||
|
// int nd = grid->_ndimension;
|
||||||
|
//
|
||||||
|
// typedef typename GaugeField::vector_type vector_type;
|
||||||
|
// typedef typename GaugeField::scalar_type ScalComplex;
|
||||||
|
// typedef Lattice<iSinglet<vector_type> > LatComplex;
|
||||||
|
//
|
||||||
|
// std::vector<int> latt_size = grid->_fdimensions;
|
||||||
|
//
|
||||||
|
// LatComplex denom(grid); denom= zero;
|
||||||
|
// LatComplex one(grid); one = ScalComplex(1.0,0.0);
|
||||||
|
// LatComplex kmu(grid);
|
||||||
|
//
|
||||||
|
// ScalComplex ci(0.0,1.0);
|
||||||
|
// // momphase = n * 2pi / L
|
||||||
|
// for(int mu=0;mu<Nd;mu++) {
|
||||||
|
//
|
||||||
|
// LatticeCoordinate(kmu,mu);
|
||||||
|
//
|
||||||
|
// RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
|
||||||
|
//
|
||||||
|
// kmu = TwoPiL * kmu ;
|
||||||
|
//
|
||||||
|
// denom = denom + 4.0*sin(kmu*0.5)*sin(kmu*0.5); // Wilson term
|
||||||
|
// }
|
||||||
|
// std::vector<int> zero_mode(nd,0);
|
||||||
|
// TComplexD Tone = ComplexD(1.0,0.0);
|
||||||
|
// TComplexD Tzero= ComplexD(0.0,0.0);
|
||||||
|
//
|
||||||
|
// pokeSite(Tone,denom,zero_mode);
|
||||||
|
//
|
||||||
|
// denom= one/denom;
|
||||||
|
//
|
||||||
|
// pokeSite(Tzero,denom,zero_mode);
|
||||||
|
//
|
||||||
|
// out = zero;
|
||||||
|
// out = in*denom;
|
||||||
|
// };
|
||||||
|
|
||||||
}}
|
}}
|
||||||
#endif
|
#endif
|
||||||
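For reference, the quantity `invKHatSquared` builds is the inverse square of the standard lattice momentum, with the global zero mode poked to zero afterwards; in the conventions of the code above:

```latex
\hat{k}_\mu = 2\,\sin\!\Big(\frac{\pi n_\mu}{L_\mu}\Big),
\qquad n_\mu = 0,\dots,L_\mu-1,
\qquad
\frac{1}{\hat{k}^2} = \Big(\sum_{\mu=0}^{n_d-1}\hat{k}_\mu^2\Big)^{-1}.
```

`zmSub` then applies the zero-mode prescription: `QedTL` re-zeroes only the global mode k = 0, while `QedL` zeroes every mode whose spatial momentum vanishes. Note that `gauge_` is stored but not yet branched on in this commit, so the propagator is effectively Feynman gauge. A hedged usage sketch of the new interface follows; the choice of `PeriodicGimplR` as gauge implementation, the include path, and the fixed RNG seeds are illustrative assumptions, not part of this diff:

```cpp
// Minimal sketch, assuming the usual Grid application boilerplate.
#include <Grid/Grid.h>

using namespace Grid;
using namespace Grid::QCD;

int main(int argc, char **argv)
{
  Grid_init(&argc, &argv);

  // Default 4-d cartesian grid with default SIMD and MPI decompositions
  GridCartesian grid(GridDefaultLatt(),
                     GridDefaultSimd(Nd, vComplex::Nsimd()),
                     GridDefaultMpi());
  GridParallelRNG rng(&grid);
  rng.SeedFixedIntegers(std::vector<int>({1, 2, 3, 4}));

  // Feynman gauge with QED_L zero-mode subtraction
  typedef Photon<PeriodicGimplR> PhotonR;
  PhotonR photon(PhotonR::Gauge::Feynman, PhotonR::ZmScheme::QedL);

  // Draw one stochastic photon field A_mu(x)
  LatticeGaugeField A(&grid);
  photon.StochasticField(A, rng);

  Grid_finalize();

  return 0;
}
```

`StochasticField` draws a Gaussian field interpreted in momentum space, weights each Lorentz component by 1/khat (the `sqrt(real(...))` line turns the 1/khat^2 weight into the 1/khat needed for unit-variance noise), and Fourier-transforms back to position space.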
@@ -41,6 +41,22 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 
 namespace Grid{
 namespace Optimization {
 
+  template<class vtype>
+  union uconv {
+    __m512 f;
+    vtype v;
+  };
+
+  union u512f {
+    __m512 v;
+    float f[8];
+  };
+
+  union u512d {
+    __m512 v;
+    double f[4];
+  };
+
   struct Vsplat{
     //Complex float
@@ -372,8 +388,9 @@ namespace Optimization {
   // Some Template specialization
 
   // Hack for CLANG until mm512_reduce_add_ps etc... are implemented in GCC and Clang releases
-#undef GNU_CLANG_COMPILER
-#ifdef GNU_CLANG_COMPILER
+#ifndef __INTEL_COMPILER
+#warning "Slow reduction due to incomplete reduce intrinsics"
   //Complex float Reduce
   template<>
   inline Grid::ComplexF Reduce<Grid::ComplexF, __m512>::operator()(__m512 in){
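The second hunk replaces the old `GNU_CLANG_COMPILER` guard, which was unconditionally `#undef`'d on the line before so the guarded block never compiled, with a real compiler check: on anything but the Intel compiler, where the `_mm512_reduce_add_ps` family of intrinsics is unavailable, a scalar fallback is built and a `#warning` flags the slowdown. A hedged illustration of the kind of fallback this implies, spilling the vector register through the `u512f` union added above (the helper name is invented for the sketch, and the lane count follows this commit's `float f[8]` declaration; build with AVX-512 enabled):

```cpp
#include <immintrin.h>
#include <complex>

namespace Optimization {
  union u512f { __m512 v; float f[8]; }; // as declared in this commit
}

// Hypothetical helper, not from the diff: reduce a packed complex-float
// vector by storing the register into the union and summing scalar lanes
// (even lanes = real parts, odd lanes = imaginary parts).
inline std::complex<float> slowReduceComplexF(__m512 in)
{
  Optimization::u512f conv;
  conv.v = in;
  float re = 0.f, im = 0.f;
  for (int i = 0; i < 8; i += 2) {
    re += conv.f[i];
    im += conv.f[i + 1];
  }
  return std::complex<float>(re, im);
}
```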
@@ -250,8 +250,7 @@ void merge(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int
 }
 }
 
-template<class vobj> inline
-void merge1(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int offset)
+template<class vobj> inline void merge1(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int offset)
 {
 typedef typename vobj::scalar_type scalar_type ;
 typedef typename vobj::vector_type vector_type ;
@@ -269,8 +268,7 @@ void merge1(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int
 }}
 }
 
-template<class vobj> inline
-void merge2(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int offset)
+template<class vobj> inline void merge2(vobj &vec,std::vector<typename vobj::scalar_object *> &extracted,int offset)
 {
 typedef typename vobj::scalar_type scalar_type ;
 typedef typename vobj::vector_type vector_type ;
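Both `merge1` and `merge2` changes here are purely cosmetic: the `inline` specifier and the signature are joined on one line, with no change to the bodies. For orientation, the merge family interleaves per-lane scalar objects back into a single SIMD-vectorised object; a toy, self-contained illustration of that operation, using plain floats instead of Grid tensor types (all names invented for the sketch):

```cpp
#include <array>
#include <cstddef>
#include <vector>

// Toy 4-lane "vectorised" object standing in for a Grid vobj.
struct vToy { std::array<float, 4> lanes; };

// Hypothetical analogue of merge1/merge2: gather one scalar per SIMD lane
// from the extracted pointers (offset selects the element within each
// lane's stream) and pack them into the vector object.
inline void toyMerge(vToy &vec, std::vector<float *> &extracted, int offset)
{
  for (std::size_t lane = 0; lane < vec.lanes.size(); ++lane)
    vec.lanes[lane] = extracted[lane][offset];
}
```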