mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-04 03:05:55 +01:00
Merge branch 'feature/mpi3' into develop
This commit is contained in:
commit
b1508e4124
@ -153,9 +153,10 @@ int main (int argc, char ** argv)
|
||||
std::cout<<GridLogMessage << "norm result "<< norm2(result)<<std::endl;
|
||||
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NP<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
|
||||
err = ref-result;
|
||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||
assert (norm2(err)< 1.0e-5 );
|
||||
Dw.Report();
|
||||
}
|
||||
|
||||
@ -192,7 +193,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
std::cout<<GridLogMessage << "Called Dw s_inner "<<ncall<<" times in "<<t1-t0<<" us"<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NP<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
|
||||
sDw.Report();
|
||||
|
||||
if(0){
|
||||
@ -208,8 +209,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
std::cout<<GridLogMessage<< "res norms "<< norm2(result)<<" " <<norm2(sresult)<<std::endl;
|
||||
|
||||
|
||||
RealF sum=0;
|
||||
RealD sum=0;
|
||||
for(int x=0;x<latt4[0];x++){
|
||||
for(int y=0;y<latt4[1];y++){
|
||||
for(int z=0;z<latt4[2];z++){
|
||||
@ -221,18 +221,18 @@ int main (int argc, char ** argv)
|
||||
peekSite(simd,sresult,site);
|
||||
sum=sum+norm2(normal-simd);
|
||||
if (norm2(normal-simd) > 1.0e-6 ) {
|
||||
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<norm2(normal-simd)<<std::endl;
|
||||
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" normal "<<normal<<std::endl;
|
||||
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" simd "<<simd<<std::endl;
|
||||
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" "<<norm2(normal-simd)<<std::endl;
|
||||
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" normal "<<normal<<std::endl;
|
||||
std::cout << "site "<<x<<","<<y<<","<<z<<","<<t<<","<<s<<" simd "<<simd<<std::endl;
|
||||
}
|
||||
}}}}}
|
||||
std::cout<<GridLogMessage<<" difference between normal and simd is "<<sum<<std::endl;
|
||||
assert (sum< 1.0e-5 );
|
||||
|
||||
|
||||
if (1) {
|
||||
|
||||
LatticeFermion sr_eo(sFGrid);
|
||||
LatticeFermion serr(sFGrid);
|
||||
|
||||
LatticeFermion ssrc_e (sFrbGrid);
|
||||
LatticeFermion ssrc_o (sFrbGrid);
|
||||
@ -244,8 +244,6 @@ int main (int argc, char ** argv)
|
||||
|
||||
setCheckerboard(sr_eo,ssrc_o);
|
||||
setCheckerboard(sr_eo,ssrc_e);
|
||||
serr = sr_eo-ssrc;
|
||||
std::cout<<GridLogMessage << "EO src norm diff "<< norm2(serr)<<std::endl;
|
||||
|
||||
sr_e = zero;
|
||||
sr_o = zero;
|
||||
@ -263,7 +261,7 @@ int main (int argc, char ** argv)
|
||||
double flops=(1344.0*volume*ncall)/2;
|
||||
|
||||
std::cout<<GridLogMessage << "sDeo mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||
std::cout<<GridLogMessage << "sDeo mflop/s per node "<< flops/(t1-t0)/NP<<std::endl;
|
||||
std::cout<<GridLogMessage << "sDeo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl;
|
||||
sDw.Report();
|
||||
|
||||
sDw.DhopEO(ssrc_o,sr_e,DaggerNo);
|
||||
@ -273,9 +271,18 @@ int main (int argc, char ** argv)
|
||||
pickCheckerboard(Even,ssrc_e,sresult);
|
||||
pickCheckerboard(Odd ,ssrc_o,sresult);
|
||||
ssrc_e = ssrc_e - sr_e;
|
||||
RealD error = norm2(ssrc_e);
|
||||
|
||||
std::cout<<GridLogMessage << "sE norm diff "<< norm2(ssrc_e)<< " vec nrm"<<norm2(sr_e) <<std::endl;
|
||||
ssrc_o = ssrc_o - sr_o;
|
||||
|
||||
error+= norm2(ssrc_o);
|
||||
std::cout<<GridLogMessage << "sO norm diff "<< norm2(ssrc_o)<< " vec nrm"<<norm2(sr_o) <<std::endl;
|
||||
if(error>1.0e-5) {
|
||||
setCheckerboard(ssrc,ssrc_o);
|
||||
setCheckerboard(ssrc,ssrc_e);
|
||||
std::cout<< ssrc << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -307,7 +314,7 @@ int main (int argc, char ** argv)
|
||||
std::cout<<GridLogMessage << "norm ref "<< norm2(ref)<<std::endl;
|
||||
err = ref-result;
|
||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||
|
||||
assert(norm2(err)<1.0e-5);
|
||||
LatticeFermion src_e (FrbGrid);
|
||||
LatticeFermion src_o (FrbGrid);
|
||||
LatticeFermion r_e (FrbGrid);
|
||||
@ -334,7 +341,7 @@ int main (int argc, char ** argv)
|
||||
double flops=(1344.0*volume*ncall)/2;
|
||||
|
||||
std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||
std::cout<<GridLogMessage << "Deo mflop/s per node "<< flops/(t1-t0)/NP<<std::endl;
|
||||
std::cout<<GridLogMessage << "Deo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl;
|
||||
Dw.Report();
|
||||
}
|
||||
Dw.DhopEO(src_o,r_e,DaggerNo);
|
||||
@ -350,11 +357,14 @@ int main (int argc, char ** argv)
|
||||
|
||||
err = r_eo-result;
|
||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||
assert(norm2(err)<1.0e-5);
|
||||
|
||||
pickCheckerboard(Even,src_e,err);
|
||||
pickCheckerboard(Odd,src_o,err);
|
||||
std::cout<<GridLogMessage << "norm diff even "<< norm2(src_e)<<std::endl;
|
||||
std::cout<<GridLogMessage << "norm diff odd "<< norm2(src_o)<<std::endl;
|
||||
assert(norm2(src_e)<1.0e-5);
|
||||
assert(norm2(src_o)<1.0e-5);
|
||||
|
||||
|
||||
}
|
||||
|
@ -145,7 +145,7 @@ public:
|
||||
|
||||
if ( bcast != ptr ) {
|
||||
std::printf("inconsistent alloc pe %d %lx %lx \n",shmem_my_pe(),bcast,ptr);std::fflush(stdout);
|
||||
BACKTRACEFILE();
|
||||
// BACKTRACEFILE();
|
||||
exit(0);
|
||||
}
|
||||
assert( bcast == (void *) ptr);
|
||||
@ -155,15 +155,6 @@ public:
|
||||
void deallocate(pointer __p, size_type) {
|
||||
shmem_free((void *)__p);
|
||||
}
|
||||
#elif defined(GRID_COMMS_MPI3)
|
||||
pointer allocate(size_type __n, const void* _p= 0)
|
||||
{
|
||||
#error "implement MPI3 windowed allocate"
|
||||
|
||||
}
|
||||
void deallocate(pointer __p, size_type) {
|
||||
#error "implement MPI3 windowed allocate"
|
||||
}
|
||||
#else
|
||||
pointer allocate(size_type __n, const void* _p= 0)
|
||||
{
|
||||
|
16
lib/Init.cc
16
lib/Init.cc
@ -171,14 +171,17 @@ std::string GridCmdVectorIntToString(const std::vector<int> & vec){
|
||||
/////////////////////////////////////////////////////////
|
||||
//
|
||||
/////////////////////////////////////////////////////////
|
||||
static int Grid_is_initialised = 0;
|
||||
|
||||
|
||||
void Grid_init(int *argc,char ***argv)
|
||||
{
|
||||
GridLogger::StopWatch.Start();
|
||||
|
||||
CartesianCommunicator::Init(argc,argv);
|
||||
|
||||
// Parse command line args.
|
||||
|
||||
GridLogger::StopWatch.Start();
|
||||
|
||||
std::string arg;
|
||||
std::vector<std::string> logstreams;
|
||||
std::string defaultLog("Error,Warning,Message,Performance");
|
||||
@ -216,11 +219,14 @@ void Grid_init(int *argc,char ***argv)
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--lebesgue") ){
|
||||
LebesgueOrder::UseLebesgueOrder=1;
|
||||
}
|
||||
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--cacheblocking") ){
|
||||
arg= GridCmdOptionPayload(*argv,*argv+*argc,"--cacheblocking");
|
||||
GridCmdOptionIntVector(arg,LebesgueOrder::Block);
|
||||
}
|
||||
if( GridCmdOptionExists(*argv,*argv+*argc,"--timestamp") ){
|
||||
GridLogTimestamp(1);
|
||||
}
|
||||
|
||||
GridParseLayout(*argv,*argc,
|
||||
Grid_default_latt,
|
||||
Grid_default_mpi);
|
||||
@ -274,12 +280,14 @@ void Grid_init(int *argc,char ***argv)
|
||||
std::cout << "GNU General Public License for more details."<<std::endl;
|
||||
std::cout << COL_BACKGROUND <<std::endl;
|
||||
std::cout << std::endl;
|
||||
|
||||
Grid_is_initialised = 1;
|
||||
}
|
||||
|
||||
|
||||
void Grid_finalize(void)
|
||||
{
|
||||
#ifdef GRID_COMMS_MPI
|
||||
#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3)
|
||||
MPI_Finalize();
|
||||
Grid_unquiesce_nodes();
|
||||
#endif
|
||||
|
@ -33,6 +33,7 @@ namespace Grid {
|
||||
|
||||
void Grid_init(int *argc,char ***argv);
|
||||
void Grid_finalize(void);
|
||||
|
||||
// internal, controled with --handle
|
||||
void Grid_sa_signal_handler(int sig,siginfo_t *si,void * ptr);
|
||||
void Grid_debug_handler_init(void);
|
||||
@ -44,6 +45,7 @@ namespace Grid {
|
||||
const std::vector<int> &GridDefaultMpi(void);
|
||||
const int &GridThreads(void) ;
|
||||
void GridSetThreads(int t) ;
|
||||
void GridLogTimestamp(int);
|
||||
|
||||
// Common parsing chores
|
||||
std::string GridCmdOptionPayload(char ** begin, char ** end, const std::string & option);
|
||||
|
@ -34,8 +34,13 @@ directory
|
||||
namespace Grid {
|
||||
|
||||
GridStopWatch Logger::StopWatch;
|
||||
int Logger::timestamp;
|
||||
std::ostream Logger::devnull(0);
|
||||
|
||||
void GridLogTimestamp(int on){
|
||||
Logger::Timestamp(on);
|
||||
}
|
||||
|
||||
Colours GridLogColours(0);
|
||||
GridLogger GridLogError(1, "Error", GridLogColours, "RED");
|
||||
GridLogger GridLogWarning(1, "Warning", GridLogColours, "YELLOW");
|
||||
@ -73,7 +78,7 @@ void GridLogConfigure(std::vector<std::string> &logstreams) {
|
||||
////////////////////////////////////////////////////////////
|
||||
void Grid_quiesce_nodes(void) {
|
||||
int me = 0;
|
||||
#ifdef GRID_COMMS_MPI
|
||||
#if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3)
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
#endif
|
||||
#ifdef GRID_COMMS_SHMEM
|
||||
|
59
lib/Log.h
59
lib/Log.h
@ -37,10 +37,11 @@
|
||||
#include <execinfo.h>
|
||||
#endif
|
||||
|
||||
namespace Grid {
|
||||
namespace Grid {
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Dress the output; use std::chrono for time stamping via the StopWatch class
|
||||
int Rank(void); // used for early stage debug before library init
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
class Colours{
|
||||
@ -55,7 +56,6 @@ public:
|
||||
|
||||
void Active(bool activate){
|
||||
is_active=activate;
|
||||
|
||||
if (is_active){
|
||||
colour["BLACK"] ="\033[30m";
|
||||
colour["RED"] ="\033[31m";
|
||||
@ -66,21 +66,18 @@ public:
|
||||
colour["CYAN"] ="\033[36m";
|
||||
colour["WHITE"] ="\033[37m";
|
||||
colour["NORMAL"] ="\033[0;39m";
|
||||
} else {
|
||||
colour["BLACK"] ="";
|
||||
colour["RED"] ="";
|
||||
colour["GREEN"] ="";
|
||||
colour["YELLOW"]="";
|
||||
colour["BLUE"] ="";
|
||||
colour["PURPLE"]="";
|
||||
colour["CYAN"] ="";
|
||||
colour["WHITE"] ="";
|
||||
colour["NORMAL"]="";
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
} else {
|
||||
colour["BLACK"] ="";
|
||||
colour["RED"] ="";
|
||||
colour["GREEN"] ="";
|
||||
colour["YELLOW"]="";
|
||||
colour["BLUE"] ="";
|
||||
colour["PURPLE"]="";
|
||||
colour["CYAN"] ="";
|
||||
colour["WHITE"] ="";
|
||||
colour["NORMAL"]="";
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
@ -88,6 +85,7 @@ class Logger {
|
||||
protected:
|
||||
Colours &Painter;
|
||||
int active;
|
||||
static int timestamp;
|
||||
std::string name, topName;
|
||||
std::string COLOUR;
|
||||
|
||||
@ -99,25 +97,28 @@ public:
|
||||
std::string evidence() {return Painter.colour["YELLOW"];}
|
||||
std::string colour() {return Painter.colour[COLOUR];}
|
||||
|
||||
Logger(std::string topNm, int on, std::string nm, Colours& col_class, std::string col)
|
||||
: active(on),
|
||||
name(nm),
|
||||
topName(topNm),
|
||||
Painter(col_class),
|
||||
COLOUR(col){} ;
|
||||
Logger(std::string topNm, int on, std::string nm, Colours& col_class, std::string col) : active(on),
|
||||
name(nm),
|
||||
topName(topNm),
|
||||
Painter(col_class),
|
||||
COLOUR(col) {} ;
|
||||
|
||||
void Active(int on) {active = on;};
|
||||
int isActive(void) {return active;};
|
||||
static void Timestamp(int on) {timestamp = on;};
|
||||
|
||||
friend std::ostream& operator<< (std::ostream& stream, Logger& log){
|
||||
|
||||
if ( log.active ) {
|
||||
StopWatch.Stop();
|
||||
GridTime now = StopWatch.Elapsed();
|
||||
StopWatch.Start();
|
||||
stream << log.background()<< log.topName << log.background()<< " : ";
|
||||
stream << log.colour() <<std::setw(14) << std::left << log.name << log.background() << " : ";
|
||||
stream << log.evidence()<< now << log.background() << " : " << log.colour();
|
||||
if ( log.timestamp ) {
|
||||
StopWatch.Stop();
|
||||
GridTime now = StopWatch.Elapsed();
|
||||
StopWatch.Start();
|
||||
stream << log.evidence()<< now << log.background() << " : " ;
|
||||
}
|
||||
stream << log.colour();
|
||||
return stream;
|
||||
} else {
|
||||
return devnull;
|
||||
@ -149,7 +150,7 @@ extern void * Grid_backtrace_buffer[_NBACKTRACE];
|
||||
|
||||
#define BACKTRACEFILE() {\
|
||||
char string[20]; \
|
||||
std::sprintf(string,"backtrace.%d",Rank()); \
|
||||
std::sprintf(string,"backtrace.%d",CartesianCommunicator::RankWorld()); \
|
||||
std::FILE * fp = std::fopen(string,"w"); \
|
||||
BACKTRACEFP(fp)\
|
||||
std::fclose(fp); \
|
||||
|
@ -1,18 +1,22 @@
|
||||
extra_sources=
|
||||
if BUILD_COMMS_MPI
|
||||
extra_sources+=communicator/Communicator_mpi.cc
|
||||
extra_sources+=communicator/Communicator_base.cc
|
||||
endif
|
||||
|
||||
if BUILD_COMMS_MPI3
|
||||
extra_sources+=communicator/Communicator_mpi3.cc
|
||||
extra_sources+=communicator/Communicator_base.cc
|
||||
endif
|
||||
|
||||
if BUILD_COMMS_SHMEM
|
||||
extra_sources+=communicator/Communicator_shmem.cc
|
||||
extra_sources+=communicator/Communicator_base.cc
|
||||
endif
|
||||
|
||||
if BUILD_COMMS_NONE
|
||||
extra_sources+=communicator/Communicator_none.cc
|
||||
extra_sources+=communicator/Communicator_base.cc
|
||||
endif
|
||||
|
||||
#
|
||||
|
1677
lib/Stencil.h
1677
lib/Stencil.h
File diff suppressed because it is too large
Load Diff
@ -127,6 +127,22 @@ class GridThread {
|
||||
ThreadBarrier();
|
||||
};
|
||||
|
||||
static void bcopy(const void *src, void *dst, size_t len) {
|
||||
#ifdef GRID_OMP
|
||||
#pragma omp parallel
|
||||
{
|
||||
const char *c_src =(char *) src;
|
||||
char *c_dest=(char *) dst;
|
||||
int me,mywork,myoff;
|
||||
GridThread::GetWorkBarrier(len,me, mywork,myoff);
|
||||
bcopy(&c_src[myoff],&c_dest[myoff],mywork);
|
||||
}
|
||||
#else
|
||||
bcopy(src,dst,len);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
132
lib/communicator/Communicator_base.cc
Normal file
132
lib/communicator/Communicator_base.cc
Normal file
@ -0,0 +1,132 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./lib/communicator/Communicator_none.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include "Grid.h"
|
||||
namespace Grid {
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Info that is setup once and indept of cartesian layout
|
||||
///////////////////////////////////////////////////////////////
|
||||
int CartesianCommunicator::ShmRank;
|
||||
int CartesianCommunicator::ShmSize;
|
||||
int CartesianCommunicator::GroupRank;
|
||||
int CartesianCommunicator::GroupSize;
|
||||
int CartesianCommunicator::WorldRank;
|
||||
int CartesianCommunicator::WorldSize;
|
||||
int CartesianCommunicator::Slave;
|
||||
void * CartesianCommunicator::ShmCommBuf;
|
||||
|
||||
/////////////////////////////////
|
||||
// Alloc, free shmem region
|
||||
/////////////////////////////////
|
||||
void *CartesianCommunicator::ShmBufferMalloc(size_t bytes){
|
||||
// bytes = (bytes+sizeof(vRealD))&(~(sizeof(vRealD)-1));// align up bytes
|
||||
void *ptr = (void *)heap_top;
|
||||
heap_top += bytes;
|
||||
heap_bytes+= bytes;
|
||||
std::cout <<"Shm alloc "<<ptr<<std::endl;
|
||||
assert(heap_bytes < MAX_MPI_SHM_BYTES);
|
||||
return ptr;
|
||||
}
|
||||
void CartesianCommunicator::ShmBufferFreeAll(void) {
|
||||
heap_top =(size_t)ShmBufferSelf();
|
||||
heap_bytes=0;
|
||||
}
|
||||
|
||||
/////////////////////////////////
|
||||
// Grid information queries
|
||||
/////////////////////////////////
|
||||
int CartesianCommunicator::IsBoss(void) { return _processor==0; };
|
||||
int CartesianCommunicator::BossRank(void) { return 0; };
|
||||
int CartesianCommunicator::ThisRank(void) { return _processor; };
|
||||
const std::vector<int> & CartesianCommunicator::ThisProcessorCoor(void) { return _processor_coor; };
|
||||
const std::vector<int> & CartesianCommunicator::ProcessorGrid(void) { return _processors; };
|
||||
int CartesianCommunicator::ProcessorCount(void) { return _Nprocessors; };
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// very VERY rarely (Log, serial RNG) we need world without a grid
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
int CartesianCommunicator::RankWorld(void){ return WorldRank; };
|
||||
int CartesianCommunicator::Ranks (void) { return WorldSize; };
|
||||
int CartesianCommunicator::Nodes (void) { return GroupSize; };
|
||||
int CartesianCommunicator::Cores (void) { return ShmSize; };
|
||||
int CartesianCommunicator::NodeRank (void) { return GroupRank; };
|
||||
int CartesianCommunicator::CoreRank (void) { return ShmRank; };
|
||||
|
||||
void CartesianCommunicator::GlobalSum(ComplexF &c)
|
||||
{
|
||||
GlobalSumVector((float *)&c,2);
|
||||
}
|
||||
void CartesianCommunicator::GlobalSumVector(ComplexF *c,int N)
|
||||
{
|
||||
GlobalSumVector((float *)c,2*N);
|
||||
}
|
||||
void CartesianCommunicator::GlobalSum(ComplexD &c)
|
||||
{
|
||||
GlobalSumVector((double *)&c,2);
|
||||
}
|
||||
void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N)
|
||||
{
|
||||
GlobalSumVector((double *)c,2*N);
|
||||
}
|
||||
|
||||
#ifndef GRID_COMMS_MPI3
|
||||
|
||||
void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||
void *xmit,
|
||||
int xmit_to_rank,
|
||||
void *recv,
|
||||
int recv_from_rank,
|
||||
int bytes)
|
||||
{
|
||||
SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
|
||||
}
|
||||
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall)
|
||||
{
|
||||
SendToRecvFromComplete(waitall);
|
||||
}
|
||||
void CartesianCommunicator::StencilBarrier(void){};
|
||||
|
||||
commVector<uint8_t> CartesianCommunicator::ShmBufStorageVector;
|
||||
|
||||
void *CartesianCommunicator::ShmBufferSelf(void) { return ShmCommBuf; }
|
||||
|
||||
void *CartesianCommunicator::ShmBuffer(int rank) {
|
||||
return NULL;
|
||||
}
|
||||
void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) {
|
||||
return NULL;
|
||||
}
|
||||
void CartesianCommunicator::ShmInitGeneric(void){
|
||||
ShmBufStorageVector.resize(MAX_MPI_SHM_BYTES);
|
||||
ShmCommBuf=(void *)&ShmBufStorageVector[0];
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
@ -40,143 +40,188 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
#ifdef GRID_COMMS_SHMEM
|
||||
#include <mpp/shmem.h>
|
||||
#endif
|
||||
|
||||
namespace Grid {
|
||||
|
||||
class CartesianCommunicator {
|
||||
public:
|
||||
|
||||
// 65536 ranks per node adequate for now
|
||||
// 128MB shared memory for comms enought for 48^4 local vol comms
|
||||
// Give external control (command line override?) of this
|
||||
|
||||
static const int MAXLOG2RANKSPERNODE = 16;
|
||||
static const uint64_t MAX_MPI_SHM_BYTES = 128*1024*1024;
|
||||
|
||||
// Communicator should know nothing of the physics grid, only processor grid.
|
||||
int _Nprocessors; // How many in all
|
||||
std::vector<int> _processors; // Which dimensions get relayed out over processors lanes.
|
||||
int _processor; // linear processor rank
|
||||
std::vector<int> _processor_coor; // linear processor coordinate
|
||||
unsigned long _ndimension;
|
||||
|
||||
int _Nprocessors; // How many in all
|
||||
std::vector<int> _processors; // Which dimensions get relayed out over processors lanes.
|
||||
int _processor; // linear processor rank
|
||||
std::vector<int> _processor_coor; // linear processor coordinate
|
||||
unsigned long _ndimension;
|
||||
|
||||
#ifdef GRID_COMMS_MPI
|
||||
MPI_Comm communicator;
|
||||
typedef MPI_Request CommsRequest_t;
|
||||
#elif GRID_COMMS_MPI3
|
||||
MPI_Comm communicator;
|
||||
typedef MPI_Request CommsRequest_t;
|
||||
|
||||
const int MAXLOG2RANKSPERNODE = 16; // 65536 ranks per node adequate for now
|
||||
|
||||
std::vector<int> WorldDims;
|
||||
std::vector<int> GroupDims;
|
||||
std::vector<int> ShmDims;
|
||||
|
||||
std::vector<int> GroupCoor;
|
||||
std::vector<int> ShmCoor;
|
||||
std::vector<int> WorldCoor;
|
||||
|
||||
int GroupRank;
|
||||
int ShmRank;
|
||||
int WorldRank;
|
||||
|
||||
int GroupSize;
|
||||
int ShmSize;
|
||||
int WorldSize;
|
||||
|
||||
std::vector<int> LexicographicToWorldRank;
|
||||
#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3)
|
||||
MPI_Comm communicator;
|
||||
static MPI_Comm communicator_world;
|
||||
typedef MPI_Request CommsRequest_t;
|
||||
#else
|
||||
typedef int CommsRequest_t;
|
||||
typedef int CommsRequest_t;
|
||||
#endif
|
||||
|
||||
static void Init(int *argc, char ***argv);
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Helper functionality for SHM Windows common to all other impls
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// Longer term; drop this in favour of a master / slave model with
|
||||
// cartesian communicator on a subset of ranks, slave ranks controlled
|
||||
// by group leader with data xfer via shared memory
|
||||
////////////////////////////////////////////////////////////////////
|
||||
#ifdef GRID_COMMS_MPI3
|
||||
std::vector<int> WorldDims;
|
||||
std::vector<int> GroupDims;
|
||||
std::vector<int> ShmDims;
|
||||
|
||||
// Constructor
|
||||
CartesianCommunicator(const std::vector<int> &pdimensions_in);
|
||||
std::vector<int> GroupCoor;
|
||||
std::vector<int> ShmCoor;
|
||||
std::vector<int> WorldCoor;
|
||||
|
||||
// Wraps MPI_Cart routines
|
||||
void ShiftedRanks(int dim,int shift,int & source, int & dest);
|
||||
int RankFromProcessorCoor(std::vector<int> &coor);
|
||||
void ProcessorCoorFromRank(int rank,std::vector<int> &coor);
|
||||
static std::vector<int> GroupRanks;
|
||||
static std::vector<int> MyGroup;
|
||||
static int ShmSetup;
|
||||
static MPI_Win ShmWindow;
|
||||
static MPI_Comm ShmComm;
|
||||
|
||||
/////////////////////////////////
|
||||
// Grid information queries
|
||||
/////////////////////////////////
|
||||
int IsBoss(void) { return _processor==0; };
|
||||
int BossRank(void) { return 0; };
|
||||
int ThisRank(void) { return _processor; };
|
||||
const std::vector<int> & ThisProcessorCoor(void) { return _processor_coor; };
|
||||
const std::vector<int> & ProcessorGrid(void) { return _processors; };
|
||||
int ProcessorCount(void) { return _Nprocessors; };
|
||||
std::vector<int> LexicographicToWorldRank;
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Reduction
|
||||
////////////////////////////////////////////////////////////
|
||||
void GlobalSum(RealF &);
|
||||
void GlobalSumVector(RealF *,int N);
|
||||
static std::vector<void *> ShmCommBufs;
|
||||
#else
|
||||
static void ShmInitGeneric(void);
|
||||
static commVector<uint8_t> ShmBufStorageVector;
|
||||
#endif
|
||||
static void * ShmCommBuf;
|
||||
size_t heap_top;
|
||||
size_t heap_bytes;
|
||||
void *ShmBufferSelf(void);
|
||||
void *ShmBuffer(int rank);
|
||||
void *ShmBufferTranslate(int rank,void * local_p);
|
||||
void *ShmBufferMalloc(size_t bytes);
|
||||
void ShmBufferFreeAll(void) ;
|
||||
|
||||
void GlobalSum(RealD &);
|
||||
void GlobalSumVector(RealD *,int N);
|
||||
////////////////////////////////////////////////
|
||||
// Must call in Grid startup
|
||||
////////////////////////////////////////////////
|
||||
static void Init(int *argc, char ***argv);
|
||||
|
||||
void GlobalSum(uint32_t &);
|
||||
void GlobalSum(uint64_t &);
|
||||
////////////////////////////////////////////////
|
||||
// Constructor of any given grid
|
||||
////////////////////////////////////////////////
|
||||
CartesianCommunicator(const std::vector<int> &pdimensions_in);
|
||||
|
||||
void GlobalSum(ComplexF &c)
|
||||
{
|
||||
GlobalSumVector((float *)&c,2);
|
||||
}
|
||||
void GlobalSumVector(ComplexF *c,int N)
|
||||
{
|
||||
GlobalSumVector((float *)c,2*N);
|
||||
}
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Wraps MPI_Cart routines, or implements equivalent on other impls
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
void ShiftedRanks(int dim,int shift,int & source, int & dest);
|
||||
int RankFromProcessorCoor(std::vector<int> &coor);
|
||||
void ProcessorCoorFromRank(int rank,std::vector<int> &coor);
|
||||
|
||||
void GlobalSum(ComplexD &c)
|
||||
{
|
||||
GlobalSumVector((double *)&c,2);
|
||||
}
|
||||
void GlobalSumVector(ComplexD *c,int N)
|
||||
{
|
||||
GlobalSumVector((double *)c,2*N);
|
||||
}
|
||||
/////////////////////////////////
|
||||
// Grid information and queries
|
||||
/////////////////////////////////
|
||||
static int ShmRank;
|
||||
static int ShmSize;
|
||||
static int GroupSize;
|
||||
static int GroupRank;
|
||||
static int WorldRank;
|
||||
static int WorldSize;
|
||||
static int Slave;
|
||||
|
||||
template<class obj> void GlobalSum(obj &o){
|
||||
typedef typename obj::scalar_type scalar_type;
|
||||
int words = sizeof(obj)/sizeof(scalar_type);
|
||||
scalar_type * ptr = (scalar_type *)& o;
|
||||
GlobalSumVector(ptr,words);
|
||||
}
|
||||
////////////////////////////////////////////////////////////
|
||||
// Face exchange, buffer swap in translational invariant way
|
||||
////////////////////////////////////////////////////////////
|
||||
void SendToRecvFrom(void *xmit,
|
||||
int xmit_to_rank,
|
||||
void *recv,
|
||||
int recv_from_rank,
|
||||
int bytes);
|
||||
int IsBoss(void) ;
|
||||
int BossRank(void) ;
|
||||
int ThisRank(void) ;
|
||||
const std::vector<int> & ThisProcessorCoor(void) ;
|
||||
const std::vector<int> & ProcessorGrid(void) ;
|
||||
int ProcessorCount(void) ;
|
||||
static int Ranks (void);
|
||||
static int Nodes (void);
|
||||
static int Cores (void);
|
||||
static int NodeRank (void);
|
||||
static int CoreRank (void);
|
||||
|
||||
void SendRecvPacket(void *xmit,
|
||||
void *recv,
|
||||
int xmit_to_rank,
|
||||
int recv_from_rank,
|
||||
int bytes);
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// very VERY rarely (Log, serial RNG) we need world without a grid
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
static int RankWorld(void) ;
|
||||
static void BroadcastWorld(int root,void* data, int bytes);
|
||||
|
||||
void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||
void *xmit,
|
||||
int xmit_to_rank,
|
||||
void *recv,
|
||||
int recv_from_rank,
|
||||
int bytes);
|
||||
void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
|
||||
////////////////////////////////////////////////////////////
|
||||
// Reduction
|
||||
////////////////////////////////////////////////////////////
|
||||
void GlobalSum(RealF &);
|
||||
void GlobalSumVector(RealF *,int N);
|
||||
void GlobalSum(RealD &);
|
||||
void GlobalSumVector(RealD *,int N);
|
||||
void GlobalSum(uint32_t &);
|
||||
void GlobalSum(uint64_t &);
|
||||
void GlobalSum(ComplexF &c);
|
||||
void GlobalSumVector(ComplexF *c,int N);
|
||||
void GlobalSum(ComplexD &c);
|
||||
void GlobalSumVector(ComplexD *c,int N);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Barrier
|
||||
////////////////////////////////////////////////////////////
|
||||
void Barrier(void);
|
||||
template<class obj> void GlobalSum(obj &o){
|
||||
typedef typename obj::scalar_type scalar_type;
|
||||
int words = sizeof(obj)/sizeof(scalar_type);
|
||||
scalar_type * ptr = (scalar_type *)& o;
|
||||
GlobalSumVector(ptr,words);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Broadcast a buffer and composite larger
|
||||
////////////////////////////////////////////////////////////
|
||||
void Broadcast(int root,void* data, int bytes);
|
||||
template<class obj> void Broadcast(int root,obj &data)
|
||||
////////////////////////////////////////////////////////////
|
||||
// Face exchange, buffer swap in translational invariant way
|
||||
////////////////////////////////////////////////////////////
|
||||
void SendToRecvFrom(void *xmit,
|
||||
int xmit_to_rank,
|
||||
void *recv,
|
||||
int recv_from_rank,
|
||||
int bytes);
|
||||
|
||||
void SendRecvPacket(void *xmit,
|
||||
void *recv,
|
||||
int xmit_to_rank,
|
||||
int recv_from_rank,
|
||||
int bytes);
|
||||
|
||||
void SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||
void *xmit,
|
||||
int xmit_to_rank,
|
||||
void *recv,
|
||||
int recv_from_rank,
|
||||
int bytes);
|
||||
|
||||
void SendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
|
||||
|
||||
void StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||
void *xmit,
|
||||
int xmit_to_rank,
|
||||
void *recv,
|
||||
int recv_from_rank,
|
||||
int bytes);
|
||||
|
||||
void StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &waitall);
|
||||
void StencilBarrier(void);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Barrier
|
||||
////////////////////////////////////////////////////////////
|
||||
void Barrier(void);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Broadcast a buffer and composite larger
|
||||
////////////////////////////////////////////////////////////
|
||||
void Broadcast(int root,void* data, int bytes);
|
||||
|
||||
template<class obj> void Broadcast(int root,obj &data)
|
||||
{
|
||||
Broadcast(root,(void *)&data,sizeof(data));
|
||||
};
|
||||
|
||||
static void BroadcastWorld(int root,void* data, int bytes);
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -30,21 +30,30 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
// Should error check all MPI calls.
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Info that is setup once and indept of cartesian layout
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
MPI_Comm CartesianCommunicator::communicator_world;
|
||||
|
||||
// Should error check all MPI calls.
|
||||
void CartesianCommunicator::Init(int *argc, char ***argv) {
|
||||
int flag;
|
||||
MPI_Initialized(&flag); // needed to coexist with other libs apparently
|
||||
if ( !flag ) {
|
||||
MPI_Init(argc,argv);
|
||||
}
|
||||
MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
|
||||
MPI_Comm_rank(communicator_world,&WorldRank);
|
||||
MPI_Comm_size(communicator_world,&WorldSize);
|
||||
ShmRank=0;
|
||||
ShmSize=1;
|
||||
GroupRank=WorldRank;
|
||||
GroupSize=WorldSize;
|
||||
Slave =0;
|
||||
ShmInitGeneric();
|
||||
}
|
||||
|
||||
int Rank(void) {
|
||||
int pe;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD,&pe);
|
||||
return pe;
|
||||
}
|
||||
|
||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||
{
|
||||
_ndimension = processors.size();
|
||||
@ -54,7 +63,7 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||
_processors = processors;
|
||||
_processor_coor.resize(_ndimension);
|
||||
|
||||
MPI_Cart_create(MPI_COMM_WORLD, _ndimension,&_processors[0],&periodic[0],1,&communicator);
|
||||
MPI_Cart_create(communicator_world, _ndimension,&_processors[0],&periodic[0],1,&communicator);
|
||||
MPI_Comm_rank(communicator,&_processor);
|
||||
MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);
|
||||
|
||||
@ -67,7 +76,6 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||
|
||||
assert(Size==_Nprocessors);
|
||||
}
|
||||
|
||||
void CartesianCommunicator::GlobalSum(uint32_t &u){
|
||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
|
||||
assert(ierr==0);
|
||||
@ -168,7 +176,6 @@ void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &
|
||||
int nreq=list.size();
|
||||
std::vector<MPI_Status> status(nreq);
|
||||
int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
|
||||
|
||||
assert(ierr==0);
|
||||
}
|
||||
|
||||
@ -187,14 +194,17 @@ void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
|
||||
communicator);
|
||||
assert(ierr==0);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// Should only be used prior to Grid Init finished.
|
||||
// Check for this?
|
||||
///////////////////////////////////////////////////////
|
||||
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
|
||||
{
|
||||
int ierr= MPI_Bcast(data,
|
||||
bytes,
|
||||
MPI_BYTE,
|
||||
root,
|
||||
MPI_COMM_WORLD);
|
||||
communicator_world);
|
||||
assert(ierr==0);
|
||||
}
|
||||
|
||||
|
@ -30,25 +30,199 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
namespace Grid {
|
||||
|
||||
// Global used by Init and nowhere else. How to hide?
|
||||
int Rank(void) {
|
||||
int pe;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD,&pe);
|
||||
return pe;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Info that is setup once and indept of cartesian layout
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
int CartesianCommunicator::ShmSetup = 0;
|
||||
|
||||
MPI_Comm CartesianCommunicator::communicator_world;
|
||||
MPI_Comm CartesianCommunicator::ShmComm;
|
||||
MPI_Win CartesianCommunicator::ShmWindow;
|
||||
|
||||
std::vector<int> CartesianCommunicator::GroupRanks;
|
||||
std::vector<int> CartesianCommunicator::MyGroup;
|
||||
std::vector<void *> CartesianCommunicator::ShmCommBufs;
|
||||
|
||||
void *CartesianCommunicator::ShmBufferSelf(void)
|
||||
{
|
||||
return ShmCommBufs[ShmRank];
|
||||
}
|
||||
// Should error check all MPI calls.
|
||||
void *CartesianCommunicator::ShmBuffer(int rank)
|
||||
{
|
||||
int gpeer = GroupRanks[rank];
|
||||
if (gpeer == MPI_UNDEFINED){
|
||||
return NULL;
|
||||
} else {
|
||||
return ShmCommBufs[gpeer];
|
||||
}
|
||||
}
|
||||
void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p)
|
||||
{
|
||||
int gpeer = GroupRanks[rank];
|
||||
if (gpeer == MPI_UNDEFINED){
|
||||
return NULL;
|
||||
} else {
|
||||
uint64_t offset = (uint64_t)local_p - (uint64_t)ShmCommBufs[ShmRank];
|
||||
uint64_t remote = (uint64_t)ShmCommBufs[gpeer]+offset;
|
||||
return (void *) remote;
|
||||
}
|
||||
}
|
||||
|
||||
void CartesianCommunicator::Init(int *argc, char ***argv) {
|
||||
int flag;
|
||||
MPI_Initialized(&flag); // needed to coexist with other libs apparently
|
||||
if ( !flag ) {
|
||||
MPI_Init(argc,argv);
|
||||
}
|
||||
}
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Want to implement some magic ... Group sub-cubes into those on same node
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
|
||||
MPI_Comm_rank(communicator_world,&WorldRank);
|
||||
MPI_Comm_size(communicator_world,&WorldSize);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
// Split into groups that can share memory
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
MPI_Comm_split_type(communicator_world, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&ShmComm);
|
||||
MPI_Comm_rank(ShmComm ,&ShmRank);
|
||||
MPI_Comm_size(ShmComm ,&ShmSize);
|
||||
GroupSize = WorldSize/ShmSize;
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
// find world ranks in our SHM group (i.e. which ranks are on our node)
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
MPI_Group WorldGroup, ShmGroup;
|
||||
MPI_Comm_group (communicator_world, &WorldGroup);
|
||||
MPI_Comm_group (ShmComm, &ShmGroup);
|
||||
|
||||
std::vector<int> world_ranks(WorldSize);
|
||||
GroupRanks.resize(WorldSize);
|
||||
MyGroup.resize(ShmSize);
|
||||
for(int r=0;r<WorldSize;r++) world_ranks[r]=r;
|
||||
|
||||
MPI_Group_translate_ranks (WorldGroup,WorldSize,&world_ranks[0],ShmGroup, &GroupRanks[0]);
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Identify who is in my group and noninate the leader
|
||||
///////////////////////////////////////////////////////////////////
|
||||
int g=0;
|
||||
for(int rank=0;rank<WorldSize;rank++){
|
||||
if(GroupRanks[rank]!=MPI_UNDEFINED){
|
||||
assert(g<ShmSize);
|
||||
MyGroup[g++] = rank;
|
||||
}
|
||||
}
|
||||
|
||||
std::sort(MyGroup.begin(),MyGroup.end(),std::less<int>());
|
||||
int myleader = MyGroup[0];
|
||||
|
||||
std::vector<int> leaders_1hot(WorldSize,0);
|
||||
std::vector<int> leaders_group(GroupSize,0);
|
||||
leaders_1hot [ myleader ] = 1;
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// global sum leaders over comm world
|
||||
///////////////////////////////////////////////////////////////////
|
||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,communicator_world);
|
||||
assert(ierr==0);
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// find the group leaders world rank
|
||||
///////////////////////////////////////////////////////////////////
|
||||
int group=0;
|
||||
for(int l=0;l<WorldSize;l++){
|
||||
if(leaders_1hot[l]){
|
||||
leaders_group[group++] = l;
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Identify the rank of the group in which I (and my leader) live
|
||||
///////////////////////////////////////////////////////////////////
|
||||
GroupRank=-1;
|
||||
for(int g=0;g<GroupSize;g++){
|
||||
if (myleader == leaders_group[g]){
|
||||
GroupRank=g;
|
||||
}
|
||||
}
|
||||
assert(GroupRank!=-1);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// allocate the shared window for our group
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
ShmCommBuf = 0;
|
||||
ierr = MPI_Win_allocate_shared(MAX_MPI_SHM_BYTES,1,MPI_INFO_NULL,ShmComm,&ShmCommBuf,&ShmWindow);
|
||||
assert(ierr==0);
|
||||
// KNL hack -- force to numa-domain 1 in flat
|
||||
#if 0
|
||||
//#include <numaif.h>
|
||||
for(uint64_t page=0;page<MAX_MPI_SHM_BYTES;page+=4096){
|
||||
void *pages = (void *) ( page + ShmCommBuf );
|
||||
int status;
|
||||
int flags=MPOL_MF_MOVE_ALL;
|
||||
int nodes=1; // numa domain == MCDRAM
|
||||
unsigned long count=1;
|
||||
ierr= move_pages(0,count, &pages,&nodes,&status,flags);
|
||||
if (ierr && (page==0)) perror("numa relocate command failed");
|
||||
}
|
||||
#endif
|
||||
MPI_Win_lock_all (MPI_MODE_NOCHECK, ShmWindow);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Plan: allocate a fixed SHM region. Scratch that is just used via some scheme during stencil comms, with no allocate free.
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
ShmCommBufs.resize(ShmSize);
|
||||
for(int r=0;r<ShmSize;r++){
|
||||
MPI_Aint sz;
|
||||
int dsp_unit;
|
||||
MPI_Win_shared_query (ShmWindow, r, &sz, &dsp_unit, &ShmCommBufs[r]);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Verbose for now
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
if (WorldRank == 0){
|
||||
std::cout<<GridLogMessage<< "Grid MPI-3 configuration: detected ";
|
||||
std::cout<< WorldSize << " Ranks " ;
|
||||
std::cout<< GroupSize << " Nodes " ;
|
||||
std::cout<< ShmSize << " with ranks-per-node "<<std::endl;
|
||||
|
||||
std::cout<<GridLogMessage <<"Grid MPI-3 configuration: allocated shared memory region of size ";
|
||||
std::cout<<std::hex << MAX_MPI_SHM_BYTES <<" ShmCommBuf address = "<<ShmCommBuf << std::dec<<std::endl;
|
||||
|
||||
for(int g=0;g<GroupSize;g++){
|
||||
std::cout<<GridLogMessage<<" Node "<<g<<" led by MPI rank "<<leaders_group[g]<<std::endl;
|
||||
}
|
||||
|
||||
std::cout<<GridLogMessage<<" Boss Node Shm Pointers are {";
|
||||
for(int g=0;g<ShmSize;g++){
|
||||
std::cout<<std::hex<<ShmCommBufs[g]<<std::dec;
|
||||
if(g!=ShmSize-1) std::cout<<",";
|
||||
else std::cout<<"}"<<std::endl;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
for(int g=0;g<GroupSize;g++){
|
||||
if ( (ShmRank == 0) && (GroupRank==g) ) std::cout<<GridLogMessage<<"["<<g<<"] Node Group "<<g<<" is ranks {";
|
||||
for(int r=0;r<ShmSize;r++){
|
||||
if ( (ShmRank == 0) && (GroupRank==g) ) {
|
||||
std::cout<<MyGroup[r];
|
||||
if(r<ShmSize-1) std::cout<<",";
|
||||
else std::cout<<"}"<<std::endl;
|
||||
}
|
||||
MPI_Barrier(communicator_world);
|
||||
}
|
||||
}
|
||||
|
||||
assert(ShmSetup==0); ShmSetup=1;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Want to implement some magic ... Group sub-cubes into those on same node
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
|
||||
{
|
||||
std::vector<int> coor = _processor_coor;
|
||||
@ -78,27 +252,11 @@ void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &c
|
||||
|
||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||
{
|
||||
int ierr;
|
||||
|
||||
communicator=communicator_world;
|
||||
|
||||
_ndimension = processors.size();
|
||||
std::cout << "Creating "<< _ndimension << " dim communicator "<<std::endl;
|
||||
for(int d =0;d<_ndimension;d++){
|
||||
std::cout << processors[d]<<" ";
|
||||
};
|
||||
std::cout << std::endl;
|
||||
|
||||
WorldDims = processors;
|
||||
|
||||
communicator = MPI_COMM_WORLD;
|
||||
MPI_Comm shmcomm;
|
||||
MPI_Comm_split_type(communicator, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&shmcomm);
|
||||
MPI_Comm_rank(communicator,&WorldRank);
|
||||
MPI_Comm_size(communicator,&WorldSize);
|
||||
MPI_Comm_rank(shmcomm ,&ShmRank);
|
||||
MPI_Comm_size(shmcomm ,&ShmSize);
|
||||
GroupSize = WorldSize/ShmSize;
|
||||
|
||||
std::cout<< "Ranks per node "<< ShmSize << std::endl;
|
||||
std::cout<< "Nodes "<< GroupSize << std::endl;
|
||||
std::cout<< "Ranks "<< WorldSize << std::endl;
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Assert power of two shm_size.
|
||||
@ -118,46 +276,27 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||
////////////////////////////////////////////////////////////////
|
||||
int dim = 0;
|
||||
|
||||
std::vector<int> WorldDims = processors;
|
||||
|
||||
ShmDims.resize(_ndimension,1);
|
||||
GroupDims.resize(_ndimension);
|
||||
|
||||
ShmCoor.resize(_ndimension);
|
||||
GroupCoor.resize(_ndimension);
|
||||
WorldCoor.resize(_ndimension);
|
||||
|
||||
for(int l2=0;l2<log2size;l2++){
|
||||
while ( WorldDims[dim] / ShmDims[dim] <= 1 ) dim=(dim+1)%_ndimension;
|
||||
ShmDims[dim]*=2;
|
||||
dim=(dim+1)%_ndimension;
|
||||
}
|
||||
|
||||
std::cout << "Shm group dims "<<std::endl;
|
||||
for(int d =0;d<_ndimension;d++){
|
||||
std::cout << ShmDims[d]<<" ";
|
||||
};
|
||||
std::cout << std::endl;
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Establish torus of processes and nodes with sub-blockings
|
||||
////////////////////////////////////////////////////////////////
|
||||
for(int d=0;d<_ndimension;d++){
|
||||
GroupDims[d] = WorldDims[d]/ShmDims[d];
|
||||
}
|
||||
std::cout << "Group dims "<<std::endl;
|
||||
for(int d =0;d<_ndimension;d++){
|
||||
std::cout << GroupDims[d]<<" ";
|
||||
};
|
||||
std::cout << std::endl;
|
||||
|
||||
MPI_Group WorldGroup, ShmGroup;
|
||||
MPI_Comm_group (communicator, &WorldGroup);
|
||||
MPI_Comm_group (shmcomm, &ShmGroup);
|
||||
|
||||
std::vector<int> world_ranks(WorldSize);
|
||||
std::vector<int> group_ranks(WorldSize);
|
||||
std::vector<int> mygroup(GroupSize);
|
||||
for(int r=0;r<WorldSize;r++) world_ranks[r]=r;
|
||||
|
||||
MPI_Group_translate_ranks (WorldGroup,WorldSize,&world_ranks[0],ShmGroup, &group_ranks[0]);
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Check processor counts match
|
||||
@ -166,56 +305,10 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||
_processors = processors;
|
||||
_processor_coor.resize(_ndimension);
|
||||
for(int i=0;i<_ndimension;i++){
|
||||
std::cout << " p " << _processors[i]<<std::endl;
|
||||
_Nprocessors*=_processors[i];
|
||||
}
|
||||
std::cout << " World " <<WorldSize <<" Nproc "<<_Nprocessors<<std::endl;
|
||||
assert(WorldSize==_Nprocessors);
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Identify who is in my group and noninate the leader
|
||||
///////////////////////////////////////////////////////////////////
|
||||
int g=0;
|
||||
for(int rank=0;rank<WorldSize;rank++){
|
||||
if(group_ranks[rank]!=MPI_UNDEFINED){
|
||||
mygroup[g] = rank;
|
||||
}
|
||||
}
|
||||
|
||||
std::sort(mygroup.begin(),mygroup.end(),std::greater<int>());
|
||||
int myleader = mygroup[0];
|
||||
|
||||
std::vector<int> leaders_1hot(WorldSize,0);
|
||||
std::vector<int> leaders_group(GroupSize,0);
|
||||
leaders_1hot [ myleader ] = 1;
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// global sum leaders over comm world
|
||||
///////////////////////////////////////////////////////////////////
|
||||
int ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,communicator);
|
||||
assert(ierr==0);
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// find the group leaders world rank
|
||||
///////////////////////////////////////////////////////////////////
|
||||
int group=0;
|
||||
for(int l=0;l<WorldSize;l++){
|
||||
if(leaders_1hot[l]){
|
||||
leaders_group[group++] = l;
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// Identify the rank of the group in which I (and my leader) live
|
||||
///////////////////////////////////////////////////////////////////
|
||||
GroupRank=-1;
|
||||
for(int g=0;g<GroupSize;g++){
|
||||
if (myleader == leaders_group[g]){
|
||||
GroupRank=g;
|
||||
}
|
||||
}
|
||||
assert(GroupRank!=-1);
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Establish mapping between lexico physics coord and WorldRank
|
||||
//
|
||||
@ -307,6 +400,80 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
|
||||
int from,
|
||||
int bytes)
|
||||
{
|
||||
#if 0
|
||||
this->StencilBarrier();
|
||||
|
||||
MPI_Request xrq;
|
||||
MPI_Request rrq;
|
||||
|
||||
static int sequence;
|
||||
|
||||
int ierr;
|
||||
int tag;
|
||||
int check;
|
||||
|
||||
assert(dest != _processor);
|
||||
assert(from != _processor);
|
||||
|
||||
int gdest = GroupRanks[dest];
|
||||
int gfrom = GroupRanks[from];
|
||||
int gme = GroupRanks[_processor];
|
||||
|
||||
sequence++;
|
||||
|
||||
char *from_ptr = (char *)ShmCommBufs[ShmRank];
|
||||
|
||||
int small = (bytes<MAX_MPI_SHM_BYTES);
|
||||
|
||||
typedef uint64_t T;
|
||||
int words = bytes/sizeof(T);
|
||||
|
||||
assert(((size_t)bytes &(sizeof(T)-1))==0);
|
||||
assert(gme == ShmRank);
|
||||
|
||||
if ( small && (gdest !=MPI_UNDEFINED) ) {
|
||||
|
||||
char *to_ptr = (char *)ShmCommBufs[gdest];
|
||||
|
||||
assert(gme != gdest);
|
||||
|
||||
T *ip = (T *)xmit;
|
||||
T *op = (T *)to_ptr;
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int w=0;w<words;w++) {
|
||||
op[w]=ip[w];
|
||||
}
|
||||
|
||||
bcopy(&_processor,&to_ptr[bytes],sizeof(_processor));
|
||||
bcopy(& sequence,&to_ptr[bytes+4],sizeof(sequence));
|
||||
} else {
|
||||
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
|
||||
assert(ierr==0);
|
||||
list.push_back(xrq);
|
||||
}
|
||||
|
||||
this->StencilBarrier();
|
||||
|
||||
if (small && (gfrom !=MPI_UNDEFINED) ) {
|
||||
T *ip = (T *)from_ptr;
|
||||
T *op = (T *)recv;
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int w=0;w<words;w++) {
|
||||
op[w]=ip[w];
|
||||
}
|
||||
bcopy(&from_ptr[bytes] ,&tag ,sizeof(tag));
|
||||
bcopy(&from_ptr[bytes+4],&check,sizeof(check));
|
||||
assert(check==sequence);
|
||||
assert(tag==from);
|
||||
} else {
|
||||
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
|
||||
assert(ierr==0);
|
||||
list.push_back(rrq);
|
||||
}
|
||||
|
||||
this->StencilBarrier();
|
||||
|
||||
#else
|
||||
MPI_Request xrq;
|
||||
MPI_Request rrq;
|
||||
int rank = _processor;
|
||||
@ -318,13 +485,62 @@ void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &lis
|
||||
|
||||
list.push_back(xrq);
|
||||
list.push_back(rrq);
|
||||
#endif
|
||||
}
|
||||
|
||||
void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
|
||||
void *xmit,
|
||||
int dest,
|
||||
void *recv,
|
||||
int from,
|
||||
int bytes)
|
||||
{
|
||||
MPI_Request xrq;
|
||||
MPI_Request rrq;
|
||||
|
||||
int ierr;
|
||||
|
||||
assert(dest != _processor);
|
||||
assert(from != _processor);
|
||||
|
||||
int gdest = GroupRanks[dest];
|
||||
int gfrom = GroupRanks[from];
|
||||
int gme = GroupRanks[_processor];
|
||||
|
||||
assert(gme == ShmRank);
|
||||
|
||||
if ( gdest == MPI_UNDEFINED ) {
|
||||
ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
|
||||
assert(ierr==0);
|
||||
list.push_back(xrq);
|
||||
}
|
||||
|
||||
if ( gfrom ==MPI_UNDEFINED) {
|
||||
ierr=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
|
||||
assert(ierr==0);
|
||||
list.push_back(rrq);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
||||
{
|
||||
SendToRecvFromComplete(list);
|
||||
}
|
||||
|
||||
void CartesianCommunicator::StencilBarrier(void)
|
||||
{
|
||||
MPI_Win_sync (ShmWindow);
|
||||
MPI_Barrier (ShmComm);
|
||||
MPI_Win_sync (ShmWindow);
|
||||
}
|
||||
|
||||
void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
|
||||
{
|
||||
int nreq=list.size();
|
||||
std::vector<MPI_Status> status(nreq);
|
||||
int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
|
||||
|
||||
assert(ierr==0);
|
||||
}
|
||||
|
||||
@ -350,7 +566,7 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
|
||||
bytes,
|
||||
MPI_BYTE,
|
||||
root,
|
||||
MPI_COMM_WORLD);
|
||||
communicator_world);
|
||||
assert(ierr==0);
|
||||
}
|
||||
|
||||
|
@ -28,12 +28,22 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
#include "Grid.h"
|
||||
namespace Grid {
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Info that is setup once and indept of cartesian layout
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void CartesianCommunicator::Init(int *argc, char *** arv)
|
||||
{
|
||||
WorldRank = 0;
|
||||
WorldSize = 1;
|
||||
ShmRank=0;
|
||||
ShmSize=1;
|
||||
GroupRank=WorldRank;
|
||||
GroupSize=WorldSize;
|
||||
Slave =0;
|
||||
ShmInitGeneric();
|
||||
}
|
||||
|
||||
int Rank(void ){ return 0; };
|
||||
|
||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||
{
|
||||
_processors = processors;
|
||||
@ -89,30 +99,16 @@ void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &
|
||||
assert(0);
|
||||
}
|
||||
|
||||
void CartesianCommunicator::Barrier(void)
|
||||
{
|
||||
}
|
||||
|
||||
void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
|
||||
{
|
||||
}
|
||||
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
void CartesianCommunicator::Barrier(void){}
|
||||
void CartesianCommunicator::Broadcast(int root,void* data, int bytes) {}
|
||||
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) { }
|
||||
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor) { return 0;}
|
||||
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor){ assert(0);}
|
||||
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
|
||||
{
|
||||
source =0;
|
||||
dest=0;
|
||||
}
|
||||
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -39,17 +39,22 @@ namespace Grid {
|
||||
BACKTRACEFILE(); \
|
||||
}\
|
||||
}
|
||||
int Rank(void) {
|
||||
return shmem_my_pe();
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Info that is setup once and indept of cartesian layout
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
typedef struct HandShake_t {
|
||||
uint64_t seq_local;
|
||||
uint64_t seq_remote;
|
||||
} HandShake;
|
||||
|
||||
|
||||
static Vector< HandShake > XConnections;
|
||||
static Vector< HandShake > RConnections;
|
||||
|
||||
|
||||
void CartesianCommunicator::Init(int *argc, char ***argv) {
|
||||
shmem_init();
|
||||
XConnections.resize(shmem_n_pes());
|
||||
@ -60,8 +65,17 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
|
||||
RConnections[pe].seq_local = 0;
|
||||
RConnections[pe].seq_remote= 0;
|
||||
}
|
||||
WorldSize = shmem_n_pes();
|
||||
WorldRank = shmem_my_pe();
|
||||
ShmRank=0;
|
||||
ShmSize=1;
|
||||
GroupRank=WorldRank;
|
||||
GroupSize=WorldSize;
|
||||
Slave =0;
|
||||
shmem_barrier_all();
|
||||
ShmInitGeneric();
|
||||
}
|
||||
|
||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
|
||||
{
|
||||
_ndimension = processors.size();
|
||||
@ -230,12 +244,9 @@ void CartesianCommunicator::SendRecvPacket(void *xmit,
|
||||
|
||||
if ( _processor == sender ) {
|
||||
|
||||
printf("Sender SHMEM pt2pt %d -> %d\n",sender,receiver);
|
||||
// Check he has posted a receive
|
||||
while(SendSeq->seq_remote == SendSeq->seq_local);
|
||||
|
||||
printf("Sender receive %d posted\n",sender,receiver);
|
||||
|
||||
// Advance our send count
|
||||
seq = ++(SendSeq->seq_local);
|
||||
|
||||
@ -244,26 +255,19 @@ void CartesianCommunicator::SendRecvPacket(void *xmit,
|
||||
shmem_putmem(recv,xmit,bytes,receiver);
|
||||
shmem_fence();
|
||||
|
||||
printf("Sender sent payload %d\n",seq);
|
||||
//Notify him we're done
|
||||
shmem_putmem((void *)&(RecvSeq->seq_remote),&seq,sizeof(seq),receiver);
|
||||
shmem_fence();
|
||||
printf("Sender ringing door bell %d\n",seq);
|
||||
}
|
||||
if ( _processor == receiver ) {
|
||||
|
||||
printf("Receiver SHMEM pt2pt %d->%d\n",sender,receiver);
|
||||
// Post a receive
|
||||
seq = ++(RecvSeq->seq_local);
|
||||
shmem_putmem((void *)&(SendSeq->seq_remote),&seq,sizeof(seq),sender);
|
||||
|
||||
printf("Receiver Opening letter box %d\n",seq);
|
||||
|
||||
|
||||
// Now wait until he has advanced our reception counter
|
||||
while(RecvSeq->seq_remote != RecvSeq->seq_local);
|
||||
|
||||
printf("Receiver Got the mail %d\n",seq);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -297,8 +297,9 @@ namespace Grid {
|
||||
|
||||
int l_idx=generator_idx(o_idx,i_idx);
|
||||
|
||||
std::vector<int> site_seeds(4);
|
||||
for(int i=0;i<4;i++){
|
||||
const int num_rand_seed=16;
|
||||
std::vector<int> site_seeds(num_rand_seed);
|
||||
for(int i=0;i<site_seeds.size();i++){
|
||||
site_seeds[i]= ui(pseeder);
|
||||
}
|
||||
|
||||
|
@ -33,511 +33,500 @@ directory
|
||||
#define GRID_QCD_FERMION_OPERATOR_IMPL_H
|
||||
|
||||
namespace Grid {
|
||||
|
||||
namespace QCD {
|
||||
namespace QCD {
|
||||
|
||||
|
||||
//////////////////////////////////////////////
|
||||
// Template parameter class constructs to package
|
||||
// externally control Fermion implementations
|
||||
// in orthogonal directions
|
||||
//
|
||||
// Ultimately need Impl to always define types where XXX is opaque
|
||||
//
|
||||
// typedef typename XXX Simd;
|
||||
// typedef typename XXX GaugeLinkField;
|
||||
// typedef typename XXX GaugeField;
|
||||
// typedef typename XXX GaugeActField;
|
||||
// typedef typename XXX FermionField;
|
||||
// typedef typename XXX DoubledGaugeField;
|
||||
// typedef typename XXX SiteSpinor;
|
||||
// typedef typename XXX SiteHalfSpinor;
|
||||
// typedef typename XXX Compressor;
|
||||
//
|
||||
// and Methods:
|
||||
// void ImportGauge(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
||||
// void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
||||
// void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St)
|
||||
// void InsertForce4D(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu)
|
||||
// void InsertForce5D(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu)
|
||||
//
|
||||
//
|
||||
// To acquire the typedefs from "Base" (either a base class or template param) use:
|
||||
//
|
||||
// INHERIT_GIMPL_TYPES(Base)
|
||||
// INHERIT_FIMPL_TYPES(Base)
|
||||
// INHERIT_IMPL_TYPES(Base)
|
||||
//
|
||||
// The Fermion operators will do the following:
|
||||
//
|
||||
// struct MyOpParams {
|
||||
// RealD mass;
|
||||
// };
|
||||
//
|
||||
//
|
||||
// template<class Impl>
|
||||
// class MyOp : public<Impl> {
|
||||
// public:
|
||||
//
|
||||
// INHERIT_ALL_IMPL_TYPES(Impl);
|
||||
//
|
||||
// MyOp(MyOpParams Myparm, ImplParams &ImplParam) : Impl(ImplParam)
|
||||
// {
|
||||
//
|
||||
// };
|
||||
//
|
||||
// }
|
||||
//////////////////////////////////////////////
|
||||
//////////////////////////////////////////////
|
||||
// Template parameter class constructs to package
|
||||
// externally control Fermion implementations
|
||||
// in orthogonal directions
|
||||
//
|
||||
// Ultimately need Impl to always define types where XXX is opaque
|
||||
//
|
||||
// typedef typename XXX Simd;
|
||||
// typedef typename XXX GaugeLinkField;
|
||||
// typedef typename XXX GaugeField;
|
||||
// typedef typename XXX GaugeActField;
|
||||
// typedef typename XXX FermionField;
|
||||
// typedef typename XXX DoubledGaugeField;
|
||||
// typedef typename XXX SiteSpinor;
|
||||
// typedef typename XXX SiteHalfSpinor;
|
||||
// typedef typename XXX Compressor;
|
||||
//
|
||||
// and Methods:
|
||||
// void ImportGauge(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
||||
// void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
||||
// void multLink(SiteHalfSpinor &phi,const SiteDoubledGaugeField &U,const SiteHalfSpinor &chi,int mu,StencilEntry *SE,StencilImpl &St)
|
||||
// void InsertForce4D(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu)
|
||||
// void InsertForce5D(GaugeField &mat,const FermionField &Btilde,const FermionField &A,int mu)
|
||||
//
|
||||
//
|
||||
// To acquire the typedefs from "Base" (either a base class or template param) use:
|
||||
//
|
||||
// INHERIT_GIMPL_TYPES(Base)
|
||||
// INHERIT_FIMPL_TYPES(Base)
|
||||
// INHERIT_IMPL_TYPES(Base)
|
||||
//
|
||||
// The Fermion operators will do the following:
|
||||
//
|
||||
// struct MyOpParams {
|
||||
// RealD mass;
|
||||
// };
|
||||
//
|
||||
//
|
||||
// template<class Impl>
|
||||
// class MyOp : public<Impl> {
|
||||
// public:
|
||||
//
|
||||
// INHERIT_ALL_IMPL_TYPES(Impl);
|
||||
//
|
||||
// MyOp(MyOpParams Myparm, ImplParams &ImplParam) : Impl(ImplParam)
|
||||
// {
|
||||
//
|
||||
// };
|
||||
//
|
||||
// }
|
||||
//////////////////////////////////////////////
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Implementation dependent fermion types
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Implementation dependent fermion types
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define INHERIT_FIMPL_TYPES(Impl)\
|
||||
typedef typename Impl::FermionField FermionField; \
|
||||
typedef typename Impl::DoubledGaugeField DoubledGaugeField; \
|
||||
typedef typename Impl::SiteSpinor SiteSpinor; \
|
||||
typedef typename Impl::SiteHalfSpinor SiteHalfSpinor; \
|
||||
typedef typename Impl::Compressor Compressor; \
|
||||
typedef typename Impl::StencilImpl StencilImpl; \
|
||||
typedef typename Impl::ImplParams ImplParams; \
|
||||
typedef typename Impl::Coeff_t Coeff_t;
|
||||
typedef typename Impl::FermionField FermionField; \
|
||||
typedef typename Impl::DoubledGaugeField DoubledGaugeField; \
|
||||
typedef typename Impl::SiteSpinor SiteSpinor; \
|
||||
typedef typename Impl::SiteHalfSpinor SiteHalfSpinor; \
|
||||
typedef typename Impl::Compressor Compressor; \
|
||||
typedef typename Impl::StencilImpl StencilImpl; \
|
||||
typedef typename Impl::ImplParams ImplParams; \
|
||||
typedef typename Impl::Coeff_t Coeff_t;
|
||||
|
||||
#define INHERIT_IMPL_TYPES(Base) \
|
||||
INHERIT_GIMPL_TYPES(Base) \
|
||||
INHERIT_FIMPL_TYPES(Base)
|
||||
INHERIT_GIMPL_TYPES(Base) \
|
||||
INHERIT_FIMPL_TYPES(Base)
|
||||
|
||||
///////
|
||||
// Single flavour four spinors with colour index
|
||||
///////
|
||||
template <class S, class Representation = FundamentalRepresentation,class _Coeff_t = RealD >
|
||||
class WilsonImpl
|
||||
: public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
|
||||
public:
|
||||
static const int Dimension = Representation::Dimension;
|
||||
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Single flavour four spinors with colour index
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
template <class S, class Representation = FundamentalRepresentation,class _Coeff_t = RealD >
|
||||
class WilsonImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
|
||||
|
||||
//Necessary?
|
||||
constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
|
||||
|
||||
const bool LsVectorised=false;
|
||||
typedef _Coeff_t Coeff_t;
|
||||
|
||||
|
||||
INHERIT_GIMPL_TYPES(Gimpl);
|
||||
|
||||
template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Dimension>, Ns> >;
|
||||
template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >;
|
||||
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;
|
||||
|
||||
typedef iImplSpinor<Simd> SiteSpinor;
|
||||
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
|
||||
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
|
||||
|
||||
typedef Lattice<SiteSpinor> FermionField;
|
||||
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
|
||||
|
||||
typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
|
||||
typedef WilsonImplParams ImplParams;
|
||||
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
|
||||
|
||||
ImplParams Params;
|
||||
|
||||
WilsonImpl(const ImplParams &p = ImplParams()) : Params(p){};
|
||||
|
||||
bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
|
||||
|
||||
inline void multLink(SiteHalfSpinor &phi,
|
||||
const SiteDoubledGaugeField &U,
|
||||
const SiteHalfSpinor &chi,
|
||||
int mu,
|
||||
StencilEntry *SE,
|
||||
StencilImpl &St) {
|
||||
mult(&phi(), &U(mu), &chi());
|
||||
}
|
||||
|
||||
template <class ref>
|
||||
inline void loadLinkElement(Simd ®,
|
||||
ref &memory) {
|
||||
reg = memory;
|
||||
}
|
||||
|
||||
inline void DoubleStore(GridBase *GaugeGrid,
|
||||
DoubledGaugeField &Uds,
|
||||
const GaugeField &Umu) {
|
||||
conformable(Uds._grid, GaugeGrid);
|
||||
conformable(Umu._grid, GaugeGrid);
|
||||
GaugeLinkField U(GaugeGrid);
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
U = PeekIndex<LorentzIndex>(Umu, mu);
|
||||
PokeIndex<LorentzIndex>(Uds, U, mu);
|
||||
U = adj(Cshift(U, mu, -1));
|
||||
PokeIndex<LorentzIndex>(Uds, U, mu + 4);
|
||||
}
|
||||
}
|
||||
|
||||
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
|
||||
GaugeLinkField link(mat._grid);
|
||||
link = TraceIndex<SpinIndex>(outerProduct(Btilde,A));
|
||||
PokeIndex<LorentzIndex>(mat,link,mu);
|
||||
}
|
||||
|
||||
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã,int mu){
|
||||
|
||||
int Ls=Btilde._grid->_fdimensions[0];
|
||||
GaugeLinkField tmp(mat._grid);
|
||||
tmp = zero;
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int sss=0;sss<tmp._grid->oSites();sss++){
|
||||
int sU=sss;
|
||||
for(int s=0;s<Ls;s++){
|
||||
int sF = s+Ls*sU;
|
||||
tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here
|
||||
}
|
||||
}
|
||||
PokeIndex<LorentzIndex>(mat,tmp,mu);
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
///////
|
||||
// Single flavour four spinors with colour index, 5d redblack
|
||||
///////
|
||||
template<class S,int Nrepresentation=Nc,class _Coeff_t = RealD>
|
||||
class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > {
|
||||
public:
|
||||
|
||||
static const int Dimension = Nrepresentation;
|
||||
const bool LsVectorised=true;
|
||||
typedef _Coeff_t Coeff_t;
|
||||
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Nrepresentation> > Gimpl;
|
||||
static const int Dimension = Representation::Dimension;
|
||||
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
|
||||
|
||||
INHERIT_GIMPL_TYPES(Gimpl);
|
||||
//Necessary?
|
||||
constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
|
||||
|
||||
template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >;
|
||||
template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhs> >;
|
||||
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>;
|
||||
template <typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd>;
|
||||
template <typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >;
|
||||
const bool LsVectorised=false;
|
||||
typedef _Coeff_t Coeff_t;
|
||||
|
||||
typedef iImplSpinor<Simd> SiteSpinor;
|
||||
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
|
||||
typedef Lattice<SiteSpinor> FermionField;
|
||||
INHERIT_GIMPL_TYPES(Gimpl);
|
||||
|
||||
// Make the doubled gauge field a *scalar*
|
||||
typedef iImplDoubledGaugeField<typename Simd::scalar_type>
|
||||
SiteDoubledGaugeField; // This is a scalar
|
||||
typedef iImplGaugeField<typename Simd::scalar_type>
|
||||
SiteScalarGaugeField; // scalar
|
||||
typedef iImplGaugeLink<typename Simd::scalar_type>
|
||||
SiteScalarGaugeLink; // scalar
|
||||
template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Dimension>, Ns> >;
|
||||
template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >;
|
||||
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;
|
||||
|
||||
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
|
||||
typedef iImplSpinor<Simd> SiteSpinor;
|
||||
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
|
||||
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
|
||||
|
||||
typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
|
||||
typedef WilsonImplParams ImplParams;
|
||||
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
|
||||
typedef Lattice<SiteSpinor> FermionField;
|
||||
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
|
||||
|
||||
ImplParams Params;
|
||||
typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
|
||||
typedef WilsonImplParams ImplParams;
|
||||
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
|
||||
|
||||
DomainWallVec5dImpl(const ImplParams &p = ImplParams()) : Params(p){};
|
||||
ImplParams Params;
|
||||
|
||||
bool overlapCommsCompute(void) { return false; };
|
||||
WilsonImpl(const ImplParams &p = ImplParams()) : Params(p){};
|
||||
|
||||
template <class ref>
|
||||
inline void loadLinkElement(Simd ®, ref &memory) {
|
||||
vsplat(reg, memory);
|
||||
}
|
||||
inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
|
||||
const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
|
||||
StencilImpl &St) {
|
||||
SiteGaugeLink UU;
|
||||
for (int i = 0; i < Nrepresentation; i++) {
|
||||
for (int j = 0; j < Nrepresentation; j++) {
|
||||
vsplat(UU()()(i, j), U(mu)()(i, j));
|
||||
}
|
||||
}
|
||||
mult(&phi(), &UU(), &chi());
|
||||
bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
|
||||
|
||||
inline void multLink(SiteHalfSpinor &phi,
|
||||
const SiteDoubledGaugeField &U,
|
||||
const SiteHalfSpinor &chi,
|
||||
int mu,
|
||||
StencilEntry *SE,
|
||||
StencilImpl &St) {
|
||||
mult(&phi(), &U(mu), &chi());
|
||||
}
|
||||
|
||||
template <class ref>
|
||||
inline void loadLinkElement(Simd ®, ref &memory) {
|
||||
reg = memory;
|
||||
}
|
||||
|
||||
inline void DoubleStore(GridBase *GaugeGrid,
|
||||
DoubledGaugeField &Uds,
|
||||
const GaugeField &Umu) {
|
||||
conformable(Uds._grid, GaugeGrid);
|
||||
conformable(Umu._grid, GaugeGrid);
|
||||
GaugeLinkField U(GaugeGrid);
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
U = PeekIndex<LorentzIndex>(Umu, mu);
|
||||
PokeIndex<LorentzIndex>(Uds, U, mu);
|
||||
U = adj(Cshift(U, mu, -1));
|
||||
PokeIndex<LorentzIndex>(Uds, U, mu + 4);
|
||||
}
|
||||
}
|
||||
|
||||
inline void DoubleStore(GridBase *GaugeGrid, DoubledGaugeField &Uds,
|
||||
const GaugeField &Umu) {
|
||||
SiteScalarGaugeField ScalarUmu;
|
||||
SiteDoubledGaugeField ScalarUds;
|
||||
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
|
||||
GaugeLinkField link(mat._grid);
|
||||
link = TraceIndex<SpinIndex>(outerProduct(Btilde,A));
|
||||
PokeIndex<LorentzIndex>(mat,link,mu);
|
||||
}
|
||||
|
||||
GaugeLinkField U(Umu._grid);
|
||||
GaugeField Uadj(Umu._grid);
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
U = PeekIndex<LorentzIndex>(Umu, mu);
|
||||
U = adj(Cshift(U, mu, -1));
|
||||
PokeIndex<LorentzIndex>(Uadj, U, mu);
|
||||
}
|
||||
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã,int mu){
|
||||
|
||||
for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) {
|
||||
std::vector<int> lcoor;
|
||||
GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
|
||||
int Ls=Btilde._grid->_fdimensions[0];
|
||||
GaugeLinkField tmp(mat._grid);
|
||||
tmp = zero;
|
||||
|
||||
peekLocalSite(ScalarUmu, Umu, lcoor);
|
||||
for (int mu = 0; mu < 4; mu++) ScalarUds(mu) = ScalarUmu(mu);
|
||||
|
||||
peekLocalSite(ScalarUmu, Uadj, lcoor);
|
||||
for (int mu = 0; mu < 4; mu++) ScalarUds(mu + 4) = ScalarUmu(mu);
|
||||
|
||||
pokeLocalSite(ScalarUds, Uds, lcoor);
|
||||
PARALLEL_FOR_LOOP
|
||||
for(int sss=0;sss<tmp._grid->oSites();sss++){
|
||||
int sU=sss;
|
||||
for(int s=0;s<Ls;s++){
|
||||
int sF = s+Ls*sU;
|
||||
tmp[sU] = tmp[sU]+ traceIndex<SpinIndex>(outerProduct(Btilde[sF],Atilde[sF])); // ordering here
|
||||
}
|
||||
}
|
||||
PokeIndex<LorentzIndex>(mat,tmp,mu);
|
||||
|
||||
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde,
|
||||
FermionField &A, int mu) {
|
||||
}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
// Single flavour four spinors with colour index, 5d redblack
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<class S,int Nrepresentation=Nc,class _Coeff_t = RealD>
|
||||
class DomainWallVec5dImpl : public PeriodicGaugeImpl< GaugeImplTypes< S,Nrepresentation> > {
|
||||
public:
|
||||
|
||||
static const int Dimension = Nrepresentation;
|
||||
const bool LsVectorised=true;
|
||||
typedef _Coeff_t Coeff_t;
|
||||
typedef PeriodicGaugeImpl<GaugeImplTypes<S, Nrepresentation> > Gimpl;
|
||||
|
||||
INHERIT_GIMPL_TYPES(Gimpl);
|
||||
|
||||
template <typename vtype> using iImplSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Ns> >;
|
||||
template <typename vtype> using iImplHalfSpinor = iScalar<iVector<iVector<vtype, Nrepresentation>, Nhs> >;
|
||||
template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>;
|
||||
template <typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd>;
|
||||
template <typename vtype> using iImplGaugeLink = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >;
|
||||
|
||||
typedef iImplSpinor<Simd> SiteSpinor;
|
||||
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
|
||||
typedef Lattice<SiteSpinor> FermionField;
|
||||
|
||||
// Make the doubled gauge field a *scalar*
|
||||
typedef iImplDoubledGaugeField<typename Simd::scalar_type> SiteDoubledGaugeField; // This is a scalar
|
||||
typedef iImplGaugeField<typename Simd::scalar_type> SiteScalarGaugeField; // scalar
|
||||
typedef iImplGaugeLink<typename Simd::scalar_type> SiteScalarGaugeLink; // scalar
|
||||
|
||||
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
|
||||
|
||||
typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
|
||||
typedef WilsonImplParams ImplParams;
|
||||
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
|
||||
|
||||
ImplParams Params;
|
||||
|
||||
DomainWallVec5dImpl(const ImplParams &p = ImplParams()) : Params(p){};
|
||||
|
||||
bool overlapCommsCompute(void) { return false; };
|
||||
|
||||
template <class ref>
|
||||
inline void loadLinkElement(Simd ®, ref &memory) {
|
||||
vsplat(reg, memory);
|
||||
}
|
||||
|
||||
inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
|
||||
const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
|
||||
StencilImpl &St) {
|
||||
SiteGaugeLink UU;
|
||||
for (int i = 0; i < Nrepresentation; i++) {
|
||||
for (int j = 0; j < Nrepresentation; j++) {
|
||||
vsplat(UU()()(i, j), U(mu)()(i, j));
|
||||
}
|
||||
}
|
||||
mult(&phi(), &UU(), &chi());
|
||||
}
|
||||
|
||||
inline void DoubleStore(GridBase *GaugeGrid, DoubledGaugeField &Uds,const GaugeField &Umu)
|
||||
{
|
||||
SiteScalarGaugeField ScalarUmu;
|
||||
SiteDoubledGaugeField ScalarUds;
|
||||
|
||||
GaugeLinkField U(Umu._grid);
|
||||
GaugeField Uadj(Umu._grid);
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
U = PeekIndex<LorentzIndex>(Umu, mu);
|
||||
U = adj(Cshift(U, mu, -1));
|
||||
PokeIndex<LorentzIndex>(Uadj, U, mu);
|
||||
}
|
||||
|
||||
for (int lidx = 0; lidx < GaugeGrid->lSites(); lidx++) {
|
||||
std::vector<int> lcoor;
|
||||
GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
|
||||
|
||||
peekLocalSite(ScalarUmu, Umu, lcoor);
|
||||
for (int mu = 0; mu < 4; mu++) ScalarUds(mu) = ScalarUmu(mu);
|
||||
|
||||
peekLocalSite(ScalarUmu, Uadj, lcoor);
|
||||
for (int mu = 0; mu < 4; mu++) ScalarUds(mu + 4) = ScalarUmu(mu);
|
||||
|
||||
pokeLocalSite(ScalarUds, Uds, lcoor);
|
||||
}
|
||||
}
|
||||
|
||||
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde,FermionField &A, int mu)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde,FermionField Ã, int mu)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde,
|
||||
FermionField Ã, int mu) {
|
||||
assert(0);
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Flavour doubled spinors; is Gparity the only? what about C*?
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <class S, int Nrepresentation,class _Coeff_t = RealD>
|
||||
class GparityWilsonImpl
|
||||
: public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresentation> > {
|
||||
public:
|
||||
static const int Dimension = Nrepresentation;
|
||||
template <class S, int Nrepresentation,class _Coeff_t = RealD>
|
||||
class GparityWilsonImpl : public ConjugateGaugeImpl<GaugeImplTypes<S, Nrepresentation> > {
|
||||
public:
|
||||
|
||||
const bool LsVectorised=false;
|
||||
static const int Dimension = Nrepresentation;
|
||||
|
||||
typedef _Coeff_t Coeff_t;
|
||||
typedef ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> > Gimpl;
|
||||
const bool LsVectorised=false;
|
||||
|
||||
INHERIT_GIMPL_TYPES(Gimpl);
|
||||
typedef _Coeff_t Coeff_t;
|
||||
typedef ConjugateGaugeImpl< GaugeImplTypes<S,Nrepresentation> > Gimpl;
|
||||
|
||||
template <typename vtype>
|
||||
using iImplSpinor =
|
||||
iVector<iVector<iVector<vtype, Nrepresentation>, Ns>, Ngp>;
|
||||
template <typename vtype>
|
||||
using iImplHalfSpinor =
|
||||
iVector<iVector<iVector<vtype, Nrepresentation>, Nhs>, Ngp>;
|
||||
template <typename vtype>
|
||||
using iImplDoubledGaugeField =
|
||||
iVector<iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>, Ngp>;
|
||||
INHERIT_GIMPL_TYPES(Gimpl);
|
||||
|
||||
typedef iImplSpinor<Simd> SiteSpinor;
|
||||
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
|
||||
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
|
||||
template <typename vtype> using iImplSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Ns>, Ngp>;
|
||||
template <typename vtype> using iImplHalfSpinor = iVector<iVector<iVector<vtype, Nrepresentation>, Nhs>, Ngp>;
|
||||
template <typename vtype> using iImplDoubledGaugeField = iVector<iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nds>, Ngp>;
|
||||
|
||||
typedef Lattice<SiteSpinor> FermionField;
|
||||
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
|
||||
typedef iImplSpinor<Simd> SiteSpinor;
|
||||
typedef iImplHalfSpinor<Simd> SiteHalfSpinor;
|
||||
typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
|
||||
|
||||
typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
|
||||
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
|
||||
typedef Lattice<SiteSpinor> FermionField;
|
||||
typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
|
||||
|
||||
typedef GparityWilsonImplParams ImplParams;
|
||||
typedef WilsonCompressor<SiteHalfSpinor, SiteSpinor> Compressor;
|
||||
typedef WilsonStencil<SiteSpinor, SiteHalfSpinor> StencilImpl;
|
||||
|
||||
ImplParams Params;
|
||||
typedef GparityWilsonImplParams ImplParams;
|
||||
|
||||
ImplParams Params;
|
||||
|
||||
GparityWilsonImpl(const ImplParams &p = ImplParams()) : Params(p){};
|
||||
GparityWilsonImpl(const ImplParams &p = ImplParams()) : Params(p){};
|
||||
|
||||
bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
|
||||
bool overlapCommsCompute(void) { return Params.overlapCommsCompute; };
|
||||
|
||||
// provide the multiply by link that is differentiated between Gparity (with
|
||||
// flavour index) and non-Gparity
|
||||
inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
|
||||
const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
|
||||
StencilImpl &St) {
|
||||
typedef SiteHalfSpinor vobj;
|
||||
typedef typename SiteHalfSpinor::scalar_object sobj;
|
||||
// provide the multiply by link that is differentiated between Gparity (with
|
||||
// flavour index) and non-Gparity
|
||||
inline void multLink(SiteHalfSpinor &phi, const SiteDoubledGaugeField &U,
|
||||
const SiteHalfSpinor &chi, int mu, StencilEntry *SE,
|
||||
StencilImpl &St) {
|
||||
|
||||
vobj vtmp;
|
||||
sobj stmp;
|
||||
typedef SiteHalfSpinor vobj;
|
||||
typedef typename SiteHalfSpinor::scalar_object sobj;
|
||||
|
||||
GridBase *grid = St._grid;
|
||||
vobj vtmp;
|
||||
sobj stmp;
|
||||
|
||||
const int Nsimd = grid->Nsimd();
|
||||
GridBase *grid = St._grid;
|
||||
|
||||
int direction = St._directions[mu];
|
||||
int distance = St._distances[mu];
|
||||
int ptype = St._permute_type[mu];
|
||||
int sl = St._grid->_simd_layout[direction];
|
||||
const int Nsimd = grid->Nsimd();
|
||||
|
||||
// Fixme X.Y.Z.T hardcode in stencil
|
||||
int mmu = mu % Nd;
|
||||
int direction = St._directions[mu];
|
||||
int distance = St._distances[mu];
|
||||
int ptype = St._permute_type[mu];
|
||||
int sl = St._grid->_simd_layout[direction];
|
||||
|
||||
// assert our assumptions
|
||||
assert((distance == 1) || (distance == -1)); // nearest neighbour stencil hard code
|
||||
assert((sl == 1) || (sl == 2));
|
||||
// Fixme X.Y.Z.T hardcode in stencil
|
||||
int mmu = mu % Nd;
|
||||
|
||||
std::vector<int> icoor;
|
||||
// assert our assumptions
|
||||
assert((distance == 1) || (distance == -1)); // nearest neighbour stencil hard code
|
||||
assert((sl == 1) || (sl == 2));
|
||||
|
||||
if ( SE->_around_the_world && Params.twists[mmu] ) {
|
||||
std::vector<int> icoor;
|
||||
|
||||
if ( sl == 2 ) {
|
||||
if ( SE->_around_the_world && Params.twists[mmu] ) {
|
||||
|
||||
std::vector<sobj> vals(Nsimd);
|
||||
if ( sl == 2 ) {
|
||||
|
||||
extract(chi,vals);
|
||||
for(int s=0;s<Nsimd;s++){
|
||||
std::vector<sobj> vals(Nsimd);
|
||||
|
||||
grid->iCoorFromIindex(icoor,s);
|
||||
extract(chi,vals);
|
||||
for(int s=0;s<Nsimd;s++){
|
||||
|
||||
assert((icoor[direction]==0)||(icoor[direction]==1));
|
||||
grid->iCoorFromIindex(icoor,s);
|
||||
|
||||
int permute_lane;
|
||||
if ( distance == 1) {
|
||||
permute_lane = icoor[direction]?1:0;
|
||||
} else {
|
||||
permute_lane = icoor[direction]?0:1;
|
||||
assert((icoor[direction]==0)||(icoor[direction]==1));
|
||||
|
||||
int permute_lane;
|
||||
if ( distance == 1) {
|
||||
permute_lane = icoor[direction]?1:0;
|
||||
} else {
|
||||
permute_lane = icoor[direction]?0:1;
|
||||
}
|
||||
|
||||
if ( permute_lane ) {
|
||||
stmp(0) = vals[s](1);
|
||||
stmp(1) = vals[s](0);
|
||||
vals[s] = stmp;
|
||||
}
|
||||
}
|
||||
merge(vtmp,vals);
|
||||
|
||||
if ( permute_lane ) {
|
||||
stmp(0) = vals[s](1);
|
||||
stmp(1) = vals[s](0);
|
||||
vals[s] = stmp;
|
||||
}
|
||||
}
|
||||
merge(vtmp,vals);
|
||||
} else {
|
||||
vtmp(0) = chi(1);
|
||||
vtmp(1) = chi(0);
|
||||
}
|
||||
mult(&phi(0),&U(0)(mu),&vtmp(0));
|
||||
mult(&phi(1),&U(1)(mu),&vtmp(1));
|
||||
|
||||
} else {
|
||||
vtmp(0) = chi(1);
|
||||
vtmp(1) = chi(0);
|
||||
}
|
||||
mult(&phi(0),&U(0)(mu),&vtmp(0));
|
||||
mult(&phi(1),&U(1)(mu),&vtmp(1));
|
||||
} else {
|
||||
mult(&phi(0),&U(0)(mu),&chi(0));
|
||||
mult(&phi(1),&U(1)(mu),&chi(1));
|
||||
}
|
||||
|
||||
} else {
|
||||
mult(&phi(0),&U(0)(mu),&chi(0));
|
||||
mult(&phi(1),&U(1)(mu),&chi(1));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
||||
{
|
||||
conformable(Uds._grid,GaugeGrid);
|
||||
conformable(Umu._grid,GaugeGrid);
|
||||
|
||||
inline void DoubleStore(GridBase *GaugeGrid,DoubledGaugeField &Uds,const GaugeField &Umu)
|
||||
{
|
||||
GaugeLinkField Utmp (GaugeGrid);
|
||||
GaugeLinkField U (GaugeGrid);
|
||||
GaugeLinkField Uconj(GaugeGrid);
|
||||
|
||||
conformable(Uds._grid,GaugeGrid);
|
||||
conformable(Umu._grid,GaugeGrid);
|
||||
Lattice<iScalar<vInteger> > coor(GaugeGrid);
|
||||
|
||||
GaugeLinkField Utmp (GaugeGrid);
|
||||
GaugeLinkField U (GaugeGrid);
|
||||
GaugeLinkField Uconj(GaugeGrid);
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
|
||||
Lattice<iScalar<vInteger> > coor(GaugeGrid);
|
||||
LatticeCoordinate(coor,mu);
|
||||
|
||||
U = PeekIndex<LorentzIndex>(Umu,mu);
|
||||
Uconj = conjugate(U);
|
||||
|
||||
// This phase could come from a simple bc 1,1,-1,1 ..
|
||||
int neglink = GaugeGrid->GlobalDimensions()[mu]-1;
|
||||
if ( Params.twists[mu] ) {
|
||||
Uconj = where(coor==neglink,-Uconj,Uconj);
|
||||
}
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(auto ss=U.begin();ss<U.end();ss++){
|
||||
Uds[ss](0)(mu) = U[ss]();
|
||||
Uds[ss](1)(mu) = Uconj[ss]();
|
||||
}
|
||||
|
||||
U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary
|
||||
Uconj = adj(Cshift(Uconj,mu,-1));
|
||||
|
||||
Utmp = U;
|
||||
if ( Params.twists[mu] ) {
|
||||
Utmp = where(coor==0,Uconj,Utmp);
|
||||
}
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(auto ss=U.begin();ss<U.end();ss++){
|
||||
Uds[ss](0)(mu+4) = Utmp[ss]();
|
||||
}
|
||||
|
||||
Utmp = Uconj;
|
||||
if ( Params.twists[mu] ) {
|
||||
Utmp = where(coor==0,U,Utmp);
|
||||
}
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(auto ss=U.begin();ss<U.end();ss++){
|
||||
Uds[ss](1)(mu+4) = Utmp[ss]();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A, int mu) {
|
||||
|
||||
LatticeCoordinate(coor,mu);
|
||||
// DhopDir provides U or Uconj depending on coor/flavour.
|
||||
GaugeLinkField link(mat._grid);
|
||||
// use lorentz for flavour as hack.
|
||||
auto tmp = TraceIndex<SpinIndex>(outerProduct(Btilde, A));
|
||||
PARALLEL_FOR_LOOP
|
||||
for (auto ss = tmp.begin(); ss < tmp.end(); ss++) {
|
||||
link[ss]() = tmp[ss](0, 0) - conjugate(tmp[ss](1, 1));
|
||||
}
|
||||
PokeIndex<LorentzIndex>(mat, link, mu);
|
||||
return;
|
||||
}
|
||||
|
||||
U = PeekIndex<LorentzIndex>(Umu,mu);
|
||||
Uconj = conjugate(U);
|
||||
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã, int mu) {
|
||||
|
||||
// This phase could come from a simple bc 1,1,-1,1 ..
|
||||
int neglink = GaugeGrid->GlobalDimensions()[mu]-1;
|
||||
if ( Params.twists[mu] ) {
|
||||
Uconj = where(coor==neglink,-Uconj,Uconj);
|
||||
}
|
||||
int Ls = Btilde._grid->_fdimensions[0];
|
||||
|
||||
GaugeLinkField tmp(mat._grid);
|
||||
tmp = zero;
|
||||
PARALLEL_FOR_LOOP
|
||||
for (int ss = 0; ss < tmp._grid->oSites(); ss++) {
|
||||
for (int s = 0; s < Ls; s++) {
|
||||
int sF = s + Ls * ss;
|
||||
auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde[sF], Atilde[sF]));
|
||||
tmp[ss]() = tmp[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1));
|
||||
}
|
||||
}
|
||||
PokeIndex<LorentzIndex>(mat, tmp, mu);
|
||||
return;
|
||||
}
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(auto ss=U.begin();ss<U.end();ss++){
|
||||
Uds[ss](0)(mu) = U[ss]();
|
||||
Uds[ss](1)(mu) = Uconj[ss]();
|
||||
}
|
||||
};
|
||||
|
||||
U = adj(Cshift(U ,mu,-1)); // correct except for spanning the boundary
|
||||
Uconj = adj(Cshift(Uconj,mu,-1));
|
||||
typedef WilsonImpl<vComplex, FundamentalRepresentation > WilsonImplR; // Real.. whichever prec
|
||||
typedef WilsonImpl<vComplexF, FundamentalRepresentation > WilsonImplF; // Float
|
||||
typedef WilsonImpl<vComplexD, FundamentalRepresentation > WilsonImplD; // Double
|
||||
|
||||
Utmp = U;
|
||||
if ( Params.twists[mu] ) {
|
||||
Utmp = where(coor==0,Uconj,Utmp);
|
||||
}
|
||||
typedef WilsonImpl<vComplex, FundamentalRepresentation, ComplexD > ZWilsonImplR; // Real.. whichever prec
|
||||
typedef WilsonImpl<vComplexF, FundamentalRepresentation, ComplexD > ZWilsonImplF; // Float
|
||||
typedef WilsonImpl<vComplexD, FundamentalRepresentation, ComplexD > ZWilsonImplD; // Double
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(auto ss=U.begin();ss<U.end();ss++){
|
||||
Uds[ss](0)(mu+4) = Utmp[ss]();
|
||||
}
|
||||
typedef WilsonImpl<vComplex, AdjointRepresentation > WilsonAdjImplR; // Real.. whichever prec
|
||||
typedef WilsonImpl<vComplexF, AdjointRepresentation > WilsonAdjImplF; // Float
|
||||
typedef WilsonImpl<vComplexD, AdjointRepresentation > WilsonAdjImplD; // Double
|
||||
|
||||
Utmp = Uconj;
|
||||
if ( Params.twists[mu] ) {
|
||||
Utmp = where(coor==0,U,Utmp);
|
||||
}
|
||||
typedef WilsonImpl<vComplex, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplR; // Real.. whichever prec
|
||||
typedef WilsonImpl<vComplexF, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplF; // Float
|
||||
typedef WilsonImpl<vComplexD, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplD; // Double
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for(auto ss=U.begin();ss<U.end();ss++){
|
||||
Uds[ss](1)(mu+4) = Utmp[ss]();
|
||||
}
|
||||
typedef DomainWallVec5dImpl<vComplex ,Nc> DomainWallVec5dImplR; // Real.. whichever prec
|
||||
typedef DomainWallVec5dImpl<vComplexF,Nc> DomainWallVec5dImplF; // Float
|
||||
typedef DomainWallVec5dImpl<vComplexD,Nc> DomainWallVec5dImplD; // Double
|
||||
|
||||
}
|
||||
}
|
||||
typedef DomainWallVec5dImpl<vComplex ,Nc,ComplexD> ZDomainWallVec5dImplR; // Real.. whichever prec
|
||||
typedef DomainWallVec5dImpl<vComplexF,Nc,ComplexD> ZDomainWallVec5dImplF; // Float
|
||||
typedef DomainWallVec5dImpl<vComplexD,Nc,ComplexD> ZDomainWallVec5dImplD; // Double
|
||||
|
||||
typedef GparityWilsonImpl<vComplex , Nc> GparityWilsonImplR; // Real.. whichever prec
|
||||
typedef GparityWilsonImpl<vComplexF, Nc> GparityWilsonImplF; // Float
|
||||
typedef GparityWilsonImpl<vComplexD, Nc> GparityWilsonImplD; // Double
|
||||
|
||||
inline void InsertForce4D(GaugeField &mat, FermionField &Btilde,
|
||||
FermionField &A, int mu) {
|
||||
// DhopDir provides U or Uconj depending on coor/flavour.
|
||||
GaugeLinkField link(mat._grid);
|
||||
// use lorentz for flavour as hack.
|
||||
auto tmp = TraceIndex<SpinIndex>(outerProduct(Btilde, A));
|
||||
PARALLEL_FOR_LOOP
|
||||
for (auto ss = tmp.begin(); ss < tmp.end(); ss++) {
|
||||
link[ss]() = tmp[ss](0, 0) - conjugate(tmp[ss](1, 1));
|
||||
}
|
||||
PokeIndex<LorentzIndex>(mat, link, mu);
|
||||
return;
|
||||
}
|
||||
}}
|
||||
|
||||
inline void InsertForce5D(GaugeField &mat, FermionField &Btilde,
|
||||
FermionField Ã, int mu) {
|
||||
int Ls = Btilde._grid->_fdimensions[0];
|
||||
|
||||
GaugeLinkField tmp(mat._grid);
|
||||
tmp = zero;
|
||||
PARALLEL_FOR_LOOP
|
||||
for (int ss = 0; ss < tmp._grid->oSites(); ss++) {
|
||||
for (int s = 0; s < Ls; s++) {
|
||||
int sF = s + Ls * ss;
|
||||
auto ttmp = traceIndex<SpinIndex>(outerProduct(Btilde[sF], Atilde[sF]));
|
||||
tmp[ss]() = tmp[ss]() + ttmp(0, 0) + conjugate(ttmp(1, 1));
|
||||
}
|
||||
}
|
||||
PokeIndex<LorentzIndex>(mat, tmp, mu);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
typedef WilsonImpl<vComplex, FundamentalRepresentation > WilsonImplR; // Real.. whichever prec
|
||||
typedef WilsonImpl<vComplexF, FundamentalRepresentation > WilsonImplF; // Float
|
||||
typedef WilsonImpl<vComplexD, FundamentalRepresentation > WilsonImplD; // Double
|
||||
|
||||
|
||||
typedef WilsonImpl<vComplex, FundamentalRepresentation, ComplexD > ZWilsonImplR; // Real.. whichever prec
|
||||
typedef WilsonImpl<vComplexF, FundamentalRepresentation, ComplexD > ZWilsonImplF; // Float
|
||||
typedef WilsonImpl<vComplexD, FundamentalRepresentation, ComplexD > ZWilsonImplD; // Double
|
||||
|
||||
typedef WilsonImpl<vComplex, AdjointRepresentation > WilsonAdjImplR; // Real.. whichever prec
|
||||
typedef WilsonImpl<vComplexF, AdjointRepresentation > WilsonAdjImplF; // Float
|
||||
typedef WilsonImpl<vComplexD, AdjointRepresentation > WilsonAdjImplD; // Double
|
||||
|
||||
typedef WilsonImpl<vComplex, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplR; // Real.. whichever prec
|
||||
typedef WilsonImpl<vComplexF, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplF; // Float
|
||||
typedef WilsonImpl<vComplexD, TwoIndexSymmetricRepresentation > WilsonTwoIndexSymmetricImplD; // Double
|
||||
|
||||
typedef DomainWallVec5dImpl<vComplex ,Nc> DomainWallVec5dImplR; // Real.. whichever prec
|
||||
typedef DomainWallVec5dImpl<vComplexF,Nc> DomainWallVec5dImplF; // Float
|
||||
typedef DomainWallVec5dImpl<vComplexD,Nc> DomainWallVec5dImplD; // Double
|
||||
|
||||
typedef DomainWallVec5dImpl<vComplex ,Nc,ComplexD> ZDomainWallVec5dImplR; // Real.. whichever prec
|
||||
typedef DomainWallVec5dImpl<vComplexF,Nc,ComplexD> ZDomainWallVec5dImplF; // Float
|
||||
typedef DomainWallVec5dImpl<vComplexD,Nc,ComplexD> ZDomainWallVec5dImplD; // Double
|
||||
|
||||
typedef GparityWilsonImpl<vComplex, Nc> GparityWilsonImplR; // Real.. whichever prec
|
||||
typedef GparityWilsonImpl<vComplexF, Nc> GparityWilsonImplF; // Float
|
||||
typedef GparityWilsonImpl<vComplexD, Nc> GparityWilsonImplD; // Double
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -166,7 +166,7 @@ void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
|
||||
////////////////////////
|
||||
PARALLEL_FOR_LOOP
|
||||
for (int sss = 0; sss < B._grid->oSites(); sss++) {
|
||||
Kernels::DiracOptDhopDir(st, U, st.comm_buf, sss, sss, B, Btilde, mu,
|
||||
Kernels::DiracOptDhopDir(st, U, st.CommBuf(), sss, sss, B, Btilde, mu,
|
||||
gamma);
|
||||
}
|
||||
|
||||
@ -277,7 +277,7 @@ void WilsonFermion<Impl>::DhopDirDisp(const FermionField &in, FermionField &out,
|
||||
|
||||
PARALLEL_FOR_LOOP
|
||||
for (int sss = 0; sss < in._grid->oSites(); sss++) {
|
||||
Kernels::DiracOptDhopDir(Stencil, Umu, Stencil.comm_buf, sss, sss, in, out,
|
||||
Kernels::DiracOptDhopDir(Stencil, Umu, Stencil.CommBuf(), sss, sss, in, out,
|
||||
dirdisp, gamma);
|
||||
}
|
||||
};
|
||||
@ -295,13 +295,13 @@ void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
|
||||
if (dag == DaggerYes) {
|
||||
PARALLEL_FOR_LOOP
|
||||
for (int sss = 0; sss < in._grid->oSites(); sss++) {
|
||||
Kernels::DiracOptDhopSiteDag(st, lo, U, st.comm_buf, sss, sss, 1, 1, in,
|
||||
Kernels::DiracOptDhopSiteDag(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in,
|
||||
out);
|
||||
}
|
||||
} else {
|
||||
PARALLEL_FOR_LOOP
|
||||
for (int sss = 0; sss < in._grid->oSites(); sss++) {
|
||||
Kernels::DiracOptDhopSite(st, lo, U, st.comm_buf, sss, sss, 1, 1, in,
|
||||
Kernels::DiracOptDhopSite(st, lo, U, st.CommBuf(), sss, sss, 1, 1, in,
|
||||
out);
|
||||
}
|
||||
}
|
||||
|
@ -184,44 +184,37 @@ void WilsonFermion5D<Impl>::Report(void)
|
||||
|
||||
if ( DhopCalls > 0 ) {
|
||||
std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D Number of Dhop Calls : " << DhopCalls << std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D Total Communication time : " << DhopCommTime
|
||||
<< " us" << std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls : "
|
||||
<< DhopCommTime / DhopCalls << " us" << std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D Total Compute time : "
|
||||
<< DhopComputeTime << " us" << std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls : "
|
||||
<< DhopComputeTime / DhopCalls << " us" << std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D Number of Dhop Calls : " << DhopCalls << std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D Total Communication time : " << DhopCommTime<< " us" << std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls : " << DhopCommTime / DhopCalls << " us" << std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D Total Compute time : " << DhopComputeTime << " us" << std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls : " << DhopComputeTime / DhopCalls << " us" << std::endl;
|
||||
|
||||
RealD mflops = 1344*volume*DhopCalls/DhopComputeTime;
|
||||
RealD mflops = 1344*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting
|
||||
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NP << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
|
||||
|
||||
}
|
||||
|
||||
if ( DerivCalls > 0 ) {
|
||||
std::cout << GridLogMessage << "#### Deriv calls report "<< std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D Number of Deriv Calls : " <<DerivCalls <<std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D Total Communication time : " <<DerivCommTime <<" us"<<std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls : " <<DerivCommTime/DerivCalls<<" us" <<std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D Total Compute time : " <<DerivComputeTime <<" us"<<std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls : " <<DerivComputeTime/DerivCalls<<" us" <<std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D Total Dhop Compute time : " <<DerivDhopComputeTime <<" us"<<std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D Dhop ComputeTime/Calls : " <<DerivDhopComputeTime/DerivCalls<<" us" <<std::endl;
|
||||
|
||||
|
||||
|
||||
RealD mflops = 144*volume*DerivCalls/DerivDhopComputeTime;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NP << std::endl;
|
||||
std::cout << GridLogMessage << "#### Deriv calls report "<< std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D Number of Deriv Calls : " <<DerivCalls <<std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D Total Communication time : " <<DerivCommTime <<" us"<<std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls : " <<DerivCommTime/DerivCalls<<" us" <<std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D Total Compute time : " <<DerivComputeTime <<" us"<<std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls : " <<DerivComputeTime/DerivCalls<<" us" <<std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D Total Dhop Compute time : " <<DerivDhopComputeTime <<" us"<<std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D Dhop ComputeTime/Calls : " <<DerivDhopComputeTime/DerivCalls<<" us" <<std::endl;
|
||||
|
||||
RealD mflops = 144*volume*DerivCalls/DerivDhopComputeTime;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NP << std::endl;
|
||||
}
|
||||
|
||||
if (DerivCalls > 0 || DhopCalls > 0){
|
||||
std::cout << GridLogMessage << "WilsonFermion5D Stencil"<<std::endl; Stencil.Report();
|
||||
std::cout << GridLogMessage << "WilsonFermion5D StencilEven"<<std::endl; StencilEven.Report();
|
||||
std::cout << GridLogMessage << "WilsonFermion5D StencilOdd"<<std::endl; StencilOdd.Report();
|
||||
std::cout << GridLogMessage << "WilsonFermion5D Stencil"<<std::endl; Stencil.Report();
|
||||
std::cout << GridLogMessage << "WilsonFermion5D StencilEven"<<std::endl; StencilEven.Report();
|
||||
std::cout << GridLogMessage << "WilsonFermion5D StencilOdd"<<std::endl; StencilOdd.Report();
|
||||
}
|
||||
}
|
||||
|
||||
@ -275,7 +268,7 @@ PARALLEL_FOR_LOOP
|
||||
for(int s=0;s<Ls;s++){
|
||||
int sU=ss;
|
||||
int sF = s+Ls*sU;
|
||||
Kernels::DiracOptDhopDir(Stencil,Umu,Stencil.comm_buf,sF,sU,in,out,dirdisp,gamma);
|
||||
Kernels::DiracOptDhopDir(Stencil,Umu,Stencil.CommBuf(),sF,sU,in,out,dirdisp,gamma);
|
||||
}
|
||||
}
|
||||
};
|
||||
@ -327,8 +320,7 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
|
||||
assert(sF < B._grid->oSites());
|
||||
assert(sU < U._grid->oSites());
|
||||
|
||||
Kernels::DiracOptDhopDir(st, U, st.comm_buf, sF, sU, B, Btilde, mu,
|
||||
gamma);
|
||||
Kernels::DiracOptDhopDir(st, U, st.CommBuf(), sF, sU, B, Btilde, mu, gamma);
|
||||
|
||||
////////////////////////////
|
||||
// spin trace outer product
|
||||
@ -342,10 +334,10 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void WilsonFermion5D<Impl>::DhopDeriv( GaugeField &mat,
|
||||
const FermionField &A,
|
||||
const FermionField &B,
|
||||
int dag)
|
||||
void WilsonFermion5D<Impl>::DhopDeriv(GaugeField &mat,
|
||||
const FermionField &A,
|
||||
const FermionField &B,
|
||||
int dag)
|
||||
{
|
||||
conformable(A._grid,FermionGrid());
|
||||
conformable(A._grid,B._grid);
|
||||
@ -358,9 +350,9 @@ void WilsonFermion5D<Impl>::DhopDeriv( GaugeField &mat,
|
||||
|
||||
template<class Impl>
|
||||
void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
|
||||
const FermionField &A,
|
||||
const FermionField &B,
|
||||
int dag)
|
||||
const FermionField &A,
|
||||
const FermionField &B,
|
||||
int dag)
|
||||
{
|
||||
conformable(A._grid,FermionRedBlackGrid());
|
||||
conformable(GaugeRedBlackGrid(),mat._grid);
|
||||
@ -376,9 +368,9 @@ void WilsonFermion5D<Impl>::DhopDerivEO(GaugeField &mat,
|
||||
|
||||
template<class Impl>
|
||||
void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
|
||||
const FermionField &A,
|
||||
const FermionField &B,
|
||||
int dag)
|
||||
const FermionField &A,
|
||||
const FermionField &B,
|
||||
int dag)
|
||||
{
|
||||
conformable(A._grid,FermionRedBlackGrid());
|
||||
conformable(GaugeRedBlackGrid(),mat._grid);
|
||||
@ -393,10 +385,9 @@ void WilsonFermion5D<Impl>::DhopDerivOE(GaugeField &mat,
|
||||
|
||||
template<class Impl>
|
||||
void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
|
||||
DoubledGaugeField & U,
|
||||
const FermionField &in, FermionField &out,int dag)
|
||||
DoubledGaugeField & U,
|
||||
const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
DhopCalls++;
|
||||
// assert((dag==DaggerNo) ||(dag==DaggerYes));
|
||||
Compressor compressor(dag);
|
||||
|
||||
@ -413,27 +404,25 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
|
||||
for (int ss = 0; ss < U._grid->oSites(); ss++) {
|
||||
int sU = ss;
|
||||
int sF = LLs * sU;
|
||||
Kernels::DiracOptDhopSiteDag(st, lo, U, st.comm_buf, sF, sU, LLs, 1, in,
|
||||
out);
|
||||
Kernels::DiracOptDhopSiteDag(st, lo, U, st.CommBuf(), sF, sU, LLs, 1, in, out);
|
||||
}
|
||||
#ifdef AVX512
|
||||
} else if (stat.is_init() ) {
|
||||
|
||||
int nthreads;
|
||||
stat.start();
|
||||
#pragma omp parallel
|
||||
#pragma omp parallel
|
||||
{
|
||||
#pragma omp master
|
||||
#pragma omp master
|
||||
nthreads = omp_get_num_threads();
|
||||
int mythread = omp_get_thread_num();
|
||||
stat.enter(mythread);
|
||||
#pragma omp for nowait
|
||||
for(int ss=0;ss<U._grid->oSites();ss++)
|
||||
{
|
||||
int sU=ss;
|
||||
int sF=LLs*sU;
|
||||
Kernels::DiracOptDhopSite(st,lo,U,st.comm_buf,sF,sU,LLs,1,in,out);
|
||||
}
|
||||
#pragma omp for nowait
|
||||
for(int ss=0;ss<U._grid->oSites();ss++) {
|
||||
int sU=ss;
|
||||
int sF=LLs*sU;
|
||||
Kernels::DiracOptDhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out);
|
||||
}
|
||||
stat.exit(mythread);
|
||||
}
|
||||
stat.accum(nthreads);
|
||||
@ -443,8 +432,7 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
|
||||
for (int ss = 0; ss < U._grid->oSites(); ss++) {
|
||||
int sU = ss;
|
||||
int sF = LLs * sU;
|
||||
Kernels::DiracOptDhopSite(st, lo, U, st.comm_buf, sF, sU, LLs, 1, in,
|
||||
out);
|
||||
Kernels::DiracOptDhopSite(st,lo,U,st.CommBuf(),sF,sU,LLs,1,in,out);
|
||||
}
|
||||
}
|
||||
DhopComputeTime+=usecond();
|
||||
@ -454,6 +442,7 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
|
||||
template<class Impl>
|
||||
void WilsonFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
DhopCalls++;
|
||||
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
|
||||
conformable(in._grid,out._grid); // drops the cb check
|
||||
|
||||
@ -465,6 +454,7 @@ void WilsonFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int
|
||||
template<class Impl>
|
||||
void WilsonFermion5D<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
DhopCalls++;
|
||||
conformable(in._grid,FermionRedBlackGrid()); // verifies half grid
|
||||
conformable(in._grid,out._grid); // drops the cb check
|
||||
|
||||
@ -476,6 +466,7 @@ void WilsonFermion5D<Impl>::DhopEO(const FermionField &in, FermionField &out,int
|
||||
template<class Impl>
|
||||
void WilsonFermion5D<Impl>::Dhop(const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
DhopCalls+=2;
|
||||
conformable(in._grid,FermionGrid()); // verifies full grid
|
||||
conformable(in._grid,out._grid);
|
||||
|
||||
|
@ -34,155 +34,154 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
#include <Grid/Stat.h>
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
namespace QCD {
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// This is the 4d red black case appropriate to support
|
||||
//
|
||||
// parity = (x+y+z+t)|2;
|
||||
// generalised five dim fermions like mobius, zolotarev etc..
|
||||
//
|
||||
// i.e. even even contains fifth dim hopping term.
|
||||
//
|
||||
// [DIFFERS from original CPS red black implementation parity = (x+y+z+t+s)|2 ]
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// This is the 4d red black case appropriate to support
|
||||
//
|
||||
// parity = (x+y+z+t)|2;
|
||||
// generalised five dim fermions like mobius, zolotarev etc..
|
||||
//
|
||||
// i.e. even even contains fifth dim hopping term.
|
||||
//
|
||||
// [DIFFERS from original CPS red black implementation parity = (x+y+z+t+s)|2 ]
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
class WilsonFermion5DStatic {
|
||||
public:
|
||||
// S-direction is INNERMOST and takes no part in the parity.
|
||||
static const std::vector<int> directions;
|
||||
static const std::vector<int> displacements;
|
||||
const int npoint = 8;
|
||||
};
|
||||
|
||||
class WilsonFermion5DStatic {
|
||||
public:
|
||||
// S-direction is INNERMOST and takes no part in the parity.
|
||||
static const std::vector<int> directions;
|
||||
static const std::vector<int> displacements;
|
||||
const int npoint = 8;
|
||||
};
|
||||
template<class Impl>
|
||||
class WilsonFermion5D : public WilsonKernels<Impl>, public WilsonFermion5DStatic
|
||||
{
|
||||
public:
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
typedef WilsonKernels<Impl> Kernels;
|
||||
PmuStat stat;
|
||||
|
||||
template<class Impl>
|
||||
class WilsonFermion5D : public WilsonKernels<Impl>, public WilsonFermion5DStatic
|
||||
{
|
||||
public:
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
typedef WilsonKernels<Impl> Kernels;
|
||||
PmuStat stat;
|
||||
void Report(void);
|
||||
void ZeroCounters(void);
|
||||
double DhopCalls;
|
||||
double DhopCommTime;
|
||||
double DhopComputeTime;
|
||||
|
||||
void Report(void);
|
||||
void ZeroCounters(void);
|
||||
double DhopCalls;
|
||||
double DhopCommTime;
|
||||
double DhopComputeTime;
|
||||
double DerivCalls;
|
||||
double DerivCommTime;
|
||||
double DerivComputeTime;
|
||||
double DerivDhopComputeTime;
|
||||
|
||||
double DerivCalls;
|
||||
double DerivCommTime;
|
||||
double DerivComputeTime;
|
||||
double DerivDhopComputeTime;
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Implement the abstract base
|
||||
///////////////////////////////////////////////////////////////
|
||||
GridBase *GaugeGrid(void) { return _FourDimGrid ;}
|
||||
GridBase *GaugeRedBlackGrid(void) { return _FourDimRedBlackGrid ;}
|
||||
GridBase *FermionGrid(void) { return _FiveDimGrid;}
|
||||
GridBase *FermionRedBlackGrid(void) { return _FiveDimRedBlackGrid;}
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Implement the abstract base
|
||||
///////////////////////////////////////////////////////////////
|
||||
GridBase *GaugeGrid(void) { return _FourDimGrid ;}
|
||||
GridBase *GaugeRedBlackGrid(void) { return _FourDimRedBlackGrid ;}
|
||||
GridBase *FermionGrid(void) { return _FiveDimGrid;}
|
||||
GridBase *FermionRedBlackGrid(void) { return _FiveDimRedBlackGrid;}
|
||||
// full checkerboard operations; leave unimplemented as abstract for now
|
||||
virtual RealD M (const FermionField &in, FermionField &out){assert(0); return 0.0;};
|
||||
virtual RealD Mdag (const FermionField &in, FermionField &out){assert(0); return 0.0;};
|
||||
|
||||
// full checkerboard operations; leave unimplemented as abstract for now
|
||||
virtual RealD M (const FermionField &in, FermionField &out){assert(0); return 0.0;};
|
||||
virtual RealD Mdag (const FermionField &in, FermionField &out){assert(0); return 0.0;};
|
||||
// half checkerboard operations; leave unimplemented as abstract for now
|
||||
virtual void Meooe (const FermionField &in, FermionField &out){assert(0);};
|
||||
virtual void Mooee (const FermionField &in, FermionField &out){assert(0);};
|
||||
virtual void MooeeInv (const FermionField &in, FermionField &out){assert(0);};
|
||||
|
||||
// half checkerboard operations; leave unimplemented as abstract for now
|
||||
virtual void Meooe (const FermionField &in, FermionField &out){assert(0);};
|
||||
virtual void Mooee (const FermionField &in, FermionField &out){assert(0);};
|
||||
virtual void MooeeInv (const FermionField &in, FermionField &out){assert(0);};
|
||||
virtual void MeooeDag (const FermionField &in, FermionField &out){assert(0);};
|
||||
virtual void MooeeDag (const FermionField &in, FermionField &out){assert(0);};
|
||||
virtual void MooeeInvDag (const FermionField &in, FermionField &out){assert(0);};
|
||||
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
|
||||
|
||||
virtual void MeooeDag (const FermionField &in, FermionField &out){assert(0);};
|
||||
virtual void MooeeDag (const FermionField &in, FermionField &out){assert(0);};
|
||||
virtual void MooeeInvDag (const FermionField &in, FermionField &out){assert(0);};
|
||||
virtual void Mdir (const FermionField &in, FermionField &out,int dir,int disp){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
|
||||
// These can be overridden by fancy 5d chiral action
|
||||
virtual void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||
virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||
virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||
|
||||
// These can be overridden by fancy 5d chiral action
|
||||
virtual void DhopDeriv (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||
virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||
virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
|
||||
// Implement hopping term non-hermitian hopping term; half cb or both
|
||||
// Implement s-diagonal DW
|
||||
void DW (const FermionField &in, FermionField &out,int dag);
|
||||
void Dhop (const FermionField &in, FermionField &out,int dag);
|
||||
void DhopOE(const FermionField &in, FermionField &out,int dag);
|
||||
void DhopEO(const FermionField &in, FermionField &out,int dag);
|
||||
|
||||
// Implement hopping term non-hermitian hopping term; half cb or both
|
||||
// Implement s-diagonal DW
|
||||
void DW (const FermionField &in, FermionField &out,int dag);
|
||||
void Dhop (const FermionField &in, FermionField &out,int dag);
|
||||
void DhopOE(const FermionField &in, FermionField &out,int dag);
|
||||
void DhopEO(const FermionField &in, FermionField &out,int dag);
|
||||
|
||||
// add a DhopComm
|
||||
// add a DhopComm
|
||||
// -- suboptimal interface will presently trigger multiple comms.
|
||||
void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
|
||||
void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// New methods added
|
||||
///////////////////////////////////////////////////////////////
|
||||
void DerivInternal(StencilImpl & st,
|
||||
DoubledGaugeField & U,
|
||||
GaugeField &mat,
|
||||
const FermionField &A,
|
||||
const FermionField &B,
|
||||
int dag);
|
||||
///////////////////////////////////////////////////////////////
|
||||
// New methods added
|
||||
///////////////////////////////////////////////////////////////
|
||||
void DerivInternal(StencilImpl & st,
|
||||
DoubledGaugeField & U,
|
||||
GaugeField &mat,
|
||||
const FermionField &A,
|
||||
const FermionField &B,
|
||||
int dag);
|
||||
|
||||
void DhopInternal(StencilImpl & st,
|
||||
LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
const FermionField &in,
|
||||
FermionField &out,
|
||||
int dag);
|
||||
void DhopInternal(StencilImpl & st,
|
||||
LebesgueOrder &lo,
|
||||
DoubledGaugeField &U,
|
||||
const FermionField &in,
|
||||
FermionField &out,
|
||||
int dag);
|
||||
|
||||
// Constructors
|
||||
WilsonFermion5D(GaugeField &_Umu,
|
||||
GridCartesian &FiveDimGrid,
|
||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||
GridCartesian &FourDimGrid,
|
||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||
double _M5,const ImplParams &p= ImplParams());
|
||||
// Constructors
|
||||
WilsonFermion5D(GaugeField &_Umu,
|
||||
GridCartesian &FiveDimGrid,
|
||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||
GridCartesian &FourDimGrid,
|
||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||
double _M5,const ImplParams &p= ImplParams());
|
||||
|
||||
// Constructors
|
||||
/*
|
||||
// Constructors
|
||||
/*
|
||||
WilsonFermion5D(int simd,
|
||||
GaugeField &_Umu,
|
||||
GridCartesian &FiveDimGrid,
|
||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||
GridCartesian &FourDimGrid,
|
||||
double _M5,const ImplParams &p= ImplParams());
|
||||
*/
|
||||
GaugeField &_Umu,
|
||||
GridCartesian &FiveDimGrid,
|
||||
GridRedBlackCartesian &FiveDimRedBlackGrid,
|
||||
GridCartesian &FourDimGrid,
|
||||
double _M5,const ImplParams &p= ImplParams());
|
||||
*/
|
||||
|
||||
// DoubleStore
|
||||
void ImportGauge(const GaugeField &_Umu);
|
||||
// DoubleStore
|
||||
void ImportGauge(const GaugeField &_Umu);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Data members require to support the functionality
|
||||
///////////////////////////////////////////////////////////////
|
||||
public:
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Data members require to support the functionality
|
||||
///////////////////////////////////////////////////////////////
|
||||
public:
|
||||
|
||||
// Add these to the support from Wilson
|
||||
GridBase *_FourDimGrid;
|
||||
GridBase *_FourDimRedBlackGrid;
|
||||
GridBase *_FiveDimGrid;
|
||||
GridBase *_FiveDimRedBlackGrid;
|
||||
// Add these to the support from Wilson
|
||||
GridBase *_FourDimGrid;
|
||||
GridBase *_FourDimRedBlackGrid;
|
||||
GridBase *_FiveDimGrid;
|
||||
GridBase *_FiveDimRedBlackGrid;
|
||||
|
||||
double M5;
|
||||
int Ls;
|
||||
double M5;
|
||||
int Ls;
|
||||
|
||||
//Defines the stencils for even and odd
|
||||
StencilImpl Stencil;
|
||||
StencilImpl StencilEven;
|
||||
StencilImpl StencilOdd;
|
||||
//Defines the stencils for even and odd
|
||||
StencilImpl Stencil;
|
||||
StencilImpl StencilEven;
|
||||
StencilImpl StencilOdd;
|
||||
|
||||
// Copy of the gauge field , with even and odd subsets
|
||||
DoubledGaugeField Umu;
|
||||
DoubledGaugeField UmuEven;
|
||||
DoubledGaugeField UmuOdd;
|
||||
// Copy of the gauge field , with even and odd subsets
|
||||
DoubledGaugeField Umu;
|
||||
DoubledGaugeField UmuEven;
|
||||
DoubledGaugeField UmuOdd;
|
||||
|
||||
LebesgueOrder Lebesgue;
|
||||
LebesgueOrder LebesgueEvenOdd;
|
||||
LebesgueOrder Lebesgue;
|
||||
LebesgueOrder LebesgueEvenOdd;
|
||||
|
||||
// Comms buffer
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > comm_buf;
|
||||
// Comms buffer
|
||||
std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> > comm_buf;
|
||||
|
||||
};
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}}
|
||||
|
||||
#endif
|
||||
|
@ -43,10 +43,9 @@ WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
|
||||
////////////////////////////////////////////
|
||||
|
||||
template <class Impl>
|
||||
void WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(
|
||||
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
commVector<SiteHalfSpinor> &buf, int sF,
|
||||
int sU, const FermionField &in, FermionField &out) {
|
||||
void WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
SiteHalfSpinor *buf, int sF,
|
||||
int sU, const FermionField &in, FermionField &out) {
|
||||
SiteHalfSpinor tmp;
|
||||
SiteHalfSpinor chi;
|
||||
SiteHalfSpinor *chi_p;
|
||||
@ -220,10 +219,9 @@ void WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(
|
||||
|
||||
// Need controls to do interior, exterior, or both
|
||||
template <class Impl>
|
||||
void WilsonKernels<Impl>::DiracOptGenericDhopSite(
|
||||
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
commVector<SiteHalfSpinor> &buf, int sF,
|
||||
int sU, const FermionField &in, FermionField &out) {
|
||||
void WilsonKernels<Impl>::DiracOptGenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
SiteHalfSpinor *buf, int sF,
|
||||
int sU, const FermionField &in, FermionField &out) {
|
||||
SiteHalfSpinor tmp;
|
||||
SiteHalfSpinor chi;
|
||||
SiteHalfSpinor *chi_p;
|
||||
@ -396,10 +394,9 @@ void WilsonKernels<Impl>::DiracOptGenericDhopSite(
|
||||
};
|
||||
|
||||
template <class Impl>
|
||||
void WilsonKernels<Impl>::DiracOptDhopDir(
|
||||
StencilImpl &st, DoubledGaugeField &U,
|
||||
commVector<SiteHalfSpinor> &buf, int sF,
|
||||
int sU, const FermionField &in, FermionField &out, int dir, int gamma) {
|
||||
void WilsonKernels<Impl>::DiracOptDhopDir( StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor *buf, int sF,
|
||||
int sU, const FermionField &in, FermionField &out, int dir, int gamma) {
|
||||
|
||||
SiteHalfSpinor tmp;
|
||||
SiteHalfSpinor chi;
|
||||
SiteSpinor result;
|
||||
|
@ -32,175 +32,132 @@ directory
|
||||
#define GRID_QCD_DHOP_H
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
namespace QCD {
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Helper routines that implement Wilson stencil for a single site.
|
||||
// Common to both the WilsonFermion and WilsonFermion5D
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class WilsonKernelsStatic {
|
||||
public:
|
||||
// S-direction is INNERMOST and takes no part in the parity.
|
||||
static int AsmOpt; // these are a temporary hack
|
||||
static int HandOpt; // these are a temporary hack
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Helper routines that implement Wilson stencil for a single site.
|
||||
// Common to both the WilsonFermion and WilsonFermion5D
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class WilsonKernelsStatic {
|
||||
public:
|
||||
// S-direction is INNERMOST and takes no part in the parity.
|
||||
static int AsmOpt; // these are a temporary hack
|
||||
static int HandOpt; // these are a temporary hack
|
||||
};
|
||||
template<class Impl> class WilsonKernels : public FermionOperator<Impl> , public WilsonKernelsStatic {
|
||||
public:
|
||||
|
||||
template<class Impl> class WilsonKernels : public FermionOperator<Impl> , public WilsonKernelsStatic {
|
||||
public:
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
typedef FermionOperator<Impl> Base;
|
||||
|
||||
INHERIT_IMPL_TYPES(Impl);
|
||||
typedef FermionOperator<Impl> Base;
|
||||
public:
|
||||
|
||||
public:
|
||||
|
||||
template <bool EnableBool = true>
|
||||
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
|
||||
DiracOptDhopSite(
|
||||
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
commVector<SiteHalfSpinor> &buf,
|
||||
int sF, int sU, int Ls, int Ns, const FermionField &in,
|
||||
FermionField &out) {
|
||||
template <bool EnableBool = true>
|
||||
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
|
||||
DiracOptDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out) {
|
||||
#ifdef AVX512
|
||||
if (AsmOpt) {
|
||||
WilsonKernels<Impl>::DiracOptAsmDhopSite(st, lo, U, buf, sF, sU, Ls, Ns,
|
||||
in, out);
|
||||
|
||||
} else {
|
||||
if (AsmOpt) {
|
||||
WilsonKernels<Impl>::DiracOptAsmDhopSite(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
|
||||
} else {
|
||||
#else
|
||||
{
|
||||
{
|
||||
#endif
|
||||
for (int site = 0; site < Ns; site++) {
|
||||
for (int s = 0; s < Ls; s++) {
|
||||
if (HandOpt)
|
||||
WilsonKernels<Impl>::DiracOptHandDhopSite(st, lo, U, buf, sF, sU,
|
||||
in, out);
|
||||
else
|
||||
WilsonKernels<Impl>::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU,
|
||||
in, out);
|
||||
sF++;
|
||||
}
|
||||
sU++;
|
||||
}
|
||||
}
|
||||
for (int site = 0; site < Ns; site++) {
|
||||
for (int s = 0; s < Ls; s++) {
|
||||
if (HandOpt)
|
||||
WilsonKernels<Impl>::DiracOptHandDhopSite(st,lo,U,buf,sF,sU,in,out);
|
||||
else
|
||||
WilsonKernels<Impl>::DiracOptGenericDhopSite(st,lo,U,buf,sF,sU,in,out);
|
||||
sF++;
|
||||
}
|
||||
|
||||
template <bool EnableBool = true>
|
||||
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
|
||||
DiracOptDhopSite(
|
||||
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
commVector<SiteHalfSpinor> &buf,
|
||||
int sF, int sU, int Ls, int Ns, const FermionField &in,
|
||||
FermionField &out) {
|
||||
for (int site = 0; site < Ns; site++) {
|
||||
for (int s = 0; s < Ls; s++) {
|
||||
WilsonKernels<Impl>::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU, in,
|
||||
out);
|
||||
sF++;
|
||||
}
|
||||
sU++;
|
||||
}
|
||||
}
|
||||
|
||||
template <bool EnableBool = true>
|
||||
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 && EnableBool,
|
||||
void>::type
|
||||
DiracOptDhopSiteDag(
|
||||
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
commVector<SiteHalfSpinor> &buf,
|
||||
int sF, int sU, int Ls, int Ns, const FermionField &in,
|
||||
FermionField &out) {
|
||||
#ifdef AVX512
|
||||
if (AsmOpt) {
|
||||
WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st, lo, U, buf, sF, sU, Ls,
|
||||
Ns, in, out);
|
||||
} else {
|
||||
#else
|
||||
{
|
||||
#endif
|
||||
for (int site = 0; site < Ns; site++) {
|
||||
for (int s = 0; s < Ls; s++) {
|
||||
if (HandOpt)
|
||||
WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st, lo, U, buf, sF, sU,
|
||||
in, out);
|
||||
else
|
||||
WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF,
|
||||
sU, in, out);
|
||||
sF++;
|
||||
}
|
||||
sU++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <bool EnableBool = true>
|
||||
typename std::enable_if<
|
||||
(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,
|
||||
void>::type
|
||||
DiracOptDhopSiteDag(
|
||||
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
commVector<SiteHalfSpinor> &buf,
|
||||
int sF, int sU, int Ls, int Ns, const FermionField &in,
|
||||
FermionField &out) {
|
||||
for (int site = 0; site < Ns; site++) {
|
||||
for (int s = 0; s < Ls; s++) {
|
||||
WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st, lo, U, buf, sF, sU,
|
||||
in, out);
|
||||
sF++;
|
||||
}
|
||||
sU++;
|
||||
}
|
||||
}
|
||||
|
||||
void DiracOptDhopDir(
|
||||
StencilImpl &st, DoubledGaugeField &U,
|
||||
commVector<SiteHalfSpinor> &buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out, int dirdisp,
|
||||
int gamma);
|
||||
|
||||
private:
|
||||
// Specialised variants
|
||||
void DiracOptGenericDhopSite(
|
||||
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
commVector<SiteHalfSpinor> &buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
|
||||
void DiracOptGenericDhopSiteDag(
|
||||
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
commVector<SiteHalfSpinor> &buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
|
||||
void DiracOptAsmDhopSite(
|
||||
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
commVector<SiteHalfSpinor> &buf,
|
||||
int sF, int sU, int Ls, int Ns, const FermionField &in,
|
||||
FermionField &out);
|
||||
|
||||
void DiracOptAsmDhopSiteDag(
|
||||
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
commVector<SiteHalfSpinor> &buf,
|
||||
int sF, int sU, int Ls, int Ns, const FermionField &in,
|
||||
FermionField &out);
|
||||
|
||||
void DiracOptHandDhopSite(
|
||||
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
commVector<SiteHalfSpinor> &buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
|
||||
void DiracOptHandDhopSiteDag(
|
||||
StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
|
||||
commVector<SiteHalfSpinor> &buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
|
||||
public:
|
||||
WilsonKernels(const ImplParams &p = ImplParams());
|
||||
};
|
||||
|
||||
sU++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <bool EnableBool = true>
|
||||
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
|
||||
DiracOptDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out) {
|
||||
|
||||
for (int site = 0; site < Ns; site++) {
|
||||
for (int s = 0; s < Ls; s++) {
|
||||
WilsonKernels<Impl>::DiracOptGenericDhopSite(st, lo, U, buf, sF, sU, in, out);
|
||||
sF++;
|
||||
}
|
||||
sU++;
|
||||
}
|
||||
}
|
||||
|
||||
template <bool EnableBool = true>
|
||||
typename std::enable_if<Impl::Dimension == 3 && Nc == 3 && EnableBool,void>::type
|
||||
DiracOptDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out) {
|
||||
#ifdef AVX512
|
||||
if (AsmOpt) {
|
||||
WilsonKernels<Impl>::DiracOptAsmDhopSiteDag(st,lo,U,buf,sF,sU,Ls,Ns,in,out);
|
||||
} else {
|
||||
#else
|
||||
{
|
||||
#endif
|
||||
for (int site = 0; site < Ns; site++) {
|
||||
for (int s = 0; s < Ls; s++) {
|
||||
if (HandOpt)
|
||||
WilsonKernels<Impl>::DiracOptHandDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
|
||||
else
|
||||
WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
|
||||
sF++;
|
||||
}
|
||||
sU++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <bool EnableBool = true>
|
||||
typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool,void>::type
|
||||
DiracOptDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out) {
|
||||
|
||||
for (int site = 0; site < Ns; site++) {
|
||||
for (int s = 0; s < Ls; s++) {
|
||||
WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(st,lo,U,buf,sF,sU,in,out);
|
||||
sF++;
|
||||
}
|
||||
sU++;
|
||||
}
|
||||
}
|
||||
|
||||
void DiracOptDhopDir(StencilImpl &st, DoubledGaugeField &U,SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out, int dirdisp, int gamma);
|
||||
|
||||
private:
|
||||
// Specialised variants
|
||||
void DiracOptGenericDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
|
||||
void DiracOptGenericDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
|
||||
void DiracOptAsmDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Ns, const FermionField &in,FermionField &out);
|
||||
|
||||
void DiracOptAsmDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, int Ls, int Ns, const FermionField &in, FermionField &out);
|
||||
|
||||
void DiracOptHandDhopSite(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
|
||||
void DiracOptHandDhopSiteDag(StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U, SiteHalfSpinor * buf,
|
||||
int sF, int sU, const FermionField &in, FermionField &out);
|
||||
|
||||
public:
|
||||
|
||||
WilsonKernels(const ImplParams &p = ImplParams());
|
||||
|
||||
};
|
||||
|
||||
}}
|
||||
|
||||
#endif
|
||||
|
@ -33,31 +33,27 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
|
||||
namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
// Default to no assembler implementation
|
||||
///////////////////////////////////////////////////////////
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
||||
commVector<SiteHalfSpinor> &buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl >::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
||||
commVector<SiteHalfSpinor> &buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
namespace QCD {
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
// Default to no assembler implementation
|
||||
///////////////////////////////////////////////////////////
|
||||
template<class Impl> void
|
||||
WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
template<class Impl> void
|
||||
WilsonKernels<Impl >::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
#if defined(AVX512)
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
// If we are AVX512 specialise the single precision routine
|
||||
///////////////////////////////////////////////////////////
|
||||
@ -65,16 +61,16 @@ namespace Grid {
|
||||
#include <simd/Intel512wilson.h>
|
||||
#include <simd/Intel512single.h>
|
||||
|
||||
static Vector<vComplexF> signs;
|
||||
static Vector<vComplexF> signs;
|
||||
|
||||
int setupSigns(void ){
|
||||
Vector<vComplexF> bother(2);
|
||||
signs = bother;
|
||||
vrsign(signs[0]);
|
||||
visign(signs[1]);
|
||||
return 1;
|
||||
}
|
||||
static int signInit = setupSigns();
|
||||
int setupSigns(void ){
|
||||
Vector<vComplexF> bother(2);
|
||||
signs = bother;
|
||||
vrsign(signs[0]);
|
||||
visign(signs[1]);
|
||||
return 1;
|
||||
}
|
||||
static int signInit = setupSigns();
|
||||
|
||||
#define label(A) ilabel(A)
|
||||
#define ilabel(A) ".globl\n" #A ":\n"
|
||||
@ -84,17 +80,15 @@ namespace Grid {
|
||||
#define FX(A) WILSONASM_ ##A
|
||||
|
||||
#undef KERNEL_DAG
|
||||
template<>
|
||||
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
||||
commVector<SiteHalfSpinor> &buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
template<> void
|
||||
WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||
|
||||
#define KERNEL_DAG
|
||||
template<>
|
||||
void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
||||
commVector<SiteHalfSpinor> &buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
template<> void
|
||||
WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||
|
||||
#undef VMOVIDUP
|
||||
@ -109,31 +103,26 @@ namespace Grid {
|
||||
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
|
||||
|
||||
#undef KERNEL_DAG
|
||||
template<>
|
||||
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
||||
commVector<SiteHalfSpinor> &buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
template<> void
|
||||
WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||
|
||||
#define KERNEL_DAG
|
||||
template<>
|
||||
void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
|
||||
commVector<SiteHalfSpinor> &buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
template<> void
|
||||
WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#define INSTANTIATE_ASM(A)\
|
||||
template void WilsonKernels<A>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\
|
||||
commVector<SiteHalfSpinor> &buf,\
|
||||
template void WilsonKernels<A>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
|
||||
template void WilsonKernels<A>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\
|
||||
commVector<SiteHalfSpinor> &buf,\
|
||||
\
|
||||
template void WilsonKernels<A>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
|
||||
|
||||
|
||||
INSTANTIATE_ASM(WilsonImplF);
|
||||
INSTANTIATE_ASM(WilsonImplD);
|
||||
INSTANTIATE_ASM(ZWilsonImplF);
|
||||
@ -144,6 +133,6 @@ INSTANTIATE_ASM(DomainWallVec5dImplF);
|
||||
INSTANTIATE_ASM(DomainWallVec5dImplD);
|
||||
INSTANTIATE_ASM(ZDomainWallVec5dImplF);
|
||||
INSTANTIATE_ASM(ZDomainWallVec5dImplD);
|
||||
}
|
||||
}
|
||||
|
||||
}}
|
||||
|
||||
|
@ -311,10 +311,9 @@ namespace Grid {
|
||||
namespace QCD {
|
||||
|
||||
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
||||
commVector<SiteHalfSpinor> &buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
template<class Impl> void
|
||||
WilsonKernels<Impl>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
typedef typename Simd::scalar_type S;
|
||||
typedef typename Simd::vector_type V;
|
||||
@ -554,10 +553,9 @@ namespace QCD {
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
||||
commVector<SiteHalfSpinor> &buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
template<class Impl>
|
||||
void WilsonKernels<Impl>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
// std::cout << "Hand op Dhop "<<std::endl;
|
||||
typedef typename Simd::scalar_type S;
|
||||
@ -798,38 +796,35 @@ namespace QCD {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////
|
||||
// Specialise Gparity to simple implementation
|
||||
////////////////////////////////////////////////
|
||||
template<>
|
||||
void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
||||
commVector<SiteHalfSpinor> &buf,
|
||||
int sF,int sU,const FermionField &in, FermionField &out)
|
||||
template<> void
|
||||
WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
||||
SiteHalfSpinor *buf,
|
||||
int sF,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
template<>
|
||||
void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
||||
commVector<SiteHalfSpinor> &buf,
|
||||
int sF,int sU,const FermionField &in, FermionField &out)
|
||||
template<> void
|
||||
WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
||||
SiteHalfSpinor *buf,
|
||||
int sF,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
template<>
|
||||
void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
||||
commVector<SiteHalfSpinor> &buf,
|
||||
int sF,int sU,const FermionField &in, FermionField &out)
|
||||
template<> void
|
||||
WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int sF,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
|
||||
template<>
|
||||
void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
|
||||
commVector<SiteHalfSpinor> &buf,
|
||||
int sF,int sU,const FermionField &in, FermionField &out)
|
||||
template<> void
|
||||
WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int sF,int sU,const FermionField &in, FermionField &out)
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
@ -840,12 +835,10 @@ void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,
|
||||
// Need Nc=3 though //
|
||||
|
||||
#define INSTANTIATE_THEM(A) \
|
||||
template void WilsonKernels<A>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\
|
||||
commVector<SiteHalfSpinor> &buf,\
|
||||
int ss,int sU,const FermionField &in, FermionField &out);\
|
||||
template void WilsonKernels<A>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\
|
||||
commVector<SiteHalfSpinor> &buf,\
|
||||
int ss,int sU,const FermionField &in, FermionField &out);
|
||||
template void WilsonKernels<A>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
|
||||
int ss,int sU,const FermionField &in, FermionField &out); \
|
||||
template void WilsonKernels<A>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,SiteHalfSpinor *buf,\
|
||||
int ss,int sU,const FermionField &in, FermionField &out);
|
||||
|
||||
INSTANTIATE_THEM(WilsonImplF);
|
||||
INSTANTIATE_THEM(WilsonImplD);
|
||||
|
@ -42,20 +42,14 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
namespace Grid{
|
||||
namespace Optimization {
|
||||
|
||||
template<class vtype>
|
||||
union uconv {
|
||||
__m512 f;
|
||||
vtype v;
|
||||
};
|
||||
|
||||
union u512f {
|
||||
__m512 v;
|
||||
float f[8];
|
||||
float f[16];
|
||||
};
|
||||
|
||||
union u512d {
|
||||
__m512 v;
|
||||
double f[4];
|
||||
__m512d v;
|
||||
double f[8];
|
||||
};
|
||||
|
||||
struct Vsplat{
|
||||
@ -377,9 +371,14 @@ namespace Optimization {
|
||||
// Some Template specialization
|
||||
|
||||
// Hack for CLANG until mm512_reduce_add_ps etc... are implemented in GCC and Clang releases
|
||||
<<<<<<< HEAD
|
||||
#define GNU_CLANG_COMPILER
|
||||
#ifdef GNU_CLANG_COMPILER
|
||||
=======
|
||||
|
||||
#ifndef __INTEL_COMPILER
|
||||
#warning "Slow reduction due to incomplete reduce intrinsics"
|
||||
>>>>>>> develop
|
||||
//Complex float Reduce
|
||||
template<>
|
||||
inline Grid::ComplexF Reduce<Grid::ComplexF, __m512>::operator()(__m512 in){
|
||||
|
@ -116,7 +116,7 @@ int main (int argc, char ** argv)
|
||||
else if (SE->_is_local)
|
||||
Check._odata[i] = Foo._odata[SE->_offset];
|
||||
else
|
||||
Check._odata[i] = myStencil.comm_buf[SE->_offset];
|
||||
Check._odata[i] = myStencil.CommBuf()[SE->_offset];
|
||||
}
|
||||
|
||||
Real nrmC = norm2(Check);
|
||||
@ -207,7 +207,7 @@ int main (int argc, char ** argv)
|
||||
else if (SE->_is_local)
|
||||
OCheck._odata[i] = EFoo._odata[SE->_offset];
|
||||
else
|
||||
OCheck._odata[i] = EStencil.comm_buf[SE->_offset];
|
||||
OCheck._odata[i] = EStencil.CommBuf()[SE->_offset];
|
||||
}
|
||||
for(int i=0;i<ECheck._grid->oSites();i++){
|
||||
int permute_type;
|
||||
@ -220,7 +220,7 @@ int main (int argc, char ** argv)
|
||||
else if (SE->_is_local)
|
||||
ECheck._odata[i] = OFoo._odata[SE->_offset];
|
||||
else
|
||||
ECheck._odata[i] = OStencil.comm_buf[SE->_offset];
|
||||
ECheck._odata[i] = OStencil.CommBuf()[SE->_offset];
|
||||
}
|
||||
|
||||
setCheckerboard(Check,ECheck);
|
||||
|
Loading…
x
Reference in New Issue
Block a user