mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 14:04:32 +00:00 
			
		
		
		
	commVector
This commit is contained in:
		
							
								
								
									
										358
									
								
								lib/communicator/Communicator_mpi3.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										358
									
								
								lib/communicator/Communicator_mpi3.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,358 @@
 | 
				
			|||||||
 | 
					    /*************************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Grid physics library, www.github.com/paboyle/Grid 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Source file: ./lib/communicator/Communicator_mpi.cc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Copyright (C) 2015
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					    it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					    the Free Software Foundation; either version 2 of the License, or
 | 
				
			||||||
 | 
					    (at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					    GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    You should have received a copy of the GNU General Public License along
 | 
				
			||||||
 | 
					    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
 | 
					    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    See the full license in the file "LICENSE" in the top level distribution directory
 | 
				
			||||||
 | 
					    *************************************************************************************/
 | 
				
			||||||
 | 
					    /*  END LEGAL */
 | 
				
			||||||
 | 
					#include "Grid.h"
 | 
				
			||||||
 | 
					#include <mpi.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace Grid {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Global used by Init and nowhere else. How to hide?
 | 
				
			||||||
 | 
					int Rank(void) {
 | 
				
			||||||
 | 
					  int pe;
 | 
				
			||||||
 | 
					  MPI_Comm_rank(MPI_COMM_WORLD,&pe);
 | 
				
			||||||
 | 
					  return pe;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					  // Should error check all MPI calls.
 | 
				
			||||||
 | 
					void CartesianCommunicator::Init(int *argc, char ***argv) {
 | 
				
			||||||
 | 
					  int flag;
 | 
				
			||||||
 | 
					  MPI_Initialized(&flag); // needed to coexist with other libs apparently
 | 
				
			||||||
 | 
					  if ( !flag ) {
 | 
				
			||||||
 | 
					    MPI_Init(argc,argv);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Want to implement some magic ... Group sub-cubes into those on same node
 | 
				
			||||||
 | 
					  //
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  std::vector<int> coor = _processor_coor;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  assert(std::abs(shift) <_processors[dim]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  coor[dim] = (_processor_coor[dim] + shift + _processors[dim])%_processors[dim];
 | 
				
			||||||
 | 
					  Lexicographic::IndexFromCoor(coor,source,_processors);
 | 
				
			||||||
 | 
					  source = LexicographicToWorldRank[source];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  coor[dim] = (_processor_coor[dim] - shift + _processors[dim])%_processors[dim];
 | 
				
			||||||
 | 
					  Lexicographic::IndexFromCoor(coor,dest,_processors);
 | 
				
			||||||
 | 
					  dest = LexicographicToWorldRank[dest];
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  int rank;
 | 
				
			||||||
 | 
					  Lexicographic::IndexFromCoor(coor,rank,_processors);
 | 
				
			||||||
 | 
					  rank = LexicographicToWorldRank[rank];
 | 
				
			||||||
 | 
					  return rank;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					void  CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  Lexicographic::CoorFromIndex(coor,rank,_processors);
 | 
				
			||||||
 | 
					  rank = LexicographicToWorldRank[rank];
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
 | 
				
			||||||
 | 
					{ 
 | 
				
			||||||
 | 
					  _ndimension = processors.size();
 | 
				
			||||||
 | 
					  std::cout << "Creating "<< _ndimension << " dim communicator "<<std::endl;
 | 
				
			||||||
 | 
					  for(int d =0;d<_ndimension;d++){
 | 
				
			||||||
 | 
					    std::cout << processors[d]<<" ";
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					  std::cout << std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  WorldDims = processors;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  communicator = MPI_COMM_WORLD;
 | 
				
			||||||
 | 
					  MPI_Comm shmcomm;
 | 
				
			||||||
 | 
					  MPI_Comm_split_type(communicator, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL,&shmcomm);
 | 
				
			||||||
 | 
					  MPI_Comm_rank(communicator,&WorldRank);
 | 
				
			||||||
 | 
					  MPI_Comm_size(communicator,&WorldSize);
 | 
				
			||||||
 | 
					  MPI_Comm_rank(shmcomm     ,&ShmRank);
 | 
				
			||||||
 | 
					  MPI_Comm_size(shmcomm     ,&ShmSize);
 | 
				
			||||||
 | 
					  GroupSize = WorldSize/ShmSize;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout<< "Ranks per node "<< ShmSize << std::endl;
 | 
				
			||||||
 | 
					  std::cout<< "Nodes          "<< GroupSize << std::endl;
 | 
				
			||||||
 | 
					  std::cout<< "Ranks          "<< WorldSize << std::endl;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Assert power of two shm_size.
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  int log2size = -1;
 | 
				
			||||||
 | 
					  for(int i=0;i<=MAXLOG2RANKSPERNODE;i++){  
 | 
				
			||||||
 | 
					    if ( (0x1<<i) == ShmSize ) {
 | 
				
			||||||
 | 
					      log2size = i;
 | 
				
			||||||
 | 
					      break;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  assert(log2size != -1);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Identify subblock of ranks on node spreading across dims
 | 
				
			||||||
 | 
					  // in a maximally symmetrical way
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  int dim = 0;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  ShmDims.resize(_ndimension,1);
 | 
				
			||||||
 | 
					  GroupDims.resize(_ndimension);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  ShmCoor.resize(_ndimension);
 | 
				
			||||||
 | 
					  GroupCoor.resize(_ndimension);
 | 
				
			||||||
 | 
					  WorldCoor.resize(_ndimension);
 | 
				
			||||||
 | 
					  for(int l2=0;l2<log2size;l2++){
 | 
				
			||||||
 | 
					    while ( WorldDims[dim] / ShmDims[dim] <= 1 ) dim=(dim+1)%_ndimension;
 | 
				
			||||||
 | 
					    ShmDims[dim]*=2;
 | 
				
			||||||
 | 
					    dim=(dim+1)%_ndimension;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  std::cout << "Shm group dims "<<std::endl;
 | 
				
			||||||
 | 
					  for(int d =0;d<_ndimension;d++){
 | 
				
			||||||
 | 
					    std::cout << ShmDims[d]<<" ";
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					  std::cout << std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Establish torus of processes and nodes with sub-blockings
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  for(int d=0;d<_ndimension;d++){
 | 
				
			||||||
 | 
					    GroupDims[d] = WorldDims[d]/ShmDims[d];
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  std::cout << "Group dims "<<std::endl;
 | 
				
			||||||
 | 
					  for(int d =0;d<_ndimension;d++){
 | 
				
			||||||
 | 
					    std::cout << GroupDims[d]<<" ";
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					  std::cout << std::endl;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  MPI_Group WorldGroup, ShmGroup;
 | 
				
			||||||
 | 
					  MPI_Comm_group (communicator, &WorldGroup); 
 | 
				
			||||||
 | 
					  MPI_Comm_group (shmcomm, &ShmGroup);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  std::vector<int> world_ranks(WorldSize); 
 | 
				
			||||||
 | 
					  std::vector<int> group_ranks(WorldSize); 
 | 
				
			||||||
 | 
					  std::vector<int> mygroup(GroupSize);
 | 
				
			||||||
 | 
					  for(int r=0;r<WorldSize;r++) world_ranks[r]=r;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  MPI_Group_translate_ranks (WorldGroup,WorldSize,&world_ranks[0],ShmGroup, &group_ranks[0]); 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Check processor counts match
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  _Nprocessors=1;
 | 
				
			||||||
 | 
					  _processors = processors;
 | 
				
			||||||
 | 
					  _processor_coor.resize(_ndimension);
 | 
				
			||||||
 | 
					  for(int i=0;i<_ndimension;i++){
 | 
				
			||||||
 | 
					    std::cout << " p " << _processors[i]<<std::endl;
 | 
				
			||||||
 | 
					    _Nprocessors*=_processors[i];
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  std::cout << " World " <<WorldSize <<" Nproc "<<_Nprocessors<<std::endl;
 | 
				
			||||||
 | 
					  assert(WorldSize==_Nprocessors);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  ///////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Identify who is in my group and noninate the leader
 | 
				
			||||||
 | 
					  ///////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  int g=0;
 | 
				
			||||||
 | 
					  for(int rank=0;rank<WorldSize;rank++){
 | 
				
			||||||
 | 
					    if(group_ranks[rank]!=MPI_UNDEFINED){
 | 
				
			||||||
 | 
						  mygroup[g] = rank;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  std::sort(mygroup.begin(),mygroup.end(),std::greater<int>());
 | 
				
			||||||
 | 
					  int myleader = mygroup[0];
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  std::vector<int> leaders_1hot(WorldSize,0);
 | 
				
			||||||
 | 
					  std::vector<int> leaders_group(GroupSize,0);
 | 
				
			||||||
 | 
					  leaders_1hot [ myleader ] = 1;
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					  ///////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // global sum leaders over comm world
 | 
				
			||||||
 | 
					  ///////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  int ierr=MPI_Allreduce(MPI_IN_PLACE,&leaders_1hot[0],WorldSize,MPI_INT,MPI_SUM,communicator);
 | 
				
			||||||
 | 
					  assert(ierr==0);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  ///////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // find the group leaders world rank
 | 
				
			||||||
 | 
					  ///////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  int group=0;
 | 
				
			||||||
 | 
					  for(int l=0;l<WorldSize;l++){
 | 
				
			||||||
 | 
					    if(leaders_1hot[l]){
 | 
				
			||||||
 | 
					      leaders_group[group++] = l;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  ///////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Identify the rank of the group in which I (and my leader) live
 | 
				
			||||||
 | 
					  ///////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  GroupRank=-1;
 | 
				
			||||||
 | 
					  for(int g=0;g<GroupSize;g++){
 | 
				
			||||||
 | 
					    if (myleader == leaders_group[g]){
 | 
				
			||||||
 | 
					      GroupRank=g;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  assert(GroupRank!=-1);
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Establish mapping between lexico physics coord and WorldRank
 | 
				
			||||||
 | 
					  // 
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  LexicographicToWorldRank.resize(WorldSize,0);
 | 
				
			||||||
 | 
					  Lexicographic::CoorFromIndex(GroupCoor,GroupRank,GroupDims);
 | 
				
			||||||
 | 
					  Lexicographic::CoorFromIndex(ShmCoor,ShmRank,ShmDims);
 | 
				
			||||||
 | 
					  for(int d=0;d<_ndimension;d++){
 | 
				
			||||||
 | 
					    WorldCoor[d] = GroupCoor[d]*ShmDims[d]+ShmCoor[d];
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  _processor_coor = WorldCoor;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int lexico;
 | 
				
			||||||
 | 
					  Lexicographic::IndexFromCoor(WorldCoor,lexico,WorldDims);
 | 
				
			||||||
 | 
					  LexicographicToWorldRank[lexico]=WorldRank;
 | 
				
			||||||
 | 
					  _processor = lexico;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ///////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // global sum Lexico to World mapping
 | 
				
			||||||
 | 
					  ///////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  ierr=MPI_Allreduce(MPI_IN_PLACE,&LexicographicToWorldRank[0],WorldSize,MPI_INT,MPI_SUM,communicator);
 | 
				
			||||||
 | 
					  assert(ierr==0);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void CartesianCommunicator::GlobalSum(uint32_t &u){
 | 
				
			||||||
 | 
					  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT32_T,MPI_SUM,communicator);
 | 
				
			||||||
 | 
					  assert(ierr==0);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					void CartesianCommunicator::GlobalSum(uint64_t &u){
 | 
				
			||||||
 | 
					  int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator);
 | 
				
			||||||
 | 
					  assert(ierr==0);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					void CartesianCommunicator::GlobalSum(float &f){
 | 
				
			||||||
 | 
					  int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator);
 | 
				
			||||||
 | 
					  assert(ierr==0);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					void CartesianCommunicator::GlobalSumVector(float *f,int N)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  int ierr=MPI_Allreduce(MPI_IN_PLACE,f,N,MPI_FLOAT,MPI_SUM,communicator);
 | 
				
			||||||
 | 
					  assert(ierr==0);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					void CartesianCommunicator::GlobalSum(double &d)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  int ierr = MPI_Allreduce(MPI_IN_PLACE,&d,1,MPI_DOUBLE,MPI_SUM,communicator);
 | 
				
			||||||
 | 
					  assert(ierr==0);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					void CartesianCommunicator::GlobalSumVector(double *d,int N)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  int ierr = MPI_Allreduce(MPI_IN_PLACE,d,N,MPI_DOUBLE,MPI_SUM,communicator);
 | 
				
			||||||
 | 
					  assert(ierr==0);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Basic Halo comms primitive
 | 
				
			||||||
 | 
					void CartesianCommunicator::SendToRecvFrom(void *xmit,
 | 
				
			||||||
 | 
										   int dest,
 | 
				
			||||||
 | 
										   void *recv,
 | 
				
			||||||
 | 
										   int from,
 | 
				
			||||||
 | 
										   int bytes)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  std::vector<CommsRequest_t> reqs(0);
 | 
				
			||||||
 | 
					  SendToRecvFromBegin(reqs,xmit,dest,recv,from,bytes);
 | 
				
			||||||
 | 
					  SendToRecvFromComplete(reqs);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void CartesianCommunicator::SendRecvPacket(void *xmit,
 | 
				
			||||||
 | 
										   void *recv,
 | 
				
			||||||
 | 
										   int sender,
 | 
				
			||||||
 | 
										   int receiver,
 | 
				
			||||||
 | 
										   int bytes)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  MPI_Status stat;
 | 
				
			||||||
 | 
					  assert(sender != receiver);
 | 
				
			||||||
 | 
					  int tag = sender;
 | 
				
			||||||
 | 
					  if ( _processor == sender ) {
 | 
				
			||||||
 | 
					    MPI_Send(xmit, bytes, MPI_CHAR,receiver,tag,communicator);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  if ( _processor == receiver ) { 
 | 
				
			||||||
 | 
					    MPI_Recv(recv, bytes, MPI_CHAR,sender,tag,communicator,&stat);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Basic Halo comms primitive
 | 
				
			||||||
 | 
					void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 | 
				
			||||||
 | 
											void *xmit,
 | 
				
			||||||
 | 
											int dest,
 | 
				
			||||||
 | 
											void *recv,
 | 
				
			||||||
 | 
											int from,
 | 
				
			||||||
 | 
											int bytes)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  MPI_Request xrq;
 | 
				
			||||||
 | 
					  MPI_Request rrq;
 | 
				
			||||||
 | 
					  int rank = _processor;
 | 
				
			||||||
 | 
					  int ierr;
 | 
				
			||||||
 | 
					  ierr =MPI_Isend(xmit, bytes, MPI_CHAR,dest,_processor,communicator,&xrq);
 | 
				
			||||||
 | 
					  ierr|=MPI_Irecv(recv, bytes, MPI_CHAR,from,from,communicator,&rrq);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  assert(ierr==0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  list.push_back(xrq);
 | 
				
			||||||
 | 
					  list.push_back(rrq);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					void CartesianCommunicator::SendToRecvFromComplete(std::vector<CommsRequest_t> &list)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  int nreq=list.size();
 | 
				
			||||||
 | 
					  std::vector<MPI_Status> status(nreq);
 | 
				
			||||||
 | 
					  int ierr = MPI_Waitall(nreq,&list[0],&status[0]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  assert(ierr==0);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void CartesianCommunicator::Barrier(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  int ierr = MPI_Barrier(communicator);
 | 
				
			||||||
 | 
					  assert(ierr==0);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void CartesianCommunicator::Broadcast(int root,void* data, int bytes)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  int ierr=MPI_Bcast(data,
 | 
				
			||||||
 | 
							     bytes,
 | 
				
			||||||
 | 
							     MPI_BYTE,
 | 
				
			||||||
 | 
							     root,
 | 
				
			||||||
 | 
							     communicator);
 | 
				
			||||||
 | 
					  assert(ierr==0);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  int ierr= MPI_Bcast(data,
 | 
				
			||||||
 | 
							      bytes,
 | 
				
			||||||
 | 
							      MPI_BYTE,
 | 
				
			||||||
 | 
							      root,
 | 
				
			||||||
 | 
							      MPI_COMM_WORLD);
 | 
				
			||||||
 | 
					  assert(ierr==0);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -45,7 +45,7 @@ WilsonKernels<Impl>::WilsonKernels(const ImplParams &p) : Base(p){};
 | 
				
			|||||||
template <class Impl>
 | 
					template <class Impl>
 | 
				
			||||||
void WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(
 | 
					void WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(
 | 
				
			||||||
    StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
					    StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
				
			||||||
    std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, int sF,
 | 
					    commVector<SiteHalfSpinor> &buf, int sF,
 | 
				
			||||||
    int sU, const FermionField &in, FermionField &out) {
 | 
					    int sU, const FermionField &in, FermionField &out) {
 | 
				
			||||||
  SiteHalfSpinor tmp;
 | 
					  SiteHalfSpinor tmp;
 | 
				
			||||||
  SiteHalfSpinor chi;
 | 
					  SiteHalfSpinor chi;
 | 
				
			||||||
@@ -222,7 +222,7 @@ void WilsonKernels<Impl>::DiracOptGenericDhopSiteDag(
 | 
				
			|||||||
template <class Impl>
 | 
					template <class Impl>
 | 
				
			||||||
void WilsonKernels<Impl>::DiracOptGenericDhopSite(
 | 
					void WilsonKernels<Impl>::DiracOptGenericDhopSite(
 | 
				
			||||||
    StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
					    StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
				
			||||||
    std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, int sF,
 | 
					    commVector<SiteHalfSpinor> &buf, int sF,
 | 
				
			||||||
    int sU, const FermionField &in, FermionField &out) {
 | 
					    int sU, const FermionField &in, FermionField &out) {
 | 
				
			||||||
  SiteHalfSpinor tmp;
 | 
					  SiteHalfSpinor tmp;
 | 
				
			||||||
  SiteHalfSpinor chi;
 | 
					  SiteHalfSpinor chi;
 | 
				
			||||||
@@ -398,7 +398,7 @@ void WilsonKernels<Impl>::DiracOptGenericDhopSite(
 | 
				
			|||||||
template <class Impl>
 | 
					template <class Impl>
 | 
				
			||||||
void WilsonKernels<Impl>::DiracOptDhopDir(
 | 
					void WilsonKernels<Impl>::DiracOptDhopDir(
 | 
				
			||||||
    StencilImpl &st, DoubledGaugeField &U,
 | 
					    StencilImpl &st, DoubledGaugeField &U,
 | 
				
			||||||
    std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf, int sF,
 | 
					    commVector<SiteHalfSpinor> &buf, int sF,
 | 
				
			||||||
    int sU, const FermionField &in, FermionField &out, int dir, int gamma) {
 | 
					    int sU, const FermionField &in, FermionField &out, int dir, int gamma) {
 | 
				
			||||||
  SiteHalfSpinor tmp;
 | 
					  SiteHalfSpinor tmp;
 | 
				
			||||||
  SiteHalfSpinor chi;
 | 
					  SiteHalfSpinor chi;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -58,7 +58,7 @@ namespace Grid {
 | 
				
			|||||||
      typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
 | 
					      typename std::enable_if<Impl::Dimension == 3 && Nc == 3 &&EnableBool, void>::type
 | 
				
			||||||
	DiracOptDhopSite(
 | 
						DiracOptDhopSite(
 | 
				
			||||||
			 StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
								 StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
				
			||||||
			 std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
								 commVector<SiteHalfSpinor> &buf,
 | 
				
			||||||
			 int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
								 int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
				
			||||||
			 FermionField &out) {
 | 
								 FermionField &out) {
 | 
				
			||||||
#ifdef AVX512
 | 
					#ifdef AVX512
 | 
				
			||||||
@@ -89,7 +89,7 @@ namespace Grid {
 | 
				
			|||||||
	  typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
 | 
						  typename std::enable_if<(Impl::Dimension != 3 || (Impl::Dimension == 3 && Nc != 3)) && EnableBool, void>::type
 | 
				
			||||||
	  DiracOptDhopSite(
 | 
						  DiracOptDhopSite(
 | 
				
			||||||
			   StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
								   StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
				
			||||||
			   std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
								   commVector<SiteHalfSpinor> &buf,
 | 
				
			||||||
			   int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
								   int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
				
			||||||
			   FermionField &out) {
 | 
								   FermionField &out) {
 | 
				
			||||||
	  for (int site = 0; site < Ns; site++) {
 | 
						  for (int site = 0; site < Ns; site++) {
 | 
				
			||||||
@@ -107,7 +107,7 @@ namespace Grid {
 | 
				
			|||||||
				  void>::type
 | 
									  void>::type
 | 
				
			||||||
	  DiracOptDhopSiteDag(
 | 
						  DiracOptDhopSiteDag(
 | 
				
			||||||
			      StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
								      StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
				
			||||||
			      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
								      commVector<SiteHalfSpinor> &buf,
 | 
				
			||||||
			      int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
								      int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
				
			||||||
			      FermionField &out) {
 | 
								      FermionField &out) {
 | 
				
			||||||
#ifdef AVX512
 | 
					#ifdef AVX512
 | 
				
			||||||
@@ -139,7 +139,7 @@ namespace Grid {
 | 
				
			|||||||
				      void>::type
 | 
									      void>::type
 | 
				
			||||||
				      DiracOptDhopSiteDag(
 | 
									      DiracOptDhopSiteDag(
 | 
				
			||||||
							  StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
												  StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
				
			||||||
							  std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
												  commVector<SiteHalfSpinor> &buf,
 | 
				
			||||||
							  int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
												  int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
				
			||||||
							  FermionField &out) {
 | 
												  FermionField &out) {
 | 
				
			||||||
					for (int site = 0; site < Ns; site++) {
 | 
										for (int site = 0; site < Ns; site++) {
 | 
				
			||||||
@@ -154,7 +154,7 @@ namespace Grid {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
				    void DiracOptDhopDir(
 | 
									    void DiracOptDhopDir(
 | 
				
			||||||
							 StencilImpl &st, DoubledGaugeField &U,
 | 
												 StencilImpl &st, DoubledGaugeField &U,
 | 
				
			||||||
							 std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
												 commVector<SiteHalfSpinor> &buf,
 | 
				
			||||||
							 int sF, int sU, const FermionField &in, FermionField &out, int dirdisp,
 | 
												 int sF, int sU, const FermionField &in, FermionField &out, int dirdisp,
 | 
				
			||||||
							 int gamma);
 | 
												 int gamma);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -162,34 +162,34 @@ namespace Grid {
 | 
				
			|||||||
				    // Specialised variants
 | 
									    // Specialised variants
 | 
				
			||||||
				    void DiracOptGenericDhopSite(
 | 
									    void DiracOptGenericDhopSite(
 | 
				
			||||||
								 StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
													 StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
				
			||||||
								 std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
													 commVector<SiteHalfSpinor> &buf,
 | 
				
			||||||
								 int sF, int sU, const FermionField &in, FermionField &out);
 | 
													 int sF, int sU, const FermionField &in, FermionField &out);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
				    void DiracOptGenericDhopSiteDag(
 | 
									    void DiracOptGenericDhopSiteDag(
 | 
				
			||||||
								    StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
													    StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
				
			||||||
								    std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
													    commVector<SiteHalfSpinor> &buf,
 | 
				
			||||||
								    int sF, int sU, const FermionField &in, FermionField &out);
 | 
													    int sF, int sU, const FermionField &in, FermionField &out);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
				    void DiracOptAsmDhopSite(
 | 
									    void DiracOptAsmDhopSite(
 | 
				
			||||||
							     StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
												     StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
				
			||||||
							     std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
												     commVector<SiteHalfSpinor> &buf,
 | 
				
			||||||
							     int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
												     int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
				
			||||||
							     FermionField &out);
 | 
												     FermionField &out);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
				    void DiracOptAsmDhopSiteDag(
 | 
									    void DiracOptAsmDhopSiteDag(
 | 
				
			||||||
								StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
													StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
				
			||||||
								std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
													commVector<SiteHalfSpinor> &buf,
 | 
				
			||||||
								int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
													int sF, int sU, int Ls, int Ns, const FermionField &in,
 | 
				
			||||||
								FermionField &out);
 | 
													FermionField &out);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
				    void DiracOptHandDhopSite(
 | 
									    void DiracOptHandDhopSite(
 | 
				
			||||||
							      StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
												      StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
				
			||||||
							      std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
												      commVector<SiteHalfSpinor> &buf,
 | 
				
			||||||
							      int sF, int sU, const FermionField &in, FermionField &out);
 | 
												      int sF, int sU, const FermionField &in, FermionField &out);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
				    void DiracOptHandDhopSiteDag(
 | 
									    void DiracOptHandDhopSiteDag(
 | 
				
			||||||
								 StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
													 StencilImpl &st, LebesgueOrder &lo, DoubledGaugeField &U,
 | 
				
			||||||
								 std::vector<SiteHalfSpinor, alignedAllocator<SiteHalfSpinor> > &buf,
 | 
													 commVector<SiteHalfSpinor> &buf,
 | 
				
			||||||
								 int sF, int sU, const FermionField &in, FermionField &out);
 | 
													 int sF, int sU, const FermionField &in, FermionField &out);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	public:
 | 
						public:
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -40,14 +40,14 @@ namespace Grid {
 | 
				
			|||||||
    ///////////////////////////////////////////////////////////
 | 
					    ///////////////////////////////////////////////////////////
 | 
				
			||||||
    template<class Impl>
 | 
					    template<class Impl>
 | 
				
			||||||
      void WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
					      void WilsonKernels<Impl >::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
				
			||||||
                             std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
					                             commVector<SiteHalfSpinor>  &buf,
 | 
				
			||||||
                             int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
					                             int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
      assert(0);
 | 
					      assert(0);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    template<class Impl>
 | 
					    template<class Impl>
 | 
				
			||||||
      void WilsonKernels<Impl >::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
					      void WilsonKernels<Impl >::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
				
			||||||
                                std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
					                                commVector<SiteHalfSpinor>  &buf,
 | 
				
			||||||
                                int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
					                                int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
      assert(0);
 | 
					      assert(0);
 | 
				
			||||||
@@ -86,14 +86,14 @@ namespace Grid {
 | 
				
			|||||||
#undef KERNEL_DAG
 | 
					#undef KERNEL_DAG
 | 
				
			||||||
    template<>
 | 
					    template<>
 | 
				
			||||||
    void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
					    void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
				
			||||||
							 std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
												 commVector<SiteHalfSpinor>  &buf,
 | 
				
			||||||
							 int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
												 int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
				
			||||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 | 
					#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
#define KERNEL_DAG
 | 
					#define KERNEL_DAG
 | 
				
			||||||
    template<>
 | 
					    template<>
 | 
				
			||||||
    void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
					    void WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
				
			||||||
							    std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
												    commVector<SiteHalfSpinor>  &buf,
 | 
				
			||||||
							    int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
												    int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
				
			||||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 | 
					#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 | 
				
			||||||
				    
 | 
									    
 | 
				
			||||||
@@ -111,14 +111,14 @@ namespace Grid {
 | 
				
			|||||||
#undef KERNEL_DAG
 | 
					#undef KERNEL_DAG
 | 
				
			||||||
    template<>
 | 
					    template<>
 | 
				
			||||||
    void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
					    void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
				
			||||||
								  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
													  commVector<SiteHalfSpinor>  &buf,
 | 
				
			||||||
								  int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
													  int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
				
			||||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 | 
					#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 | 
				
			||||||
				    
 | 
									    
 | 
				
			||||||
#define KERNEL_DAG
 | 
					#define KERNEL_DAG
 | 
				
			||||||
    template<>
 | 
					    template<>
 | 
				
			||||||
    void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
					    void WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,
 | 
				
			||||||
								     std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
													     commVector<SiteHalfSpinor>  &buf,
 | 
				
			||||||
								     int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
													     int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
				
			||||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 | 
					#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 | 
				
			||||||
				    
 | 
									    
 | 
				
			||||||
@@ -127,10 +127,10 @@ namespace Grid {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#define INSTANTIATE_ASM(A)\
 | 
					#define INSTANTIATE_ASM(A)\
 | 
				
			||||||
template void WilsonKernels<A>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\
 | 
					template void WilsonKernels<A>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\
 | 
				
			||||||
                                   std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,\
 | 
					                                   commVector<SiteHalfSpinor>  &buf,\
 | 
				
			||||||
                                  int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
 | 
					                                  int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
 | 
				
			||||||
template void WilsonKernels<A>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\
 | 
					template void WilsonKernels<A>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,\
 | 
				
			||||||
                                   std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,\
 | 
					                                   commVector<SiteHalfSpinor>  &buf,\
 | 
				
			||||||
                                  int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
 | 
					                                  int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -313,7 +313,7 @@ namespace QCD {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  template<class Impl>
 | 
					  template<class Impl>
 | 
				
			||||||
  void WilsonKernels<Impl>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
					  void WilsonKernels<Impl>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
				
			||||||
					       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
										       commVector<SiteHalfSpinor>  &buf,
 | 
				
			||||||
					       int ss,int sU,const FermionField &in, FermionField &out)
 | 
										       int ss,int sU,const FermionField &in, FermionField &out)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  typedef typename Simd::scalar_type S;
 | 
					  typedef typename Simd::scalar_type S;
 | 
				
			||||||
@@ -556,7 +556,7 @@ namespace QCD {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  template<class Impl>
 | 
					  template<class Impl>
 | 
				
			||||||
  void WilsonKernels<Impl>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
					  void WilsonKernels<Impl>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
				
			||||||
					       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
										       commVector<SiteHalfSpinor>  &buf,
 | 
				
			||||||
					       int ss,int sU,const FermionField &in, FermionField &out)
 | 
										       int ss,int sU,const FermionField &in, FermionField &out)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  //  std::cout << "Hand op Dhop "<<std::endl;
 | 
					  //  std::cout << "Hand op Dhop "<<std::endl;
 | 
				
			||||||
@@ -804,7 +804,7 @@ namespace QCD {
 | 
				
			|||||||
  ////////////////////////////////////////////////
 | 
					  ////////////////////////////////////////////////
 | 
				
			||||||
template<>
 | 
					template<>
 | 
				
			||||||
void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
					void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
				
			||||||
							     std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
												     commVector<SiteHalfSpinor>  &buf,
 | 
				
			||||||
							     int sF,int sU,const FermionField &in, FermionField &out)
 | 
												     int sF,int sU,const FermionField &in, FermionField &out)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  assert(0);
 | 
					  assert(0);
 | 
				
			||||||
@@ -812,7 +812,7 @@ void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,Leb
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
template<>
 | 
					template<>
 | 
				
			||||||
void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
					void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
				
			||||||
								std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
													commVector<SiteHalfSpinor>  &buf,
 | 
				
			||||||
								int sF,int sU,const FermionField &in, FermionField &out)
 | 
													int sF,int sU,const FermionField &in, FermionField &out)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  assert(0);
 | 
					  assert(0);
 | 
				
			||||||
@@ -820,7 +820,7 @@ void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
template<>
 | 
					template<>
 | 
				
			||||||
void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
					void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
				
			||||||
							     std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
												     commVector<SiteHalfSpinor>  &buf,
 | 
				
			||||||
							     int sF,int sU,const FermionField &in, FermionField &out)
 | 
												     int sF,int sU,const FermionField &in, FermionField &out)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  assert(0);
 | 
					  assert(0);
 | 
				
			||||||
@@ -828,7 +828,7 @@ void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,Leb
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
template<>
 | 
					template<>
 | 
				
			||||||
void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
					void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,
 | 
				
			||||||
								std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
 | 
													commVector<SiteHalfSpinor>  &buf,
 | 
				
			||||||
								int sF,int sU,const FermionField &in, FermionField &out)
 | 
													int sF,int sU,const FermionField &in, FermionField &out)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  assert(0);
 | 
					  assert(0);
 | 
				
			||||||
@@ -841,10 +841,10 @@ void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#define INSTANTIATE_THEM(A) \
 | 
					#define INSTANTIATE_THEM(A) \
 | 
				
			||||||
template void WilsonKernels<A>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\
 | 
					template void WilsonKernels<A>::DiracOptHandDhopSite(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\
 | 
				
			||||||
							       std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,\
 | 
												       commVector<SiteHalfSpinor>  &buf,\
 | 
				
			||||||
							       int ss,int sU,const FermionField &in, FermionField &out);\
 | 
												       int ss,int sU,const FermionField &in, FermionField &out);\
 | 
				
			||||||
template void WilsonKernels<A>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\
 | 
					template void WilsonKernels<A>::DiracOptHandDhopSiteDag(StencilImpl &st,LebesgueOrder &lo,DoubledGaugeField &U,\
 | 
				
			||||||
								  std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,\
 | 
													  commVector<SiteHalfSpinor>  &buf,\
 | 
				
			||||||
								  int ss,int sU,const FermionField &in, FermionField &out);
 | 
													  int ss,int sU,const FermionField &in, FermionField &out);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
INSTANTIATE_THEM(WilsonImplF);
 | 
					INSTANTIATE_THEM(WilsonImplF);
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user