mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-03 21:44:33 +00:00 
			
		
		
		
	Compare commits
	
		
			17 Commits
		
	
	
		
			hotfix/unw
			...
			feature/S2
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					d3ca16c76d | ||
| 
						 | 
					d81d00a889 | ||
| 
						 | 
					d0ee38d1da | ||
| 
						 | 
					da8dc3da0d | ||
| 
						 | 
					21514d8487 | ||
| 
						 | 
					77b2e9fb61 | ||
| 
						 | 
					a71ba05bd7 | ||
| 
						 | 
					1e95e64035 | ||
| 
						 | 
					defcac92ab | ||
| 
						 | 
					4869378f1e | ||
| 
						 | 
					c7b74db317 | ||
| 
						 | 
					0ce201efbe | ||
| 
						 | 
					6d8a3d8bb2 | ||
| 
						 | 
					7dfd207ebb | ||
| 
						 | 
					3a65a096f2 | ||
| 
						 | 
					85b2bd4c93 | ||
| 
						 | 
					35e10a1159 | 
@@ -37,6 +37,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
				
			|||||||
#include <Grid/qcd/QCD.h>
 | 
					#include <Grid/qcd/QCD.h>
 | 
				
			||||||
#include <Grid/qcd/spin/Spin.h>
 | 
					#include <Grid/qcd/spin/Spin.h>
 | 
				
			||||||
#include <Grid/qcd/gparity/Gparity.h>
 | 
					#include <Grid/qcd/gparity/Gparity.h>
 | 
				
			||||||
 | 
					#include <Grid/qcd/spin/Pauli.h> // depends on Gparity
 | 
				
			||||||
#include <Grid/qcd/utils/Utils.h>
 | 
					#include <Grid/qcd/utils/Utils.h>
 | 
				
			||||||
#include <Grid/qcd/representations/Representations.h>
 | 
					#include <Grid/qcd/representations/Representations.h>
 | 
				
			||||||
NAMESPACE_CHECK(GridQCDCore);
 | 
					NAMESPACE_CHECK(GridQCDCore);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -31,5 +31,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			|||||||
#include <Grid/cartesian/Cartesian_base.h>
 | 
					#include <Grid/cartesian/Cartesian_base.h>
 | 
				
			||||||
#include <Grid/cartesian/Cartesian_full.h>
 | 
					#include <Grid/cartesian/Cartesian_full.h>
 | 
				
			||||||
#include <Grid/cartesian/Cartesian_red_black.h> 
 | 
					#include <Grid/cartesian/Cartesian_red_black.h> 
 | 
				
			||||||
 | 
					#include <Grid/cartesian/CartesianCrossIcosahedron.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										235
									
								
								Grid/cartesian/CartesianCrossIcosahedron.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										235
									
								
								Grid/cartesian/CartesianCrossIcosahedron.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,235 @@
 | 
				
			|||||||
 | 
					/*************************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Grid physics library, www.github.com/paboyle/Grid 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Source file: ./lib/cartesian/CartesianCrossIcosahedron.h
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Copyright (C) 2025
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					    it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					    the Free Software Foundation; either version 2 of the License, or
 | 
				
			||||||
 | 
					    (at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					    GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    You should have received a copy of the GNU General Public License along
 | 
				
			||||||
 | 
					    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
 | 
					    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    See the full license in the file "LICENSE" in the top level distribution directory
 | 
				
			||||||
 | 
					*************************************************************************************/
 | 
				
			||||||
 | 
					/*  END LEGAL */
 | 
				
			||||||
 | 
					#pragma once
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					/////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					// Grid Support.
 | 
				
			||||||
 | 
					/////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					enum IcosahedralMeshType {
 | 
				
			||||||
 | 
					  IcosahedralVertices,
 | 
				
			||||||
 | 
					  IcosahedralEdges
 | 
				
			||||||
 | 
					} ;
 | 
				
			||||||
 | 
					enum NorthSouth {
 | 
				
			||||||
 | 
					  North = 1,
 | 
				
			||||||
 | 
					  South = 0
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					const int IcosahedralPatches = 10;
 | 
				
			||||||
 | 
					const int HemiPatches=IcosahedralPatches/2;
 | 
				
			||||||
 | 
					const int NorthernHemisphere = HemiPatches;
 | 
				
			||||||
 | 
					const int SouthernHemisphere = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class GridCartesianCrossIcosahedron: public GridCartesian {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  IcosahedralMeshType meshType;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  IcosahedralMeshType MeshType(void) { return meshType; };
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  /////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Constructor takes a parent grid and possibly subdivides communicator.
 | 
				
			||||||
 | 
					  /////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  /*
 | 
				
			||||||
 | 
					  GridCartesian(const Coordinate &dimensions,
 | 
				
			||||||
 | 
							const Coordinate &simd_layout,
 | 
				
			||||||
 | 
							const Coordinate &processor_grid,
 | 
				
			||||||
 | 
							const GridCartesian &parent) : GridBase(processor_grid,parent,dummy)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    assert(0); // No subdivision
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  GridCartesian(const Coordinate &dimensions,
 | 
				
			||||||
 | 
							const Coordinate &simd_layout,
 | 
				
			||||||
 | 
							const Coordinate &processor_grid,
 | 
				
			||||||
 | 
							const GridCartesian &parent,int &split_rank) : GridBase(processor_grid,parent,split_rank)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    assert(0); // No subdivision
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  */
 | 
				
			||||||
 | 
					  /////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Construct from comm world
 | 
				
			||||||
 | 
					  /////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  GridCartesianCrossIcosahedron(const Coordinate &dimensions,
 | 
				
			||||||
 | 
									const Coordinate &simd_layout,
 | 
				
			||||||
 | 
									const Coordinate &processor_grid,
 | 
				
			||||||
 | 
									IcosahedralMeshType _meshType) : GridCartesian(dimensions,simd_layout,processor_grid)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    meshType = _meshType;
 | 
				
			||||||
 | 
					    Coordinate S2dimensions=dimensions;
 | 
				
			||||||
 | 
					    Coordinate S2simd      =simd_layout;
 | 
				
			||||||
 | 
					    Coordinate S2procs     =processor_grid;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    assert(simd_layout[0]==1); // Force simd into perpendicular dimensions
 | 
				
			||||||
 | 
					    assert(simd_layout[1]==1); // to avoid pole storage complexity interacting with SIMD.
 | 
				
			||||||
 | 
					    assert(dimensions[_ndimension-1]==IcosahedralPatches);
 | 
				
			||||||
 | 
					    assert(processor_grid[_ndimension-1]<=2); // Keeps the patches that need a pole on the same node
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Save a copy of the basic cartesian initialisation volume
 | 
				
			||||||
 | 
					    cartesianOsites = this->_osites;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // allocate the pole storage if we are seeking vertex domain data
 | 
				
			||||||
 | 
					    if ( meshType == IcosahedralVertices ) {
 | 
				
			||||||
 | 
					      InitPoles();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  virtual ~GridCartesianCrossIcosahedron() = default;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Use to decide if a given grid is icosahedral
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  int hasNorthPole;
 | 
				
			||||||
 | 
					  int hasSouthPole;
 | 
				
			||||||
 | 
					  int northPoleOsite;
 | 
				
			||||||
 | 
					  int southPoleOsite;
 | 
				
			||||||
 | 
					  int northPoleOsites;
 | 
				
			||||||
 | 
					  int southPoleOsites;
 | 
				
			||||||
 | 
					  int cartesianOsites;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  virtual int isIcosahedral(void)           override { return 1;}
 | 
				
			||||||
 | 
					  virtual int isIcosahedralVertex(void)     override { return meshType==IcosahedralVertices;}
 | 
				
			||||||
 | 
					  virtual int isIcosahedralEdge  (void)     override { return meshType==IcosahedralEdges;}
 | 
				
			||||||
 | 
					  virtual int NorthPoleOsite(void)  const override { return northPoleOsite; };
 | 
				
			||||||
 | 
					  virtual int NorthPoleOsites(void) const override { return northPoleOsites; };
 | 
				
			||||||
 | 
					  virtual int SouthPoleOsite(void)  const override { return southPoleOsite; };
 | 
				
			||||||
 | 
					  virtual int SouthPoleOsites(void) const override { return southPoleOsites; };
 | 
				
			||||||
 | 
					  virtual int ownsNorthPole(void)   const override { return hasNorthPole; };
 | 
				
			||||||
 | 
					  virtual int ownsSouthPole(void)   const override { return hasSouthPole; };
 | 
				
			||||||
 | 
					  virtual int CartesianOsites(void) const override { return cartesianOsites; };
 | 
				
			||||||
 | 
					  virtual int64_t PoleIdxForOcoor(Coordinate &Coor) override
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    // Work out the pole_osite. Pick the higher dims
 | 
				
			||||||
 | 
					    Coordinate rdims;
 | 
				
			||||||
 | 
					    Coordinate ocoor;
 | 
				
			||||||
 | 
					    int64_t pole_idx;
 | 
				
			||||||
 | 
					    int Ndm1 = this->Nd()-1;
 | 
				
			||||||
 | 
					    for(int d=2;d<Ndm1;d++){
 | 
				
			||||||
 | 
					      int dd=d-2;
 | 
				
			||||||
 | 
					      rdims.push_back(this->_rdimensions[d]);
 | 
				
			||||||
 | 
					      ocoor.push_back(Coor[d]%this->_rdimensions[d]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    Lexicographic::IndexFromCoor(ocoor,pole_idx,rdims);
 | 
				
			||||||
 | 
					    return pole_idx;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  virtual int64_t PoleSiteForOcoor(Coordinate &Coor) override
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    int Ndm1 = this->Nd()-1;
 | 
				
			||||||
 | 
					    int64_t pole_idx = this->PoleIdxForOcoor(Coor);
 | 
				
			||||||
 | 
					    int64_t pole_osite;
 | 
				
			||||||
 | 
					    if ( Coor[Ndm1] >= HemiPatches ) {
 | 
				
			||||||
 | 
					      pole_osite = pole_idx + this->NorthPoleOsite();
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					      pole_osite = pole_idx + this->SouthPoleOsite();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    return pole_osite;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  void InitPoles(void)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    int Ndm1 = _ndimension-1;
 | 
				
			||||||
 | 
					    ///////////////////////
 | 
				
			||||||
 | 
					    // Add the extra pole storage
 | 
				
			||||||
 | 
					    ///////////////////////
 | 
				
			||||||
 | 
					    // Vertices = 1x LxLx D1...Dn + 2.D1...Dn
 | 
				
			||||||
 | 
					    // Start after the LxL and don't include the 10 patch dim
 | 
				
			||||||
 | 
					    int OrthogSize = 1;
 | 
				
			||||||
 | 
					    for (int d = 2; d < Ndm1; d++) {
 | 
				
			||||||
 | 
					      OrthogSize *= _gdimensions[d];
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    _fsites += OrthogSize*2;
 | 
				
			||||||
 | 
					    _gsites += OrthogSize*2;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Simd reduced sizes are multiplied up.
 | 
				
			||||||
 | 
					    // If the leading LxL are simd-ized, the vector objects will contain "redundant" lanes
 | 
				
			||||||
 | 
					    // which should contain identical north (south) pole data
 | 
				
			||||||
 | 
					    OrthogSize = 1;
 | 
				
			||||||
 | 
					    for (int d = 2; d < Ndm1; d++) {
 | 
				
			||||||
 | 
					      OrthogSize *= _rdimensions[d];
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Grow the local volume to hold pole data
 | 
				
			||||||
 | 
					    // on rank (0,0) in the LxL planes
 | 
				
			||||||
 | 
					    // since SIMD must be placed in the orthogonal directions
 | 
				
			||||||
 | 
					    Coordinate pcoor = this->ThisProcessorCoor();
 | 
				
			||||||
 | 
					    Coordinate pgrid = this->ProcessorGrid();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    const int xdim=0;
 | 
				
			||||||
 | 
					    const int ydim=1;
 | 
				
			||||||
 | 
					    /*
 | 
				
			||||||
 | 
					     *
 | 
				
			||||||
 | 
					     *  /\/\/\/\/\
 | 
				
			||||||
 | 
					     * /\/\/\/\/\/
 | 
				
			||||||
 | 
					     * \/\/\/\/\/
 | 
				
			||||||
 | 
					     *
 | 
				
			||||||
 | 
					     *  y
 | 
				
			||||||
 | 
					     * /
 | 
				
			||||||
 | 
					     * \x
 | 
				
			||||||
 | 
					     *
 | 
				
			||||||
 | 
					     * Labelling patches as 5 6 7 8 9
 | 
				
			||||||
 | 
					     *                      0 1 2 3 4
 | 
				
			||||||
 | 
					     *
 | 
				
			||||||
 | 
					     * Will ban distribution of the patch dimension by more than 2.
 | 
				
			||||||
 | 
					     *
 | 
				
			||||||
 | 
					     * Hence all 5 patches associated with the pole must have the
 | 
				
			||||||
 | 
					     * appropriate "corner" of the patch L^2 located on the SAME rank.
 | 
				
			||||||
 | 
					     */ 
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    if( (pcoor[xdim]==pgrid[xdim]-1) && (pcoor[ydim]==0) && (pcoor[Ndm1]==0) ){
 | 
				
			||||||
 | 
					      hasSouthPole   =1;
 | 
				
			||||||
 | 
					      southPoleOsite=this->_osites; 
 | 
				
			||||||
 | 
					      southPoleOsites=OrthogSize;
 | 
				
			||||||
 | 
					      this->_osites += OrthogSize;
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					      hasSouthPole   =0;
 | 
				
			||||||
 | 
					      southPoleOsites=0;
 | 
				
			||||||
 | 
					      southPoleOsite=0;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if( (pcoor[xdim]==0) && (pcoor[ydim]==pgrid[ydim]-1) && (pcoor[Ndm1]==pgrid[Ndm1]-1) ){
 | 
				
			||||||
 | 
					      hasNorthPole   =1;
 | 
				
			||||||
 | 
					      northPoleOsite=this->_osites;
 | 
				
			||||||
 | 
					      northPoleOsites=OrthogSize;
 | 
				
			||||||
 | 
					      this->_osites += OrthogSize;
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					      hasNorthPole   =0;
 | 
				
			||||||
 | 
					      northPoleOsites=0;
 | 
				
			||||||
 | 
					      northPoleOsite=0;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    std::cout << GridLogDebug<<"Icosahedral vertex field volume " << this->_osites<<std::endl;
 | 
				
			||||||
 | 
					    std::cout << GridLogDebug<<"Icosahedral south pole offset   " << this->southPoleOsite<<std::endl;
 | 
				
			||||||
 | 
					    std::cout << GridLogDebug<<"Icosahedral north pole offset   " << this->northPoleOsite<<std::endl;
 | 
				
			||||||
 | 
					    std::cout << GridLogDebug<<"Icosahedral south pole size     " << this->southPoleOsites<<std::endl;
 | 
				
			||||||
 | 
					    std::cout << GridLogDebug<<"Icosahedral north pole size     " << this->northPoleOsites<<std::endl;
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
@@ -86,10 +86,25 @@ public:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
public:
 | 
					public:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Icosahedral decisions
 | 
				
			||||||
 | 
					  virtual int isIcosahedral(void) { return 0;}
 | 
				
			||||||
 | 
					  virtual int isIcosahedralVertex(void) { return 0;}
 | 
				
			||||||
 | 
					  virtual int isIcosahedralEdge  (void) { return 0;}
 | 
				
			||||||
 | 
					  virtual int ownsNorthPole(void) const { return 0; };
 | 
				
			||||||
 | 
					  virtual int ownsSouthPole(void) const { return 0; };
 | 
				
			||||||
 | 
					  virtual int NorthPoleOsite(void) const { return 0; };
 | 
				
			||||||
 | 
					  virtual int SouthPoleOsite(void) const { return 0; };
 | 
				
			||||||
 | 
					  virtual int NorthPoleOsites(void) const { std::cout << "base osites" <<std::endl;return 0; };
 | 
				
			||||||
 | 
					  virtual int SouthPoleOsites(void) const { std::cout << "base osites" <<std::endl;return 0; };
 | 
				
			||||||
 | 
					  virtual int CartesianOsites(void) const { return this->oSites(); };
 | 
				
			||||||
 | 
					  virtual int64_t PoleIdxForOcoor(Coordinate &Coor) { return 0;};
 | 
				
			||||||
 | 
					  virtual int64_t PoleSiteForOcoor(Coordinate &Coor){ return 0;}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  ////////////////////////////////////////////////////////////////
 | 
					  ////////////////////////////////////////////////////////////////
 | 
				
			||||||
  // Checkerboarding interface is virtual and overridden by 
 | 
					  // Checkerboarding interface is virtual and overridden by 
 | 
				
			||||||
  // GridCartesian / GridRedBlackCartesian
 | 
					  // GridCartesian / GridRedBlackCartesian
 | 
				
			||||||
  ////////////////////////////////////////////////////////////////
 | 
					  ////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  virtual int CheckerBoarded(int dim) =0;
 | 
					  virtual int CheckerBoarded(int dim) =0;
 | 
				
			||||||
  virtual int CheckerBoard(const Coordinate &site)=0;
 | 
					  virtual int CheckerBoard(const Coordinate &site)=0;
 | 
				
			||||||
  virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0;
 | 
					  virtual int CheckerBoardDestination(int source_cb,int shift,int dim)=0;
 | 
				
			||||||
@@ -176,6 +191,8 @@ public:
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
    return permute_type;
 | 
					    return permute_type;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
  ////////////////////////////////////////////////////////////////
 | 
					  ////////////////////////////////////////////////////////////////
 | 
				
			||||||
  // Array sizing queries
 | 
					  // Array sizing queries
 | 
				
			||||||
  ////////////////////////////////////////////////////////////////
 | 
					  ////////////////////////////////////////////////////////////////
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -34,6 +34,8 @@ NAMESPACE_BEGIN(Grid);
 | 
				
			|||||||
const int Cshift_verbose=0;
 | 
					const int Cshift_verbose=0;
 | 
				
			||||||
template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension,int shift)
 | 
					template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension,int shift)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					  assert(!rhs.Grid()->isIcosahedral());
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
  typedef typename vobj::vector_type vector_type;
 | 
					  typedef typename vobj::vector_type vector_type;
 | 
				
			||||||
  typedef typename vobj::scalar_type scalar_type;
 | 
					  typedef typename vobj::scalar_type scalar_type;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -30,6 +30,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			|||||||
NAMESPACE_BEGIN(Grid);
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension,int shift)
 | 
					template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension,int shift)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					  assert(!rhs.Grid()->isIcosahedral());
 | 
				
			||||||
  Lattice<vobj> ret(rhs.Grid());
 | 
					  Lattice<vobj> ret(rhs.Grid());
 | 
				
			||||||
  ret.Checkerboard() = rhs.Grid()->CheckerBoardDestination(rhs.Checkerboard(),shift,dimension);
 | 
					  ret.Checkerboard() = rhs.Grid()->CheckerBoardDestination(rhs.Checkerboard(),shift,dimension);
 | 
				
			||||||
  Cshift_local(ret,rhs,dimension,shift);
 | 
					  Cshift_local(ret,rhs,dimension,shift);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -373,14 +373,17 @@ public:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
 | 
					template<class vobj> std::ostream& operator<< (std::ostream& stream, const Lattice<vobj> &o){
 | 
				
			||||||
  typedef typename vobj::scalar_object sobj;
 | 
					  typedef typename vobj::scalar_object sobj;
 | 
				
			||||||
  for(int64_t g=0;g<o.Grid()->_gsites;g++){
 | 
					  uint64_t gsites=1;
 | 
				
			||||||
 | 
					  uint64_t polesites=0;
 | 
				
			||||||
 | 
					  for(int d=0;d<o.Grid()->_ndimension;d++) gsites *= o.Grid()->_gdimensions[d];
 | 
				
			||||||
 | 
					  for(int64_t g=0;g<gsites;g++){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Coordinate gcoor;
 | 
					    Coordinate gcoor;
 | 
				
			||||||
    o.Grid()->GlobalIndexToGlobalCoor(g,gcoor);
 | 
					    o.Grid()->GlobalIndexToGlobalCoor(g,gcoor);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    sobj ss;
 | 
					    sobj ss;
 | 
				
			||||||
    peekSite(ss,o,gcoor);
 | 
					    peekSite(ss,o,gcoor);
 | 
				
			||||||
    stream<<"[";
 | 
					    stream<<"["<<  g<<" : ";
 | 
				
			||||||
    for(int d=0;d<gcoor.size();d++){
 | 
					    for(int d=0;d<gcoor.size();d++){
 | 
				
			||||||
      stream<<gcoor[d];
 | 
					      stream<<gcoor[d];
 | 
				
			||||||
      if(d!=gcoor.size()-1) stream<<",";
 | 
					      if(d!=gcoor.size()-1) stream<<",";
 | 
				
			||||||
@@ -388,6 +391,41 @@ template<class vobj> std::ostream& operator<< (std::ostream& stream, const Latti
 | 
				
			|||||||
    stream<<"]\t";
 | 
					    stream<<"]\t";
 | 
				
			||||||
    stream<<ss<<std::endl;
 | 
					    stream<<ss<<std::endl;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					  if ( o.Grid()->isIcosahedralVertex() ) {
 | 
				
			||||||
 | 
					    uint64_t psites=1;
 | 
				
			||||||
 | 
					    Coordinate perpdims;
 | 
				
			||||||
 | 
					    for(int d=2;d<o.Grid()->_ndimension-1;d++){
 | 
				
			||||||
 | 
					      int pd=o.Grid()->_gdimensions[d];
 | 
				
			||||||
 | 
					      psites*=pd;
 | 
				
			||||||
 | 
					      perpdims.push_back(pd);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    for(uint64_t p=0;p<psites;p++){
 | 
				
			||||||
 | 
					      sobj ss;
 | 
				
			||||||
 | 
					      Coordinate orthog;
 | 
				
			||||||
 | 
					      Lexicographic::CoorFromIndex(orthog,p,perpdims);
 | 
				
			||||||
 | 
					      peekPole(ss,o,orthog,South);
 | 
				
			||||||
 | 
					      stream<<"[ SouthPole : ";
 | 
				
			||||||
 | 
					      for(int d=0;d<orthog.size();d++){
 | 
				
			||||||
 | 
						stream<<orthog[d];
 | 
				
			||||||
 | 
						if(d!=orthog.size()-1) stream<<",";
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      stream<<"]\t";
 | 
				
			||||||
 | 
					      stream<<ss<<std::endl;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    for(uint64_t p=0;p<psites;p++){
 | 
				
			||||||
 | 
					      sobj ss;
 | 
				
			||||||
 | 
					      Coordinate orthog;
 | 
				
			||||||
 | 
					      Lexicographic::CoorFromIndex(orthog,p,perpdims);
 | 
				
			||||||
 | 
					      peekPole(ss,o,orthog,North);
 | 
				
			||||||
 | 
					      stream<<"[ NorthPole : ";
 | 
				
			||||||
 | 
					      for(int d=0;d<orthog.size();d++){
 | 
				
			||||||
 | 
						stream<<orthog[d];
 | 
				
			||||||
 | 
						if(d!=orthog.size()-1) stream<<",";
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      stream<<"]\t";
 | 
				
			||||||
 | 
					      stream<<ss<<std::endl;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
  return stream;
 | 
					  return stream;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -34,22 +34,86 @@ template<class iobj> inline void LatticeCoordinate(Lattice<iobj> &l,int mu)
 | 
				
			|||||||
  typedef typename iobj::scalar_type scalar_type;
 | 
					  typedef typename iobj::scalar_type scalar_type;
 | 
				
			||||||
  typedef typename iobj::vector_type vector_type;
 | 
					  typedef typename iobj::vector_type vector_type;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  l=Zero();
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
  GridBase *grid = l.Grid();
 | 
					  GridBase *grid = l.Grid();
 | 
				
			||||||
  int Nsimd = grid->iSites();
 | 
					  int Nsimd = grid->iSites();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  autoView(l_v, l, CpuWrite);
 | 
					  int cartesian_vol = grid->oSites();
 | 
				
			||||||
  thread_for( o, grid->oSites(), {
 | 
					  if ( grid->isIcosahedral() ) {
 | 
				
			||||||
    vector_type vI;
 | 
					    cartesian_vol = cartesian_vol - grid->NorthPoleOsites()-grid->SouthPoleOsites();
 | 
				
			||||||
    Coordinate gcoor;
 | 
					  }
 | 
				
			||||||
    ExtractBuffer<scalar_type> mergebuf(Nsimd);
 | 
					  {
 | 
				
			||||||
    for(int i=0;i<grid->iSites();i++){
 | 
					    autoView(l_v, l, CpuWrite);
 | 
				
			||||||
      grid->RankIndexToGlobalCoor(grid->ThisRank(),o,i,gcoor);
 | 
					    thread_for( o, cartesian_vol, {
 | 
				
			||||||
      mergebuf[i]=(Integer)gcoor[mu];
 | 
						vector_type vI;
 | 
				
			||||||
 | 
						Coordinate gcoor;
 | 
				
			||||||
 | 
						ExtractBuffer<scalar_type> mergebuf(Nsimd);
 | 
				
			||||||
 | 
						for(int i=0;i<grid->iSites();i++){
 | 
				
			||||||
 | 
						  grid->RankIndexToGlobalCoor(grid->ThisRank(),o,i,gcoor);
 | 
				
			||||||
 | 
						  mergebuf[i]=(Integer)gcoor[mu];
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						merge<vector_type,scalar_type>(vI,mergebuf);
 | 
				
			||||||
 | 
						l_v[o]=vI;
 | 
				
			||||||
 | 
					      });
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if (grid->isIcosahedralVertex()) {
 | 
				
			||||||
 | 
					    uint64_t psites=1;
 | 
				
			||||||
 | 
					    Coordinate perpdims;
 | 
				
			||||||
 | 
					    typename iobj::scalar_object ss;
 | 
				
			||||||
 | 
					    for(int d=2;d<grid->_ndimension-1;d++){
 | 
				
			||||||
 | 
					      int pd=grid->_gdimensions[d];
 | 
				
			||||||
 | 
					      psites*=pd;
 | 
				
			||||||
 | 
					      perpdims.push_back(pd);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    merge<vector_type,scalar_type>(vI,mergebuf);
 | 
					    for(uint64_t p=0;p<psites;p++){
 | 
				
			||||||
    l_v[o]=vI;
 | 
					      Coordinate orthog;
 | 
				
			||||||
  });
 | 
					      Lexicographic::CoorFromIndex(orthog,p,perpdims);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      int icoor;
 | 
				
			||||||
 | 
					      if ( mu>=2 && mu < grid->_ndimension-1) {
 | 
				
			||||||
 | 
						icoor = orthog[mu-2];
 | 
				
			||||||
 | 
					      } else {
 | 
				
			||||||
 | 
						icoor = -1;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      ss=scalar_type(icoor);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      pokePole(ss,l,orthog,South);
 | 
				
			||||||
 | 
					      pokePole(ss,l,orthog,North);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					template<class iobj> inline void LatticePole(Lattice<iobj> &l,NorthSouth pole)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  typedef typename iobj::scalar_object sobj;
 | 
				
			||||||
 | 
					  typedef typename iobj::scalar_type scalar_type;
 | 
				
			||||||
 | 
					  typedef typename iobj::vector_type vector_type;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  GridBase *grid = l.Grid();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  l=Zero();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  assert(grid->isIcosahedralVertex());
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  if (grid->isIcosahedralVertex()) {
 | 
				
			||||||
 | 
					    uint64_t psites=1;
 | 
				
			||||||
 | 
					    Coordinate perpdims;
 | 
				
			||||||
 | 
					    sobj ss;
 | 
				
			||||||
 | 
					    scalar_type one(1.0);
 | 
				
			||||||
 | 
					    ss=one;
 | 
				
			||||||
 | 
					    for(int d=2;d<l.Grid()->_ndimension-1;d++){
 | 
				
			||||||
 | 
					      int pd=l.Grid()->_gdimensions[d];
 | 
				
			||||||
 | 
					      psites*=pd;
 | 
				
			||||||
 | 
					      perpdims.push_back(pd);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    for(uint64_t p=0;p<psites;p++){
 | 
				
			||||||
 | 
					      Coordinate orthog;
 | 
				
			||||||
 | 
					      Lexicographic::CoorFromIndex(orthog,p,perpdims);
 | 
				
			||||||
 | 
					      pokePole(ss,l,orthog,pole);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
NAMESPACE_END(Grid);
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 | 
					 | 
				
			||||||
 
 | 
				
			|||||||
@@ -141,7 +141,7 @@ void peekSite(sobj &s,const Lattice<vobj> &l,const Coordinate &site){
 | 
				
			|||||||
  grid->GlobalCoorToRankIndex(rank,odx,idx,site);
 | 
					  grid->GlobalCoorToRankIndex(rank,odx,idx,site);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  ExtractBuffer<sobj> buf(Nsimd);
 | 
					  ExtractBuffer<sobj> buf(Nsimd);
 | 
				
			||||||
  autoView( l_v , l, CpuWrite);
 | 
					  autoView( l_v , l, CpuRead);
 | 
				
			||||||
  extract(l_v[odx],buf);
 | 
					  extract(l_v[odx],buf);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  s = buf[idx];
 | 
					  s = buf[idx];
 | 
				
			||||||
@@ -151,6 +151,261 @@ void peekSite(sobj &s,const Lattice<vobj> &l,const Coordinate &site){
 | 
				
			|||||||
  return;
 | 
					  return;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// zero for south pole, one for north pole
 | 
				
			||||||
 | 
					template<class vobj,class sobj>
 | 
				
			||||||
 | 
					void peekPole(sobj &s,const Lattice<vobj> &l,const Coordinate &orthog,NorthSouth isNorth)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  s=Zero();
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  GridBase *grid=l.Grid();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  assert(grid->isIcosahedral());
 | 
				
			||||||
 | 
					  assert(grid->isIcosahedralVertex());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int Nsimd = grid->Nsimd();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int rank;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int Ndm1         = grid->_ndimension-1;
 | 
				
			||||||
 | 
					  Coordinate pgrid = grid->ProcessorGrid();
 | 
				
			||||||
 | 
					  const int xdim=0;
 | 
				
			||||||
 | 
					  const int ydim=1;
 | 
				
			||||||
 | 
					  const int pdim=Ndm1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int64_t pole_osite;
 | 
				
			||||||
 | 
					  int64_t pole_isite;
 | 
				
			||||||
 | 
					  Coordinate rdims;
 | 
				
			||||||
 | 
					  Coordinate idims;
 | 
				
			||||||
 | 
					  Coordinate ocoor;
 | 
				
			||||||
 | 
					  Coordinate icoor;
 | 
				
			||||||
 | 
					  Coordinate pcoor(grid->_ndimension);
 | 
				
			||||||
 | 
					  for(int d=2;d<Ndm1;d++){
 | 
				
			||||||
 | 
					    int dd=d-2;
 | 
				
			||||||
 | 
					    rdims.push_back(grid->_rdimensions[d]);
 | 
				
			||||||
 | 
					    idims.push_back(grid->_simd_layout[d]);
 | 
				
			||||||
 | 
					    icoor.push_back((orthog[dd]%grid->_ldimensions[d])/grid->_rdimensions[d]);
 | 
				
			||||||
 | 
					    ocoor.push_back(orthog[dd]%grid->_rdimensions[d]);
 | 
				
			||||||
 | 
					    pcoor[d] = orthog[dd]/grid->_ldimensions[d];
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  Lexicographic::IndexFromCoor(ocoor,pole_osite,rdims);
 | 
				
			||||||
 | 
					  Lexicographic::IndexFromCoor(icoor,pole_isite,idims);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  int64_t osite;
 | 
				
			||||||
 | 
					  if(isNorth == North){
 | 
				
			||||||
 | 
					    pcoor[xdim] = 0;
 | 
				
			||||||
 | 
					    pcoor[ydim] = pgrid[ydim]-1;
 | 
				
			||||||
 | 
					    pcoor[Ndm1] = pgrid[Ndm1]-1;
 | 
				
			||||||
 | 
					    osite = pole_osite + grid->NorthPoleOsite();
 | 
				
			||||||
 | 
					  } else {
 | 
				
			||||||
 | 
					    pcoor[xdim] = pgrid[xdim]-1;
 | 
				
			||||||
 | 
					    pcoor[ydim] = 0;
 | 
				
			||||||
 | 
					    pcoor[Ndm1] = 0;
 | 
				
			||||||
 | 
					    osite = pole_osite + grid->SouthPoleOsite();
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  rank = grid->RankFromProcessorCoor(pcoor);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ( rank == grid->ThisRank() ) {
 | 
				
			||||||
 | 
					    ExtractBuffer<sobj> buf(Nsimd);
 | 
				
			||||||
 | 
					    autoView( l_v , l, CpuWrite);
 | 
				
			||||||
 | 
					    extract(l_v[osite],buf);
 | 
				
			||||||
 | 
					    s = buf[pole_isite];
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  grid->Broadcast(rank,s);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  return;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					template<class vobj,class sobj>
 | 
				
			||||||
 | 
					void pokePole(const sobj &s,Lattice<vobj> &l,const Coordinate &orthog,NorthSouth isNorth)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  GridBase *grid=l.Grid();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  assert(grid->isIcosahedral());
 | 
				
			||||||
 | 
					  assert(grid->isIcosahedralVertex());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  grid->Broadcast(grid->BossRank(),s);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int Nsimd = grid->Nsimd();
 | 
				
			||||||
 | 
					  int rank;
 | 
				
			||||||
 | 
					  int Ndm1         = grid->_ndimension-1;
 | 
				
			||||||
 | 
					  Coordinate pgrid = grid->ProcessorGrid();
 | 
				
			||||||
 | 
					  const int xdim=0;
 | 
				
			||||||
 | 
					  const int ydim=1;
 | 
				
			||||||
 | 
					  const int pdim=Ndm1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int64_t pole_osite;
 | 
				
			||||||
 | 
					  int64_t pole_isite;
 | 
				
			||||||
 | 
					  Coordinate rdims;
 | 
				
			||||||
 | 
					  Coordinate idims;
 | 
				
			||||||
 | 
					  Coordinate ocoor;
 | 
				
			||||||
 | 
					  Coordinate icoor;
 | 
				
			||||||
 | 
					  Coordinate pcoor(grid->_ndimension,0);
 | 
				
			||||||
 | 
					  for(int d=2;d<Ndm1;d++){
 | 
				
			||||||
 | 
					    int dd = d-2;
 | 
				
			||||||
 | 
					    rdims.push_back(grid->_rdimensions[d]);
 | 
				
			||||||
 | 
					    idims.push_back(grid->_simd_layout[d]);
 | 
				
			||||||
 | 
					    icoor.push_back((orthog[dd]%grid->_ldimensions[d])/grid->_rdimensions[d]);
 | 
				
			||||||
 | 
					    ocoor.push_back(orthog[dd]%grid->_rdimensions[d]);
 | 
				
			||||||
 | 
					    pcoor[d] = orthog[dd]/grid->_ldimensions[d];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    int o = orthog[dd];
 | 
				
			||||||
 | 
					    int r = grid->_rdimensions[d];
 | 
				
			||||||
 | 
					    int omr = o % r;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  Lexicographic::IndexFromCoor(ocoor,pole_osite,rdims);
 | 
				
			||||||
 | 
					  Lexicographic::IndexFromCoor(icoor,pole_isite,idims);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  int64_t osite;
 | 
				
			||||||
 | 
					  if(isNorth ==North){
 | 
				
			||||||
 | 
					    pcoor[xdim] = 0;
 | 
				
			||||||
 | 
					    pcoor[ydim] = pgrid[ydim]-1;
 | 
				
			||||||
 | 
					    pcoor[Ndm1] = pgrid[Ndm1]-1;
 | 
				
			||||||
 | 
					    osite = pole_osite + grid->NorthPoleOsite();
 | 
				
			||||||
 | 
					  } else {
 | 
				
			||||||
 | 
					    pcoor[xdim] = pgrid[xdim]-1;
 | 
				
			||||||
 | 
					    pcoor[ydim] = 0;
 | 
				
			||||||
 | 
					    pcoor[Ndm1] = 0;
 | 
				
			||||||
 | 
					    osite = pole_osite + grid->SouthPoleOsite();
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  rank = grid->RankFromProcessorCoor(pcoor);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // extract-modify-merge cycle is easiest way and this is not perf critical
 | 
				
			||||||
 | 
					  if ( rank == grid->ThisRank() ) {
 | 
				
			||||||
 | 
					    ExtractBuffer<sobj> buf(Nsimd);
 | 
				
			||||||
 | 
					    autoView( l_v , l, CpuWrite);
 | 
				
			||||||
 | 
					    extract(l_v[osite],buf);
 | 
				
			||||||
 | 
					    buf[pole_isite] = s;
 | 
				
			||||||
 | 
					    merge(l_v[osite],buf);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  return;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class vobj,class sobj>
 | 
				
			||||||
 | 
					void peekLocalPole(sobj &s,const Lattice<vobj> &l,const Coordinate &orthog,NorthSouth isNorth)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  s=Zero();
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  GridBase *grid=l.Grid();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  assert(grid->isIcosahedral());
 | 
				
			||||||
 | 
					  assert(grid->isIcosahedralVertex());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int Nsimd = grid->Nsimd();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int rank;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int Ndm1         = grid->_ndimension-1;
 | 
				
			||||||
 | 
					  Coordinate pgrid = grid->ProcessorGrid();
 | 
				
			||||||
 | 
					  const int xdim=0;
 | 
				
			||||||
 | 
					  const int ydim=1;
 | 
				
			||||||
 | 
					  const int pdim=Ndm1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int64_t pole_osite;
 | 
				
			||||||
 | 
					  int64_t pole_isite;
 | 
				
			||||||
 | 
					  Coordinate rdims;
 | 
				
			||||||
 | 
					  Coordinate idims;
 | 
				
			||||||
 | 
					  Coordinate ocoor;
 | 
				
			||||||
 | 
					  Coordinate icoor;
 | 
				
			||||||
 | 
					  //  Coordinate pcoor(grid->_ndimension);
 | 
				
			||||||
 | 
					  for(int d=2;d<Ndm1;d++){
 | 
				
			||||||
 | 
					    int dd=d-2;
 | 
				
			||||||
 | 
					    rdims.push_back(grid->_rdimensions[d]);
 | 
				
			||||||
 | 
					    idims.push_back(grid->_simd_layout[d]);
 | 
				
			||||||
 | 
					    icoor.push_back((orthog[dd]%grid->_ldimensions[d])/grid->_rdimensions[d]);
 | 
				
			||||||
 | 
					    ocoor.push_back(orthog[dd]%grid->_rdimensions[d]);
 | 
				
			||||||
 | 
					    //    pcoor[d] = orthog[dd]/grid->_ldimensions[d];
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  Lexicographic::IndexFromCoor(ocoor,pole_osite,rdims);
 | 
				
			||||||
 | 
					  Lexicographic::IndexFromCoor(icoor,pole_isite,idims);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  int64_t osite;
 | 
				
			||||||
 | 
					  if(isNorth == North){
 | 
				
			||||||
 | 
					    //    pcoor[xdim] = 0;
 | 
				
			||||||
 | 
					    //    pcoor[ydim] = pgrid[ydim]-1;
 | 
				
			||||||
 | 
					    //    pcoor[Ndm1] = pgrid[Ndm1]-1;
 | 
				
			||||||
 | 
					    osite = pole_osite + grid->NorthPoleOsite();
 | 
				
			||||||
 | 
					    assert(grid->ownsNorthPole());
 | 
				
			||||||
 | 
					  } else {
 | 
				
			||||||
 | 
					    //    pcoor[xdim] = pgrid[xdim]-1;
 | 
				
			||||||
 | 
					    //    pcoor[ydim] = 0;
 | 
				
			||||||
 | 
					    //    pcoor[Ndm1] = 0;
 | 
				
			||||||
 | 
					    osite = pole_osite + grid->SouthPoleOsite();
 | 
				
			||||||
 | 
					    assert(grid->ownsSouthPole());
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ExtractBuffer<sobj> buf(Nsimd);
 | 
				
			||||||
 | 
					  autoView( l_v , l, CpuWrite);
 | 
				
			||||||
 | 
					  extract(l_v[osite],buf);
 | 
				
			||||||
 | 
					  s = buf[pole_isite];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  return;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					template<class vobj,class sobj>
 | 
				
			||||||
 | 
					void pokeLocalPole(const sobj &s,Lattice<vobj> &l,const Coordinate &orthog,NorthSouth isNorth)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  GridBase *grid=l.Grid();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  assert(grid->isIcosahedral());
 | 
				
			||||||
 | 
					  assert(grid->isIcosahedralVertex());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int Nsimd = grid->Nsimd();
 | 
				
			||||||
 | 
					  int rank;
 | 
				
			||||||
 | 
					  int Ndm1         = grid->_ndimension-1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const int xdim=0;
 | 
				
			||||||
 | 
					  const int ydim=1;
 | 
				
			||||||
 | 
					  const int pdim=Ndm1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int64_t pole_osite;
 | 
				
			||||||
 | 
					  int64_t pole_isite;
 | 
				
			||||||
 | 
					  Coordinate rdims;
 | 
				
			||||||
 | 
					  Coordinate idims;
 | 
				
			||||||
 | 
					  Coordinate ocoor;
 | 
				
			||||||
 | 
					  Coordinate icoor;
 | 
				
			||||||
 | 
					  //  Coordinate pcoor(grid->_ndimension,0);
 | 
				
			||||||
 | 
					  for(int d=2;d<Ndm1;d++){
 | 
				
			||||||
 | 
					    int dd = d-2;
 | 
				
			||||||
 | 
					    rdims.push_back(grid->_rdimensions[d]);
 | 
				
			||||||
 | 
					    idims.push_back(grid->_simd_layout[d]);
 | 
				
			||||||
 | 
					    icoor.push_back((orthog[dd]%grid->_ldimensions[d])/grid->_rdimensions[d]);
 | 
				
			||||||
 | 
					    ocoor.push_back(orthog[dd]%grid->_rdimensions[d]);
 | 
				
			||||||
 | 
					    //    pcoor[d] = orthog[dd]/grid->_ldimensions[d];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    int o = orthog[dd];
 | 
				
			||||||
 | 
					    int r = grid->_rdimensions[d];
 | 
				
			||||||
 | 
					    int omr = o % r;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  Lexicographic::IndexFromCoor(ocoor,pole_osite,rdims);
 | 
				
			||||||
 | 
					  Lexicographic::IndexFromCoor(icoor,pole_isite,idims);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  int64_t osite;
 | 
				
			||||||
 | 
					  int insert=0;
 | 
				
			||||||
 | 
					  if(isNorth ==North){
 | 
				
			||||||
 | 
					    //    pcoor[xdim] = 0;
 | 
				
			||||||
 | 
					    //    pcoor[ydim] = pgrid[ydim]-1;
 | 
				
			||||||
 | 
					    //    pcoor[Ndm1] = pgrid[Ndm1]-1;
 | 
				
			||||||
 | 
					    osite = pole_osite + grid->NorthPoleOsite();
 | 
				
			||||||
 | 
					    assert(grid->ownsNorthPole());
 | 
				
			||||||
 | 
					  } else {
 | 
				
			||||||
 | 
					    //    pcoor[xdim] = pgrid[xdim]-1;
 | 
				
			||||||
 | 
					    //    pcoor[ydim] = 0;
 | 
				
			||||||
 | 
					    //    pcoor[Ndm1] = 0;
 | 
				
			||||||
 | 
					    osite = pole_osite + grid->SouthPoleOsite();
 | 
				
			||||||
 | 
					    assert(grid->ownsSouthPole());
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // extract-modify-merge cycle is easiest way and this is not perf critical
 | 
				
			||||||
 | 
					  ExtractBuffer<sobj> buf(Nsimd);
 | 
				
			||||||
 | 
					  autoView( l_v , l, CpuWrite);
 | 
				
			||||||
 | 
					  extract(l_v[osite],buf);
 | 
				
			||||||
 | 
					  buf[pole_isite] = s;
 | 
				
			||||||
 | 
					  merge(l_v[osite],buf);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  return;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
//////////////////////////////////////////////////////////
 | 
					//////////////////////////////////////////////////////////
 | 
				
			||||||
// Peek a scalar object from the SIMD array
 | 
					// Peek a scalar object from the SIMD array
 | 
				
			||||||
//////////////////////////////////////////////////////////
 | 
					//////////////////////////////////////////////////////////
 | 
				
			||||||
@@ -179,7 +434,7 @@ inline void peekLocalSite(sobj &s,const LatticeView<vobj> &l,Coordinate &site)
 | 
				
			|||||||
  for(int w=0;w<words;w++){
 | 
					  for(int w=0;w<words;w++){
 | 
				
			||||||
    pt[w] = getlane(vp[w],idx);
 | 
					    pt[w] = getlane(vp[w],idx);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  //  std::cout << "peekLocalSite "<<site<<" "<<odx<<","<<idx<<" "<<s<<std::endl;
 | 
					
 | 
				
			||||||
  return;
 | 
					  return;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
template<class vobj,class sobj>
 | 
					template<class vobj,class sobj>
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -48,31 +48,45 @@ NAMESPACE_BEGIN(Grid);
 | 
				
			|||||||
//////////////////////////////////////////////////////////////
 | 
					//////////////////////////////////////////////////////////////
 | 
				
			||||||
inline int RNGfillable(GridBase *coarse,GridBase *fine)
 | 
					inline int RNGfillable(GridBase *coarse,GridBase *fine)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					  if ( coarse == fine ) return 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  int rngdims = coarse->_ndimension;
 | 
					  if ( coarse->isIcosahedral()) assert(coarse->isIcosahedralEdge());
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  // trivially extended in higher dims, with locality guaranteeing RNG state is local to node
 | 
					  if ( fine->isIcosahedralVertex() && coarse->isIcosahedralEdge() ) {
 | 
				
			||||||
  int lowerdims   = fine->_ndimension - coarse->_ndimension;
 | 
					    assert(fine->Nd()==coarse->Nd());
 | 
				
			||||||
  assert(lowerdims >= 0);
 | 
					    for(int d=0;d<fine->Nd();d++){
 | 
				
			||||||
  for(int d=0;d<lowerdims;d++){
 | 
					      assert(fine->LocalDimensions()[d] == coarse->LocalDimensions()[d]);
 | 
				
			||||||
    assert(fine->_simd_layout[d]==1);
 | 
					    }
 | 
				
			||||||
    assert(fine->_processors[d]==1);
 | 
					    return 1;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
  int multiplicity=1;
 | 
					  {
 | 
				
			||||||
  for(int d=0;d<lowerdims;d++){
 | 
					 | 
				
			||||||
    multiplicity=multiplicity*fine->_rdimensions[d];
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  // local and global volumes subdivide cleanly after SIMDization
 | 
					 | 
				
			||||||
  for(int d=0;d<rngdims;d++){
 | 
					 | 
				
			||||||
    int fd= d+lowerdims;
 | 
					 | 
				
			||||||
    assert(coarse->_processors[d]  == fine->_processors[fd]);
 | 
					 | 
				
			||||||
    assert(coarse->_simd_layout[d] == fine->_simd_layout[fd]);
 | 
					 | 
				
			||||||
    assert(((fine->_rdimensions[fd] / coarse->_rdimensions[d])* coarse->_rdimensions[d])==fine->_rdimensions[fd]); 
 | 
					 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    multiplicity = multiplicity *fine->_rdimensions[fd] / coarse->_rdimensions[d]; 
 | 
					    int rngdims = coarse->_ndimension;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // trivially extended in higher dims, with locality guaranteeing RNG state is local to node
 | 
				
			||||||
 | 
					    int lowerdims   = fine->_ndimension - coarse->_ndimension;
 | 
				
			||||||
 | 
					    assert(lowerdims >= 0);
 | 
				
			||||||
 | 
					    for(int d=0;d<lowerdims;d++){
 | 
				
			||||||
 | 
					      assert(fine->_simd_layout[d]==1);
 | 
				
			||||||
 | 
					      assert(fine->_processors[d]==1);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    int multiplicity=1;
 | 
				
			||||||
 | 
					    for(int d=0;d<lowerdims;d++){
 | 
				
			||||||
 | 
					      multiplicity=multiplicity*fine->_rdimensions[d];
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    // local and global volumes subdivide cleanly after SIMDization
 | 
				
			||||||
 | 
					    for(int d=0;d<rngdims;d++){
 | 
				
			||||||
 | 
					      int fd= d+lowerdims;
 | 
				
			||||||
 | 
					      assert(coarse->_processors[d]  == fine->_processors[fd]);
 | 
				
			||||||
 | 
					      assert(coarse->_simd_layout[d] == fine->_simd_layout[fd]);
 | 
				
			||||||
 | 
					      assert(((fine->_rdimensions[fd] / coarse->_rdimensions[d])* coarse->_rdimensions[d])==fine->_rdimensions[fd]); 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      multiplicity = multiplicity *fine->_rdimensions[fd] / coarse->_rdimensions[d]; 
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    return multiplicity;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  return multiplicity;
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
@@ -80,6 +94,19 @@ inline int RNGfillable(GridBase *coarse,GridBase *fine)
 | 
				
			|||||||
// this function is necessary for the LS vectorised field
 | 
					// this function is necessary for the LS vectorised field
 | 
				
			||||||
inline int RNGfillable_general(GridBase *coarse,GridBase *fine)
 | 
					inline int RNGfillable_general(GridBase *coarse,GridBase *fine)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ( coarse == fine ) return 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ( coarse->isIcosahedral()) assert(coarse->isIcosahedralEdge());
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  if ( fine->isIcosahedralVertex() && coarse->isIcosahedralEdge() ) {
 | 
				
			||||||
 | 
					    assert(fine->Nd()==coarse->Nd());
 | 
				
			||||||
 | 
					    for(int d=0;d<fine->Nd();d++){
 | 
				
			||||||
 | 
					      assert(fine->LocalDimensions()[d] == coarse->LocalDimensions()[d]);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    return 1;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  int rngdims = coarse->_ndimension;
 | 
					  int rngdims = coarse->_ndimension;
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
  // trivially extended in higher dims, with locality guaranteeing RNG state is local to node
 | 
					  // trivially extended in higher dims, with locality guaranteeing RNG state is local to node
 | 
				
			||||||
@@ -352,12 +379,12 @@ private:
 | 
				
			|||||||
public:
 | 
					public:
 | 
				
			||||||
  GridBase *Grid(void) const { return _grid; }
 | 
					  GridBase *Grid(void) const { return _grid; }
 | 
				
			||||||
  int generator_idx(int os,int is) {
 | 
					  int generator_idx(int os,int is) {
 | 
				
			||||||
    return is*_grid->oSites()+os;
 | 
					    return (is*_grid->CartesianOsites()+os)%_grid->lSites(); // On the pole sites wrap back to normal generators; Icosahedral hack
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  GridParallelRNG(GridBase *grid) : GridRNGbase() {
 | 
					  GridParallelRNG(GridBase *grid) : GridRNGbase() {
 | 
				
			||||||
    _grid = grid;
 | 
					    _grid = grid;
 | 
				
			||||||
    _vol  =_grid->iSites()*_grid->oSites();
 | 
					    _vol  =_grid->lSites();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    _generators.resize(_vol);
 | 
					    _generators.resize(_vol);
 | 
				
			||||||
    _uniform.resize(_vol,std::uniform_real_distribution<RealD>{0,1});
 | 
					    _uniform.resize(_vol,std::uniform_real_distribution<RealD>{0,1});
 | 
				
			||||||
@@ -381,7 +408,7 @@ public:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    int multiplicity = RNGfillable_general(_grid, l.Grid()); // l has finer or same grid
 | 
					    int multiplicity = RNGfillable_general(_grid, l.Grid()); // l has finer or same grid
 | 
				
			||||||
    int Nsimd  = _grid->Nsimd();  // guaranteed to be the same for l.Grid() too
 | 
					    int Nsimd  = _grid->Nsimd();  // guaranteed to be the same for l.Grid() too
 | 
				
			||||||
    int osites = _grid->oSites();  // guaranteed to be <= l.Grid()->oSites() by a factor multiplicity
 | 
					    int osites = _grid->CartesianOsites();  // guaranteed to be <= l.Grid()->oSites() by a factor multiplicity, except on Icosahedral
 | 
				
			||||||
    int words  = sizeof(scalar_object) / sizeof(scalar_type);
 | 
					    int words  = sizeof(scalar_object) / sizeof(scalar_type);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    autoView(l_v, l, CpuWrite);
 | 
					    autoView(l_v, l, CpuWrite);
 | 
				
			||||||
@@ -402,8 +429,27 @@ public:
 | 
				
			|||||||
	// merge into SIMD lanes, FIXME suboptimal implementation
 | 
						// merge into SIMD lanes, FIXME suboptimal implementation
 | 
				
			||||||
	merge(l_v[sm], buf);
 | 
						merge(l_v[sm], buf);
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
      });
 | 
					    });
 | 
				
			||||||
    //    });
 | 
					
 | 
				
			||||||
 | 
					    /*
 | 
				
			||||||
 | 
					     * Fill in the poles for an Icosahedral vertex mesh
 | 
				
			||||||
 | 
					     */
 | 
				
			||||||
 | 
					    if (l.Grid()->isIcosahedralVertex()) { 
 | 
				
			||||||
 | 
					      int64_t pole_sites=l.Grid()->NorthPoleOsites()+l.Grid()->SouthPoleOsites();
 | 
				
			||||||
 | 
					      int64_t pole_base =l.Grid()->CartesianOsites();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      ExtractBuffer<scalar_object> buf(Nsimd);
 | 
				
			||||||
 | 
					      for (int m = 0; m < pole_sites; m++) {  // Draw from same generator multiplicity times                                                                                                           
 | 
				
			||||||
 | 
					        for (int si = 0; si < Nsimd; si++) {
 | 
				
			||||||
 | 
					          int gdx = 0;
 | 
				
			||||||
 | 
						  scalar_type *pointer = (scalar_type *)&buf[si];
 | 
				
			||||||
 | 
					          dist[gdx].reset();
 | 
				
			||||||
 | 
					          for (int idx = 0; idx < words; idx++)
 | 
				
			||||||
 | 
					            fillScalar(pointer[idx], dist[gdx], _generators[gdx]);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        merge(l_v[pole_base+m], buf);
 | 
				
			||||||
 | 
					      }      
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    _time_counter += usecond()- inner_time_counter;
 | 
					    _time_counter += usecond()- inner_time_counter;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -49,7 +49,7 @@ static constexpr int Tm = 7;
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
static constexpr int Nc=Config_Nc;
 | 
					static constexpr int Nc=Config_Nc;
 | 
				
			||||||
static constexpr int Ns=4;
 | 
					static constexpr int Ns=4;
 | 
				
			||||||
static constexpr int Nd=4;
 | 
					static constexpr int Nd=Config_Nd;
 | 
				
			||||||
static constexpr int Nhs=2; // half spinor
 | 
					static constexpr int Nhs=2; // half spinor
 | 
				
			||||||
static constexpr int Nds=8; // double stored gauge field
 | 
					static constexpr int Nds=8; // double stored gauge field
 | 
				
			||||||
static constexpr int Ngp=2; // gparity index range
 | 
					static constexpr int Ngp=2; // gparity index range
 | 
				
			||||||
@@ -75,6 +75,7 @@ static constexpr int InverseYes=1;
 | 
				
			|||||||
//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
					//typename std::enable_if<matchGridTensorIndex<iVector<vtype,Ns>,SpinorIndex>::value,iVector<vtype,Ns> >::type *SFINAE;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
const int SpinorIndex = 2;
 | 
					const int SpinorIndex = 2;
 | 
				
			||||||
 | 
					const int PauliIndex  = 2; //TensorLevel counts from the bottom!
 | 
				
			||||||
template<typename T> struct isSpinor {
 | 
					template<typename T> struct isSpinor {
 | 
				
			||||||
  static constexpr bool value = (SpinorIndex==T::TensorLevel);
 | 
					  static constexpr bool value = (SpinorIndex==T::TensorLevel);
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -123,10 +123,10 @@ public:
 | 
				
			|||||||
      GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
 | 
					      GaugeGrid->LocalIndexToLocalCoor(lidx, lcoor);
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
      peekLocalSite(ScalarUmu, Umu_v, lcoor);
 | 
					      peekLocalSite(ScalarUmu, Umu_v, lcoor);
 | 
				
			||||||
      for (int mu = 0; mu < 4; mu++) ScalarUds(mu) = ScalarUmu(mu);
 | 
					      for (int mu = 0; mu < Nd; mu++) ScalarUds(mu) = ScalarUmu(mu);
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
      peekLocalSite(ScalarUmu, Uadj_v, lcoor);
 | 
					      peekLocalSite(ScalarUmu, Uadj_v, lcoor);
 | 
				
			||||||
      for (int mu = 0; mu < 4; mu++) ScalarUds(mu + 4) = ScalarUmu(mu);
 | 
					      for (int mu = 0; mu < Nd; mu++) ScalarUds(mu + Nd) = ScalarUmu(mu);
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
      pokeLocalSite(ScalarUds, Uds_v, lcoor);
 | 
					      pokeLocalSite(ScalarUds, Uds_v, lcoor);
 | 
				
			||||||
    });
 | 
					    });
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -85,6 +85,15 @@ NAMESPACE_CHECK(DomainWall);
 | 
				
			|||||||
#include <Grid/qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h>
 | 
					#include <Grid/qcd/action/fermion/OverlapWilsonPartialFractionTanhFermion.h>
 | 
				
			||||||
#include <Grid/qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h>
 | 
					#include <Grid/qcd/action/fermion/OverlapWilsonPartialFractionZolotarevFermion.h>
 | 
				
			||||||
NAMESPACE_CHECK(Overlap);
 | 
					NAMESPACE_CHECK(Overlap);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					///////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					// Two spin wilson fermion based
 | 
				
			||||||
 | 
					///////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <Grid/qcd/action/fermion/TwoSpinWilsonFermion3plus1D.h>
 | 
				
			||||||
 | 
					NAMESPACE_CHECK(TwoSpinWilson);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
///////////////////////////////////////////////////////////////////////////////
 | 
					///////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
// G5 herm -- this has to live in QCD since dirac matrix is not in the broader sector of code
 | 
					// G5 herm -- this has to live in QCD since dirac matrix is not in the broader sector of code
 | 
				
			||||||
///////////////////////////////////////////////////////////////////////////////
 | 
					///////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -41,8 +41,9 @@ NAMESPACE_CHECK(Compressor);
 | 
				
			|||||||
NAMESPACE_CHECK(FermionOperatorImpl);
 | 
					NAMESPACE_CHECK(FermionOperatorImpl);
 | 
				
			||||||
#include <Grid/qcd/action/fermion/FermionOperator.h>
 | 
					#include <Grid/qcd/action/fermion/FermionOperator.h>
 | 
				
			||||||
NAMESPACE_CHECK(FermionOperator);
 | 
					NAMESPACE_CHECK(FermionOperator);
 | 
				
			||||||
#include <Grid/qcd/action/fermion/WilsonKernels.h>        //used by all wilson type fermions
 | 
					#include <Grid/qcd/action/fermion/WilsonKernels.h>           //used by all wilson type fermions
 | 
				
			||||||
#include <Grid/qcd/action/fermion/StaggeredKernels.h>        //used by all wilson type fermions
 | 
					#include <Grid/qcd/action/fermion/StaggeredKernels.h>        //used by all wilson type fermions
 | 
				
			||||||
 | 
					#include <Grid/qcd/action/fermion/TwoSpinWilsonKernels.h>    //used for 3D fermions, pauli in place of Dirac
 | 
				
			||||||
NAMESPACE_CHECK(Kernels);
 | 
					NAMESPACE_CHECK(Kernels);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -180,6 +180,12 @@ NAMESPACE_CHECK(ImplGparityWilson);
 | 
				
			|||||||
#include <Grid/qcd/action/fermion/StaggeredImpl.h> 
 | 
					#include <Grid/qcd/action/fermion/StaggeredImpl.h> 
 | 
				
			||||||
NAMESPACE_CHECK(ImplStaggered);  
 | 
					NAMESPACE_CHECK(ImplStaggered);  
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					// Two component spinor Wilson action for 3d / Boston
 | 
				
			||||||
 | 
					/////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					#include <Grid/qcd/action/fermion/TwoSpinWilsonImpl.h> 
 | 
				
			||||||
 | 
					NAMESPACE_CHECK(ImplTwoSpinWilson);  
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/////////////////////////////////////////////////////////////////////////////
 | 
					/////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
// Single flavour one component spinors with colour index. 5d vec
 | 
					// Single flavour one component spinors with colour index. 5d vec
 | 
				
			||||||
/////////////////////////////////////////////////////////////////////////////
 | 
					/////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -274,7 +274,7 @@ public:
 | 
				
			|||||||
	autoView( Uds_v , Uds, CpuWrite);
 | 
						autoView( Uds_v , Uds, CpuWrite);
 | 
				
			||||||
	autoView( Utmp_v, Utmp, CpuWrite);
 | 
						autoView( Utmp_v, Utmp, CpuWrite);
 | 
				
			||||||
	thread_foreach(ss,Utmp_v,{
 | 
						thread_foreach(ss,Utmp_v,{
 | 
				
			||||||
	    Uds_v[ss](0)(mu+4) = Utmp_v[ss]();
 | 
						    Uds_v[ss](0)(mu+Nd) = Utmp_v[ss]();
 | 
				
			||||||
	  });
 | 
						  });
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
      Utmp = Uconj;
 | 
					      Utmp = Uconj;
 | 
				
			||||||
@@ -286,7 +286,7 @@ public:
 | 
				
			|||||||
	autoView( Uds_v , Uds, CpuWrite);
 | 
						autoView( Uds_v , Uds, CpuWrite);
 | 
				
			||||||
	autoView( Utmp_v, Utmp, CpuWrite);
 | 
						autoView( Utmp_v, Utmp, CpuWrite);
 | 
				
			||||||
	thread_foreach(ss,Utmp_v,{
 | 
						thread_foreach(ss,Utmp_v,{
 | 
				
			||||||
	    Uds_v[ss](1)(mu+4) = Utmp_v[ss]();
 | 
						    Uds_v[ss](1)(mu+Nd) = Utmp_v[ss]();
 | 
				
			||||||
        });
 | 
					        });
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@@ -320,7 +320,7 @@ public:
 | 
				
			|||||||
      }
 | 
					      }
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
      Uconj = conjugate(*Upoke);
 | 
					      Uconj = conjugate(*Upoke);
 | 
				
			||||||
      pokeGparityDoubledGaugeField(Uds, *Upoke, Uconj, mu + 4);
 | 
					      pokeGparityDoubledGaugeField(Uds, *Upoke, Uconj, mu + Nd);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -36,6 +36,8 @@ public:
 | 
				
			|||||||
  static const std::vector<int> directions;
 | 
					  static const std::vector<int> directions;
 | 
				
			||||||
  static const std::vector<int> displacements;
 | 
					  static const std::vector<int> displacements;
 | 
				
			||||||
  static const int npoint = 16;
 | 
					  static const int npoint = 16;
 | 
				
			||||||
 | 
					  static std::vector<int> MakeDirections(void);
 | 
				
			||||||
 | 
					  static std::vector<int> MakeDisplacements(void);
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template <class Impl>
 | 
					template <class Impl>
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -40,6 +40,8 @@ public:
 | 
				
			|||||||
  static const std::vector<int> directions;
 | 
					  static const std::vector<int> directions;
 | 
				
			||||||
  static const std::vector<int> displacements;
 | 
					  static const std::vector<int> displacements;
 | 
				
			||||||
  const int npoint = 16;
 | 
					  const int npoint = 16;
 | 
				
			||||||
 | 
					  static std::vector<int> MakeDirections(void);
 | 
				
			||||||
 | 
					  static std::vector<int> MakeDisplacements(void);
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class Impl>
 | 
					template<class Impl>
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -36,6 +36,8 @@ public:
 | 
				
			|||||||
  static const std::vector<int> directions;
 | 
					  static const std::vector<int> directions;
 | 
				
			||||||
  static const std::vector<int> displacements;
 | 
					  static const std::vector<int> displacements;
 | 
				
			||||||
  static const int npoint = 8;
 | 
					  static const int npoint = 8;
 | 
				
			||||||
 | 
					  static std::vector<int> MakeDirections(void);
 | 
				
			||||||
 | 
					  static std::vector<int> MakeDisplacements(void);
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template <class Impl>
 | 
					template <class Impl>
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -141,9 +141,9 @@ public:
 | 
				
			|||||||
      Udag = Udag *phases;
 | 
					      Udag = Udag *phases;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	InsertGaugeField(Uds,U,mu);
 | 
						InsertGaugeField(Uds,U,mu);
 | 
				
			||||||
	InsertGaugeField(Uds,Udag,mu+4);
 | 
						InsertGaugeField(Uds,Udag,mu+Nd);
 | 
				
			||||||
	//	PokeIndex<LorentzIndex>(Uds, U, mu);
 | 
						//	PokeIndex<LorentzIndex>(Uds, U, mu);
 | 
				
			||||||
	//	PokeIndex<LorentzIndex>(Uds, Udag, mu + 4);
 | 
						//	PokeIndex<LorentzIndex>(Uds, Udag, mu + Nd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      // 3 hop based on thin links. Crazy huh ?
 | 
					      // 3 hop based on thin links. Crazy huh ?
 | 
				
			||||||
      U  = PeekIndex<LorentzIndex>(Uthin, mu);
 | 
					      U  = PeekIndex<LorentzIndex>(Uthin, mu);
 | 
				
			||||||
@@ -156,7 +156,7 @@ public:
 | 
				
			|||||||
      UUUdag = UUUdag *phases;
 | 
					      UUUdag = UUUdag *phases;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	InsertGaugeField(UUUds,UUU,mu);
 | 
						InsertGaugeField(UUUds,UUU,mu);
 | 
				
			||||||
	InsertGaugeField(UUUds,UUUdag,mu+4);
 | 
						InsertGaugeField(UUUds,UUUdag,mu+Nd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										175
									
								
								Grid/qcd/action/fermion/TwoSpinWilsonFermion3plus1D.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										175
									
								
								Grid/qcd/action/fermion/TwoSpinWilsonFermion3plus1D.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,175 @@
 | 
				
			|||||||
 | 
					/*************************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Grid physics library, www.github.com/paboyle/Grid 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Source file: ./lib/qcd/action/fermion/TwoSpinWilsonFermion3plus1D.h
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Copyright (C) 2015
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					    it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					    the Free Software Foundation; either version 2 of the License, or
 | 
				
			||||||
 | 
					    (at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					    GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    You should have received a copy of the GNU General Public License along
 | 
				
			||||||
 | 
					    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
 | 
					    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    See the full license in the file "LICENSE" in the top level distribution directory
 | 
				
			||||||
 | 
					*************************************************************************************/
 | 
				
			||||||
 | 
					/*  END LEGAL */
 | 
				
			||||||
 | 
					#pragma one 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class TwoSpinWilsonFermion3plus1DStatic { 
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					  // S-direction is INNERMOST and takes no part in the parity.
 | 
				
			||||||
 | 
					  static const std::vector<int> directions;
 | 
				
			||||||
 | 
					  static const std::vector<int> displacements;
 | 
				
			||||||
 | 
					  static constexpr int npoint = 6;
 | 
				
			||||||
 | 
					  static std::vector<int> MakeDirections(void);
 | 
				
			||||||
 | 
					  static std::vector<int> MakeDisplacements(void);
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class Impl>
 | 
				
			||||||
 | 
					class TwoSpinWilsonFermion3plus1D : public TwoSpinWilsonKernels<Impl>, public TwoSpinWilsonFermion3plus1DStatic
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					  INHERIT_IMPL_TYPES(Impl);
 | 
				
			||||||
 | 
					  typedef TwoSpinWilsonKernels<Impl> Kernels;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  FermionField _tmp;
 | 
				
			||||||
 | 
					  FermionField &tmp(void) { return _tmp; }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int Dirichlet;
 | 
				
			||||||
 | 
					  Coordinate Block; 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ///////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Implement the abstract base
 | 
				
			||||||
 | 
					  ///////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  GridBase *GaugeGrid(void)              { return _ThreeDimGrid ;}
 | 
				
			||||||
 | 
					  GridBase *GaugeRedBlackGrid(void)      { return _ThreeDimRedBlackGrid ;}
 | 
				
			||||||
 | 
					  GridBase *FermionGrid(void)            { return _FourDimGrid;}
 | 
				
			||||||
 | 
					  GridBase *FermionRedBlackGrid(void)    { return _FourDimRedBlackGrid;}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // full checkerboard operations; leave unimplemented as abstract for now
 | 
				
			||||||
 | 
					  virtual void   M    (const FermionField &in, FermionField &out){assert(0);};
 | 
				
			||||||
 | 
					  virtual void   Mdag (const FermionField &in, FermionField &out){assert(0);};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // half checkerboard operations; leave unimplemented as abstract for now
 | 
				
			||||||
 | 
					  virtual void   Meooe       (const FermionField &in, FermionField &out);
 | 
				
			||||||
 | 
					  virtual void   Mooee       (const FermionField &in, FermionField &out);
 | 
				
			||||||
 | 
					  virtual void   MooeeInv    (const FermionField &in, FermionField &out);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  virtual void   MeooeDag    (const FermionField &in, FermionField &out);
 | 
				
			||||||
 | 
					  virtual void   MooeeDag    (const FermionField &in, FermionField &out);
 | 
				
			||||||
 | 
					  virtual void   MooeeInvDag (const FermionField &in, FermionField &out);
 | 
				
			||||||
 | 
					  virtual void   Mdir   (const FermionField &in, FermionField &out,int dir,int disp){assert(0);};   // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
 | 
				
			||||||
 | 
					  virtual void   MdirAll(const FermionField &in, std::vector<FermionField> &out){assert(0);};   // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // These can be overridden by fancy 5d chiral action
 | 
				
			||||||
 | 
					  virtual void DhopDeriv  (GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
 | 
				
			||||||
 | 
					  virtual void DhopDerivEO(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
 | 
				
			||||||
 | 
					  virtual void DhopDerivOE(GaugeField &mat,const FermionField &U,const FermionField &V,int dag);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //  void MomentumSpacePropagatorHt_5d(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
 | 
				
			||||||
 | 
					  void MomentumSpacePropagatorHt(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
 | 
				
			||||||
 | 
					  void MomentumSpacePropagatorHw(FermionField &out,const FermionField &in,RealD mass,std::vector<double> twist) ;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  // Implement hopping term non-hermitian hopping term; half cb or both
 | 
				
			||||||
 | 
					  // Implement s-diagonal DW
 | 
				
			||||||
 | 
					  void DW    (const FermionField &in, FermionField &out,int dag);
 | 
				
			||||||
 | 
					  void Dhop  (const FermionField &in, FermionField &out,int dag);
 | 
				
			||||||
 | 
					  void DhopOE(const FermionField &in, FermionField &out,int dag);
 | 
				
			||||||
 | 
					  void DhopEO(const FermionField &in, FermionField &out,int dag);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  void DhopComms  (const FermionField &in, FermionField &out);
 | 
				
			||||||
 | 
					  void DhopCalc   (const FermionField &in, FermionField &out,uint64_t *ids);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  // add a DhopComm
 | 
				
			||||||
 | 
					  // -- suboptimal interface will presently trigger multiple comms.
 | 
				
			||||||
 | 
					  void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);
 | 
				
			||||||
 | 
					  void DhopDirAll(const FermionField &in,std::vector<FermionField> &out);
 | 
				
			||||||
 | 
					  void DhopDirComms(const FermionField &in);
 | 
				
			||||||
 | 
					  void DhopDirCalc(const FermionField &in, FermionField &out,int point);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					  ///////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // New methods added 
 | 
				
			||||||
 | 
					  ///////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  void DerivInternal(StencilImpl & st,
 | 
				
			||||||
 | 
							     DoubledGaugeField & U,
 | 
				
			||||||
 | 
							     GaugeField &mat,
 | 
				
			||||||
 | 
							     const FermionField &A,
 | 
				
			||||||
 | 
							     const FermionField &B,
 | 
				
			||||||
 | 
							     int dag);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					  void DhopInternal(StencilImpl & st,
 | 
				
			||||||
 | 
							    DoubledGaugeField &U,
 | 
				
			||||||
 | 
							    const FermionField &in, 
 | 
				
			||||||
 | 
							    FermionField &out,
 | 
				
			||||||
 | 
							    int dag);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  void DhopInternalOverlappedComms(StencilImpl & st,
 | 
				
			||||||
 | 
									   DoubledGaugeField &U,
 | 
				
			||||||
 | 
									   const FermionField &in, 
 | 
				
			||||||
 | 
									   FermionField &out,
 | 
				
			||||||
 | 
									   int dag);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  void DhopInternalSerialComms(StencilImpl & st,
 | 
				
			||||||
 | 
								       DoubledGaugeField &U,
 | 
				
			||||||
 | 
								       const FermionField &in, 
 | 
				
			||||||
 | 
								       FermionField &out,
 | 
				
			||||||
 | 
								       int dag);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					  // Constructors
 | 
				
			||||||
 | 
					  TwoSpinWilsonFermion3plus1D(GaugeField &_Umu,
 | 
				
			||||||
 | 
							  GridCartesian         &FourDimGrid,
 | 
				
			||||||
 | 
							  GridRedBlackCartesian &FourDimRedBlackGrid,
 | 
				
			||||||
 | 
							  GridCartesian         &ThreeDimGrid,
 | 
				
			||||||
 | 
							  GridRedBlackCartesian &ThreeDimRedBlackGrid,
 | 
				
			||||||
 | 
							  double _M5,const ImplParams &p= ImplParams());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  virtual void DirichletBlock(const Coordinate & block)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					  // DoubleStore
 | 
				
			||||||
 | 
					  void ImportGauge(const GaugeField &_Umu);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					  ///////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Data members require to support the functionality
 | 
				
			||||||
 | 
					  ///////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					  // Add these to the support from Wilson
 | 
				
			||||||
 | 
					  GridBase *_ThreeDimGrid;
 | 
				
			||||||
 | 
					  GridBase *_ThreeDimRedBlackGrid;
 | 
				
			||||||
 | 
					  GridBase *_FourDimGrid;
 | 
				
			||||||
 | 
					  GridBase *_FourDimRedBlackGrid;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					  double                        M5;
 | 
				
			||||||
 | 
					  int Ls;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					  //Defines the stencils for even and odd
 | 
				
			||||||
 | 
					  StencilImpl Stencil; 
 | 
				
			||||||
 | 
					  StencilImpl StencilEven; 
 | 
				
			||||||
 | 
					  StencilImpl StencilOdd; 
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					  // Copy of the gauge field , with even and odd subsets
 | 
				
			||||||
 | 
					  DoubledGaugeField Umu;
 | 
				
			||||||
 | 
					  DoubledGaugeField UmuEven;
 | 
				
			||||||
 | 
					  DoubledGaugeField UmuOdd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										222
									
								
								Grid/qcd/action/fermion/TwoSpinWilsonImpl.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										222
									
								
								Grid/qcd/action/fermion/TwoSpinWilsonImpl.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,222 @@
 | 
				
			|||||||
 | 
					/*************************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Grid physics library, www.github.com/paboyle/Grid
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Source file: ./lib/qcd/action/fermion/FermionOperatorImpl.h
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Copyright (C) 2015
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					the Free Software Foundation; either version 2 of the License, or
 | 
				
			||||||
 | 
					(at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					You should have received a copy of the GNU General Public License along
 | 
				
			||||||
 | 
					with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
 | 
					51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					See the full license in the file "LICENSE" in the top level distribution
 | 
				
			||||||
 | 
					directory
 | 
				
			||||||
 | 
					*************************************************************************************/
 | 
				
			||||||
 | 
								   /*  END LEGAL */
 | 
				
			||||||
 | 
					#pragma once
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					/////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					// Single flavour four spinors with colour index
 | 
				
			||||||
 | 
					/////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					template <class S, class Representation = FundamentalRepresentation,class Options = CoeffReal >
 | 
				
			||||||
 | 
					class TwoSpinWilsonImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation::Dimension > > {
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  static const int Dimension = Representation::Dimension;
 | 
				
			||||||
 | 
					  static const bool isFundamental = Representation::isFundamental;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  typedef PeriodicGaugeImpl<GaugeImplTypes<S, Dimension > > Gimpl;
 | 
				
			||||||
 | 
					  INHERIT_GIMPL_TYPES(Gimpl);
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					  //Necessary?
 | 
				
			||||||
 | 
					  constexpr bool is_fundamental() const{return Dimension == Nc ? 1 : 0;}
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					  typedef typename Options::_Coeff_t Coeff_t;
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					  template <typename vtype> using iImplSpinor            = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >;
 | 
				
			||||||
 | 
					  template <typename vtype> using iImplPropagator        = iScalar<iMatrix<iMatrix<vtype, Dimension>, Nhs> >;
 | 
				
			||||||
 | 
					  template <typename vtype> using iImplHalfSpinor        = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >;
 | 
				
			||||||
 | 
					  template <typename vtype> using iImplHalfCommSpinor    = iScalar<iVector<iVector<vtype, Dimension>, Nhs> >;
 | 
				
			||||||
 | 
					  template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					  typedef iImplSpinor<Simd>            SiteSpinor;
 | 
				
			||||||
 | 
					  typedef iImplPropagator<Simd>        SitePropagator;
 | 
				
			||||||
 | 
					  typedef iImplHalfSpinor<Simd>        SiteHalfSpinor;
 | 
				
			||||||
 | 
					  typedef iImplHalfCommSpinor<Simd>    SiteHalfCommSpinor;
 | 
				
			||||||
 | 
					  typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					  typedef Lattice<SiteSpinor>            FermionField;
 | 
				
			||||||
 | 
					  typedef Lattice<SitePropagator>        PropagatorField;
 | 
				
			||||||
 | 
					  typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					  typedef SimpleCompressor<SiteSpinor> Compressor;
 | 
				
			||||||
 | 
					  typedef WilsonImplParams ImplParams;
 | 
				
			||||||
 | 
					  typedef CartesianStencil<SiteSpinor, SiteSpinor, ImplParams> StencilImpl;
 | 
				
			||||||
 | 
					  typedef const typename StencilImpl::View_type StencilView;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					  ImplParams Params;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  TwoSpinWilsonImpl(const ImplParams &p = ImplParams()) : Params(p){
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  template<class _Spinor>
 | 
				
			||||||
 | 
					  static accelerator_inline void multLink(_Spinor &phi,
 | 
				
			||||||
 | 
										  const SiteDoubledGaugeField &U,
 | 
				
			||||||
 | 
										  const _Spinor &chi,
 | 
				
			||||||
 | 
										  int mu) 
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    auto UU = coalescedRead(U(mu));
 | 
				
			||||||
 | 
					    mult(&phi(), &UU, &chi());
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  template<class _Spinor>
 | 
				
			||||||
 | 
					  static accelerator_inline void multLink(_Spinor &phi,
 | 
				
			||||||
 | 
										  const SiteDoubledGaugeField &U,
 | 
				
			||||||
 | 
										  const _Spinor &chi,
 | 
				
			||||||
 | 
										  int mu,
 | 
				
			||||||
 | 
										  StencilEntry *SE,
 | 
				
			||||||
 | 
										  StencilView &St) 
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    multLink(phi,U,chi,mu);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  template<class _SpinorField> 
 | 
				
			||||||
 | 
					  inline void multLinkField(_SpinorField & out,
 | 
				
			||||||
 | 
								    const DoubledGaugeField &Umu,
 | 
				
			||||||
 | 
								    const _SpinorField & phi,
 | 
				
			||||||
 | 
								    int mu)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    const int Nsimd = SiteHalfSpinor::Nsimd();
 | 
				
			||||||
 | 
					    autoView( out_v, out, AcceleratorWrite);
 | 
				
			||||||
 | 
					    autoView( phi_v, phi, AcceleratorRead);
 | 
				
			||||||
 | 
					    autoView( Umu_v, Umu, AcceleratorRead);
 | 
				
			||||||
 | 
					    typedef decltype(coalescedRead(out_v[0]))   calcSpinor;
 | 
				
			||||||
 | 
					    accelerator_for(sss,out.Grid()->oSites(),Nsimd,{
 | 
				
			||||||
 | 
						calcSpinor tmp;
 | 
				
			||||||
 | 
						multLink(tmp,Umu_v[sss],phi_v(sss),mu);
 | 
				
			||||||
 | 
						coalescedWrite(out_v[sss],tmp);
 | 
				
			||||||
 | 
					    });
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
										   
 | 
				
			||||||
 | 
					  template <class ref>
 | 
				
			||||||
 | 
					  static accelerator_inline void loadLinkElement(Simd ®, ref &memory) 
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    reg = memory;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					  inline void DoubleStore(GridBase *GaugeGrid,
 | 
				
			||||||
 | 
								  DoubledGaugeField &Uds,
 | 
				
			||||||
 | 
								  const GaugeField &Umu) 
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    typedef typename Simd::scalar_type scalar_type;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    conformable(Uds.Grid(), GaugeGrid);
 | 
				
			||||||
 | 
					    conformable(Umu.Grid(), GaugeGrid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    GaugeLinkField U(GaugeGrid);
 | 
				
			||||||
 | 
					    GaugeLinkField tmp(GaugeGrid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Lattice<iScalar<vInteger> > coor(GaugeGrid);
 | 
				
			||||||
 | 
					      ////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					      // apply any boundary phase or twists
 | 
				
			||||||
 | 
					      ////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    for (int mu = 0; mu < Nd; mu++) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						////////// boundary phase /////////////
 | 
				
			||||||
 | 
					      auto pha = Params.boundary_phases[mu];
 | 
				
			||||||
 | 
					      scalar_type phase( real(pha),imag(pha) );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						int L   = GaugeGrid->GlobalDimensions()[mu];
 | 
				
			||||||
 | 
					        int Lmu = L - 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      LatticeCoordinate(coor, mu);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      U = PeekIndex<LorentzIndex>(Umu, mu);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// apply any twists
 | 
				
			||||||
 | 
						RealD theta = Params.twist_n_2pi_L[mu] * 2*M_PI / L;
 | 
				
			||||||
 | 
						if ( theta != 0.0) { 
 | 
				
			||||||
 | 
						  scalar_type twphase(::cos(theta),::sin(theta));
 | 
				
			||||||
 | 
						  U = twphase*U;
 | 
				
			||||||
 | 
						  std::cout << GridLogMessage << " Twist ["<<mu<<"] "<< Params.twist_n_2pi_L[mu]<< " phase"<<phase <<std::endl;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      tmp = where(coor == Lmu, phase * U, U);
 | 
				
			||||||
 | 
					      PokeIndex<LorentzIndex>(Uds, tmp, mu);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      U = adj(Cshift(U, mu, -1));
 | 
				
			||||||
 | 
					      U = where(coor == 0, conjugate(phase) * U, U); 
 | 
				
			||||||
 | 
					      PokeIndex<LorentzIndex>(Uds, U, mu + Nd);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  inline void InsertForce4D(GaugeField &mat, FermionField &Btilde, FermionField &A,int mu){
 | 
				
			||||||
 | 
					    GaugeLinkField link(mat.Grid());
 | 
				
			||||||
 | 
					    link = TraceIndex<SpinIndex>(outerProduct(Btilde,A)); 
 | 
				
			||||||
 | 
					    PokeIndex<LorentzIndex>(mat,link,mu);
 | 
				
			||||||
 | 
					  }   
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					    inline void outerProductImpl(PropagatorField &mat, const FermionField &B, const FermionField &A){
 | 
				
			||||||
 | 
					      mat = outerProduct(B,A); 
 | 
				
			||||||
 | 
					    }  
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    inline void TraceSpinImpl(GaugeLinkField &mat, PropagatorField&P) {
 | 
				
			||||||
 | 
					      mat = TraceIndex<SpinIndex>(P); 
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					    inline void extractLinkField(std::vector<GaugeLinkField> &mat, DoubledGaugeField &Uds)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					      for (int mu = 0; mu < Nd; mu++)
 | 
				
			||||||
 | 
					      mat[mu] = PeekIndex<LorentzIndex>(Uds, mu);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  inline void InsertForce5D(GaugeField &mat, FermionField &Btilde, FermionField Ã,int mu)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    int Ls=Btilde.Grid()->_fdimensions[0];
 | 
				
			||||||
 | 
					    autoView( mat_v , mat, AcceleratorWrite);
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					      const int Nsimd = SiteSpinor::Nsimd();
 | 
				
			||||||
 | 
					      autoView( Btilde_v , Btilde, AcceleratorRead);
 | 
				
			||||||
 | 
					      autoView( Atilde_v , Atilde, AcceleratorRead);
 | 
				
			||||||
 | 
					      accelerator_for(sss,mat.Grid()->oSites(),Nsimd,{
 | 
				
			||||||
 | 
						  int sU=sss;
 | 
				
			||||||
 | 
					  	  typedef decltype(coalescedRead(mat_v[sU](mu)() )) ColorMatrixType;
 | 
				
			||||||
 | 
					  	  ColorMatrixType sum;
 | 
				
			||||||
 | 
						  zeroit(sum);  
 | 
				
			||||||
 | 
						  for(int s=0;s<Ls;s++){
 | 
				
			||||||
 | 
						    int sF = s+Ls*sU;
 | 
				
			||||||
 | 
					  	    for(int spn=0;spn<Ns;spn++){ //sum over spin
 | 
				
			||||||
 | 
					  	      auto bb = coalescedRead(Btilde_v[sF]()(spn) ); //color vector
 | 
				
			||||||
 | 
					  	      auto aa = coalescedRead(Atilde_v[sF]()(spn) );
 | 
				
			||||||
 | 
						      auto op = outerProduct(bb,aa);
 | 
				
			||||||
 | 
					  	      sum = sum + op;
 | 
				
			||||||
 | 
						    }
 | 
				
			||||||
 | 
						  }
 | 
				
			||||||
 | 
					  	  coalescedWrite(mat_v[sU](mu)(), sum);
 | 
				
			||||||
 | 
					      });
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					typedef TwoSpinWilsonImpl<vComplex,  FundamentalRepresentation, CoeffReal > TwoSpinWilsonImplR;  // Real.. whichever prec
 | 
				
			||||||
 | 
					typedef TwoSpinWilsonImpl<vComplexF, FundamentalRepresentation, CoeffReal > TwoSpinWilsonImplF;  // Float
 | 
				
			||||||
 | 
					typedef TwoSpinWilsonImpl<vComplexD, FundamentalRepresentation, CoeffReal > TwoSpinWilsonImplD;  // Double
 | 
				
			||||||
 | 
					typedef TwoSpinWilsonImpl<vComplexD2, FundamentalRepresentation, CoeffReal > TwoSpinWilsonImplD2;  // Double
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
							
								
								
									
										84
									
								
								Grid/qcd/action/fermion/TwoSpinWilsonKernels.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										84
									
								
								Grid/qcd/action/fermion/TwoSpinWilsonKernels.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,84 @@
 | 
				
			|||||||
 | 
					/*************************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Grid physics library, www.github.com/paboyle/Grid
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Source file: ./lib/qcd/action/fermion/WilsonKernels.h
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Copyright (C) 2015
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					the Free Software Foundation; either version 2 of the License, or
 | 
				
			||||||
 | 
					(at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					You should have received a copy of the GNU General Public License along
 | 
				
			||||||
 | 
					with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
 | 
					51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					See the full license in the file "LICENSE" in the top level distribution
 | 
				
			||||||
 | 
					directory
 | 
				
			||||||
 | 
					*************************************************************************************/
 | 
				
			||||||
 | 
								   /*  END LEGAL */
 | 
				
			||||||
 | 
					#pragma once
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					// Helper routines that implement Wilson stencil for a single site.
 | 
				
			||||||
 | 
					// Common to both the WilsonFermion and WilsonFermion5D
 | 
				
			||||||
 | 
					////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					template<class Impl> class TwoSpinWilsonKernels : public FermionOperator<Impl>  { 
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  INHERIT_IMPL_TYPES(Impl);
 | 
				
			||||||
 | 
					  typedef FermionOperator<Impl> Base;
 | 
				
			||||||
 | 
					  typedef AcceleratorVector<int,STENCIL_MAX> StencilVector;   
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  static void DhopKernel(StencilImpl &st,  DoubledGaugeField &U, SiteSpinor * buf,
 | 
				
			||||||
 | 
								 int Ls, int Nsite, const FermionField &in, FermionField &out,
 | 
				
			||||||
 | 
								 int interior=1,int exterior=1) ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  static void DhopKernel(StencilImpl &st,  DoubledGaugeField &U, SiteSpinor * buf,
 | 
				
			||||||
 | 
								 int Ls, int Nsite, const FermionField &in, FermionField &out,
 | 
				
			||||||
 | 
								 uint64_t *ids);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  static void DhopDagKernel(StencilImpl &st,  DoubledGaugeField &U, SiteSpinor * buf,
 | 
				
			||||||
 | 
								    int Ls, int Nsite, const FermionField &in, FermionField &out,
 | 
				
			||||||
 | 
								    int interior=1,int exterior=1) ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  static void DhopDirAll( StencilImpl &st, DoubledGaugeField &U,SiteSpinor *buf, int Ls,
 | 
				
			||||||
 | 
								  int Nsite, const FermionField &in, std::vector<FermionField> &out) ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  static void DhopDirKernel(StencilImpl &st, DoubledGaugeField &U,SiteSpinor * buf,
 | 
				
			||||||
 | 
								    int Ls, int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					private:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  static accelerator_inline void DhopDirK(StencilView &st, DoubledGaugeFieldView &U,SiteSpinor * buf,
 | 
				
			||||||
 | 
									   int sF, int sU, const FermionFieldView &in, FermionFieldView &out, int dirdisp, int gamma);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  static accelerator_inline void DhopDirXp(StencilView &st,DoubledGaugeFieldView &U,SiteSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
 | 
				
			||||||
 | 
					  static accelerator_inline void DhopDirYp(StencilView &st,DoubledGaugeFieldView &U,SiteSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
 | 
				
			||||||
 | 
					  static accelerator_inline void DhopDirZp(StencilView &st,DoubledGaugeFieldView &U,SiteSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
 | 
				
			||||||
 | 
					  static accelerator_inline void DhopDirXm(StencilView &st,DoubledGaugeFieldView &U,SiteSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
 | 
				
			||||||
 | 
					  static accelerator_inline void DhopDirYm(StencilView &st,DoubledGaugeFieldView &U,SiteSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
 | 
				
			||||||
 | 
					  static accelerator_inline void DhopDirZm(StencilView &st,DoubledGaugeFieldView &U,SiteSpinor *buf,int sF,int sU,const FermionFieldView &in,FermionFieldView &out,int dirdisp);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 public:
 | 
				
			||||||
 | 
					  TwoSpinWilsonKernels(const ImplParams &p = ImplParams()) : Base(p){};
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -38,6 +38,8 @@ public:
 | 
				
			|||||||
  static int MortonOrder;
 | 
					  static int MortonOrder;
 | 
				
			||||||
  static const std::vector<int> directions;
 | 
					  static const std::vector<int> directions;
 | 
				
			||||||
  static const std::vector<int> displacements;
 | 
					  static const std::vector<int> displacements;
 | 
				
			||||||
 | 
					  static std::vector<int> MakeDirections(void);
 | 
				
			||||||
 | 
					  static std::vector<int> MakeDisplacements(void);
 | 
				
			||||||
  static const int npoint = 8;
 | 
					  static const int npoint = 8;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -62,6 +62,8 @@ public:
 | 
				
			|||||||
  static const std::vector<int> directions;
 | 
					  static const std::vector<int> directions;
 | 
				
			||||||
  static const std::vector<int> displacements;
 | 
					  static const std::vector<int> displacements;
 | 
				
			||||||
  static constexpr int npoint = 8;
 | 
					  static constexpr int npoint = 8;
 | 
				
			||||||
 | 
					  static std::vector<int> MakeDirections(void);
 | 
				
			||||||
 | 
					  static std::vector<int> MakeDisplacements(void);
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class Impl>
 | 
					template<class Impl>
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -166,7 +166,7 @@ public:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
      U = adj(Cshift(U, mu, -1));
 | 
					      U = adj(Cshift(U, mu, -1));
 | 
				
			||||||
      U = where(coor == 0, conjugate(phase) * U, U); 
 | 
					      U = where(coor == 0, conjugate(phase) * U, U); 
 | 
				
			||||||
      PokeIndex<LorentzIndex>(Uds, U, mu + 4);
 | 
					      PokeIndex<LorentzIndex>(Uds, U, mu + Nd);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -56,7 +56,7 @@ class WilsonTMFermion5D : public WilsonFermion5D<Impl>
 | 
				
			|||||||
			Frbgrid,
 | 
								Frbgrid,
 | 
				
			||||||
			Ugrid,
 | 
								Ugrid,
 | 
				
			||||||
			Urbgrid,
 | 
								Urbgrid,
 | 
				
			||||||
			4.0,p)
 | 
								Nd*1.0,p)
 | 
				
			||||||
   
 | 
					   
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
      update(_mass,_mu);
 | 
					      update(_mass,_mu);
 | 
				
			||||||
@@ -83,7 +83,7 @@ class WilsonTMFermion5D : public WilsonFermion5D<Impl>
 | 
				
			|||||||
    out.Checkerboard() = in.Checkerboard();
 | 
					    out.Checkerboard() = in.Checkerboard();
 | 
				
			||||||
    //axpibg5x(out,in,a,b); // out = a*in + b*i*G5*in
 | 
					    //axpibg5x(out,in,a,b); // out = a*in + b*i*G5*in
 | 
				
			||||||
    for (int s=0;s<(int)this->mass.size();s++) {
 | 
					    for (int s=0;s<(int)this->mass.size();s++) {
 | 
				
			||||||
      ComplexD a = 4.0+this->mass[s];
 | 
					      ComplexD a = Nd*1.0+this->mass[s];
 | 
				
			||||||
      ComplexD b(0.0,this->mu[s]);
 | 
					      ComplexD b(0.0,this->mu[s]);
 | 
				
			||||||
      axpbg5y_ssp(out,a,in,b,in,s,s);
 | 
					      axpbg5y_ssp(out,a,in,b,in,s,s);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@@ -92,7 +92,7 @@ class WilsonTMFermion5D : public WilsonFermion5D<Impl>
 | 
				
			|||||||
  virtual void MooeeDag(const FermionField &in, FermionField &out) {
 | 
					  virtual void MooeeDag(const FermionField &in, FermionField &out) {
 | 
				
			||||||
    out.Checkerboard() = in.Checkerboard();
 | 
					    out.Checkerboard() = in.Checkerboard();
 | 
				
			||||||
    for (int s=0;s<(int)this->mass.size();s++) {
 | 
					    for (int s=0;s<(int)this->mass.size();s++) {
 | 
				
			||||||
      ComplexD a = 4.0+this->mass[s];
 | 
					      ComplexD a = Nd*1.0+this->mass[s];
 | 
				
			||||||
      ComplexD b(0.0,-this->mu[s]);
 | 
					      ComplexD b(0.0,-this->mu[s]);
 | 
				
			||||||
      axpbg5y_ssp(out,a,in,b,in,s,s);
 | 
					      axpbg5y_ssp(out,a,in,b,in,s,s);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@@ -101,7 +101,7 @@ class WilsonTMFermion5D : public WilsonFermion5D<Impl>
 | 
				
			|||||||
    for (int s=0;s<(int)this->mass.size();s++) {
 | 
					    for (int s=0;s<(int)this->mass.size();s++) {
 | 
				
			||||||
      RealD m    = this->mass[s];
 | 
					      RealD m    = this->mass[s];
 | 
				
			||||||
      RealD tm   = this->mu[s];
 | 
					      RealD tm   = this->mu[s];
 | 
				
			||||||
      RealD mtil = 4.0+this->mass[s];
 | 
					      RealD mtil = Nd*1.0+this->mass[s];
 | 
				
			||||||
      RealD sq   = mtil*mtil+tm*tm;
 | 
					      RealD sq   = mtil*mtil+tm*tm;
 | 
				
			||||||
      ComplexD a    = mtil/sq;
 | 
					      ComplexD a    = mtil/sq;
 | 
				
			||||||
      ComplexD b(0.0, -tm /sq);
 | 
					      ComplexD b(0.0, -tm /sq);
 | 
				
			||||||
@@ -112,7 +112,7 @@ class WilsonTMFermion5D : public WilsonFermion5D<Impl>
 | 
				
			|||||||
    for (int s=0;s<(int)this->mass.size();s++) {
 | 
					    for (int s=0;s<(int)this->mass.size();s++) {
 | 
				
			||||||
      RealD m    = this->mass[s];
 | 
					      RealD m    = this->mass[s];
 | 
				
			||||||
      RealD tm   = this->mu[s];
 | 
					      RealD tm   = this->mu[s];
 | 
				
			||||||
      RealD mtil = 4.0+this->mass[s];
 | 
					      RealD mtil = Nd*1.0+this->mass[s];
 | 
				
			||||||
      RealD sq   = mtil*mtil+tm*tm;
 | 
					      RealD sq   = mtil*mtil+tm*tm;
 | 
				
			||||||
      ComplexD a    = mtil/sq;
 | 
					      ComplexD a    = mtil/sq;
 | 
				
			||||||
      ComplexD b(0.0,tm /sq);
 | 
					      ComplexD b(0.0,tm /sq);
 | 
				
			||||||
@@ -126,7 +126,7 @@ class WilsonTMFermion5D : public WilsonFermion5D<Impl>
 | 
				
			|||||||
    this->Dhop(in, out, DaggerNo);
 | 
					    this->Dhop(in, out, DaggerNo);
 | 
				
			||||||
    FermionField tmp(out.Grid());
 | 
					    FermionField tmp(out.Grid());
 | 
				
			||||||
    for (int s=0;s<(int)this->mass.size();s++) {
 | 
					    for (int s=0;s<(int)this->mass.size();s++) {
 | 
				
			||||||
      ComplexD a = 4.0+this->mass[s];
 | 
					      ComplexD a = Nd*1.0+this->mass[s];
 | 
				
			||||||
      ComplexD b(0.0,this->mu[s]);
 | 
					      ComplexD b(0.0,this->mu[s]);
 | 
				
			||||||
      axpbg5y_ssp(tmp,a,in,b,in,s,s);
 | 
					      axpbg5y_ssp(tmp,a,in,b,in,s,s);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -240,7 +240,7 @@ void DomainWallEOFAFermion<Impl>::SetCoefficientsInternal(RealD zolo_hi, std::ve
 | 
				
			|||||||
  this->ceo.resize(Ls);
 | 
					  this->ceo.resize(Ls);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  for(int i=0; i<Ls; ++i){
 | 
					  for(int i=0; i<Ls; ++i){
 | 
				
			||||||
    this->bee[i] = 4.0 - this->M5 + 1.0;
 | 
					    this->bee[i] = Nd*1.0 - this->M5 + 1.0;
 | 
				
			||||||
    this->cee[i] = 1.0;
 | 
					    this->cee[i] = 1.0;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -0,0 +1,486 @@
 | 
				
			|||||||
 | 
					/*************************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Grid physics library, www.github.com/paboyle/Grid 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Source file: ./lib/qcd/action/fermion/TwoSpinWilsonFermion2plus1D.cc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Copyright (C) 2015
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					    it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					    the Free Software Foundation; either version 2 of the License, or
 | 
				
			||||||
 | 
					    (at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					    GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    You should have received a copy of the GNU General Public License along
 | 
				
			||||||
 | 
					    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
 | 
					    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    See the full license in the file "LICENSE" in the top level distribution directory
 | 
				
			||||||
 | 
					    *************************************************************************************/
 | 
				
			||||||
 | 
					    /*  END LEGAL */
 | 
				
			||||||
 | 
					#include <Grid/qcd/action/fermion/FermionCore.h>
 | 
				
			||||||
 | 
					#include <Grid/qcd/action/fermion/WilsonFermion5D.h>
 | 
				
			||||||
 | 
					#include <Grid/perfmon/PerfCount.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  // 5d lattice for DWF.
 | 
				
			||||||
 | 
					template<class Impl>
 | 
				
			||||||
 | 
					TwoSpinWilsonFermion3plus15D<Impl>::TwoSpinWilsonFermion3plus1D(GaugeField &_Umu,
 | 
				
			||||||
 | 
													GridCartesian         &FourDimGrid,
 | 
				
			||||||
 | 
													GridRedBlackCartesian &FourDimRedBlackGrid,
 | 
				
			||||||
 | 
													GridCartesian         &ThreeDimGrid,
 | 
				
			||||||
 | 
													GridRedBlackCartesian &ThreeDimRedBlackGrid,
 | 
				
			||||||
 | 
					               RealD _M5,const ImplParams &p) :
 | 
				
			||||||
 | 
					  Kernels(p),
 | 
				
			||||||
 | 
					  _FourDimGrid        (&FourDimGrid),
 | 
				
			||||||
 | 
					  _FourDimRedBlackGrid(&FourDimRedBlackGrid),
 | 
				
			||||||
 | 
					  _ThreeDimGrid        (&ThreeDimGrid),
 | 
				
			||||||
 | 
					  _ThreeDimRedBlackGrid(&ThreeDimRedBlackGrid),
 | 
				
			||||||
 | 
					  Stencil    (_FourDimGrid,npoint,Even,directions,displacements,p),
 | 
				
			||||||
 | 
					  StencilEven(_FourDimRedBlackGrid,npoint,Even,directions,displacements,p), // source is Even
 | 
				
			||||||
 | 
					  StencilOdd (_FourDimRedBlackGrid,npoint,Odd ,directions,displacements,p), // source is Odd
 | 
				
			||||||
 | 
					  M5(_M5),
 | 
				
			||||||
 | 
					  Umu(_ThreeDimGrid),
 | 
				
			||||||
 | 
					  UmuEven(_ThreeDimRedBlackGrid),
 | 
				
			||||||
 | 
					  UmuOdd (_ThreeDimRedBlackGrid),
 | 
				
			||||||
 | 
					  _tmp(&FourDimRedBlackGrid),
 | 
				
			||||||
 | 
					  Dirichlet(0)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  // some assertions
 | 
				
			||||||
 | 
					  assert(FourDimGrid._ndimension==Nd+1);
 | 
				
			||||||
 | 
					  assert(ThreeDimGrid._ndimension==Nd);
 | 
				
			||||||
 | 
					  assert(ThreeDimRedBlackGrid._ndimension==Nd);
 | 
				
			||||||
 | 
					  assert(FourDimRedBlackGrid._ndimension==Nd+1);
 | 
				
			||||||
 | 
					  assert(FourDimRedBlackGrid._checker_dim==1); // Don't checker the s direction
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // extent of fifth dim and not spread out
 | 
				
			||||||
 | 
					  Ls=FourDimGrid._fdimensions[0];
 | 
				
			||||||
 | 
					  assert(FourDimRedBlackGrid._fdimensions[0]==Ls);
 | 
				
			||||||
 | 
					  assert(FourDimGrid._processors[0]         ==1);
 | 
				
			||||||
 | 
					  assert(FourDimRedBlackGrid._processors[0] ==1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Other dimensions must match the decomposition of the four-D fields 
 | 
				
			||||||
 | 
					  for(int d=0;d<Nd;d++){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    assert(FourDimGrid._processors[d+1]         ==ThreeDimGrid._processors[d]);
 | 
				
			||||||
 | 
					    assert(FourDimRedBlackGrid._processors[d+1] ==ThreeDimGrid._processors[d]);
 | 
				
			||||||
 | 
					    assert(ThreeDimRedBlackGrid._processors[d]   ==ThreeDimGrid._processors[d]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    assert(FourDimGrid._fdimensions[d+1]        ==ThreeDimGrid._fdimensions[d]);
 | 
				
			||||||
 | 
					    assert(FourDimRedBlackGrid._fdimensions[d+1]==ThreeDimGrid._fdimensions[d]);
 | 
				
			||||||
 | 
					    assert(ThreeDimRedBlackGrid._fdimensions[d]  ==ThreeDimGrid._fdimensions[d]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    assert(FourDimGrid._simd_layout[d+1]        ==ThreeDimGrid._simd_layout[d]);
 | 
				
			||||||
 | 
					    assert(FourDimRedBlackGrid._simd_layout[d+1]==ThreeDimGrid._simd_layout[d]);
 | 
				
			||||||
 | 
					    assert(ThreeDimRedBlackGrid._simd_layout[d]  ==ThreeDimGrid._simd_layout[d]);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  if ( p.dirichlet.size() == Nd+1) {
 | 
				
			||||||
 | 
					    Coordinate block = p.dirichlet;
 | 
				
			||||||
 | 
					    for(int d=0;d<Nd+1;d++) {
 | 
				
			||||||
 | 
					      if ( block[d] ){
 | 
				
			||||||
 | 
						Dirichlet = 1;
 | 
				
			||||||
 | 
						std::cout << GridLogMessage << " WilsonFermion: non-trivial Dirichlet condition "<< block << std::endl;
 | 
				
			||||||
 | 
						std::cout << GridLogMessage << " WilsonFermion: partial Dirichlet "<< p.partialDirichlet << std::endl;
 | 
				
			||||||
 | 
						Block = block;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  } else {
 | 
				
			||||||
 | 
					    Coordinate block(Nd+1,0);
 | 
				
			||||||
 | 
					    Block = block;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Dimension zero of the five-d is the Ls direction
 | 
				
			||||||
 | 
					  assert(FourDimRedBlackGrid._simd_layout[0]==1);
 | 
				
			||||||
 | 
					  assert(FourDimGrid._simd_layout[0]        ==1);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					  // Allocate the required comms buffer
 | 
				
			||||||
 | 
					  ImportGauge(_Umu);
 | 
				
			||||||
 | 
					  // Build lists of exterior only nodes
 | 
				
			||||||
 | 
					  int LLs = FourDimGrid._rdimensions[0];
 | 
				
			||||||
 | 
					  int vol3;
 | 
				
			||||||
 | 
					  vol3=ThreeDimGrid.oSites();
 | 
				
			||||||
 | 
					  Stencil.BuildSurfaceList(LLs,vol3);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  vol3=ThreeDimRedBlackGrid.oSites();
 | 
				
			||||||
 | 
					  StencilEven.BuildSurfaceList(LLs,vol3);
 | 
				
			||||||
 | 
					   StencilOdd.BuildSurfaceList(LLs,vol3);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class Impl>
 | 
				
			||||||
 | 
					void TwoSpinWilsonFermion3plus1D<Impl>::ImportGauge(const GaugeField &_Umu)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  GaugeField HUmu(_Umu.Grid());
 | 
				
			||||||
 | 
					  HUmu = _Umu*(-0.5);
 | 
				
			||||||
 | 
					  Impl::DoubleStore(GaugeGrid(),Umu,HUmu);
 | 
				
			||||||
 | 
					  pickCheckerboard(Even,UmuEven,Umu);
 | 
				
			||||||
 | 
					  pickCheckerboard(Odd ,UmuOdd,Umu);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					template<class Impl>
 | 
				
			||||||
 | 
					void TwoSpinWilsonFermion3plus1D<Impl>::DhopDir(const FermionField &in, FermionField &out,int dir5,int disp)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  int dir = dir5-1; // Maps to the ordering above in "directions" that is passed to stencil
 | 
				
			||||||
 | 
					                    // we drop off the innermost fifth dimension
 | 
				
			||||||
 | 
					  //  assert( (disp==1)||(disp==-1) );
 | 
				
			||||||
 | 
					  //  assert( (dir>=0)&&(dir<4) ); //must do x,y,z or t;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int skip = (disp==1) ? 0 : 1;
 | 
				
			||||||
 | 
					  int dirdisp = dir+skip*Nd;
 | 
				
			||||||
 | 
					  int gamma   = dir+(1-skip)*Nd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Compressor compressor(DaggerNo);
 | 
				
			||||||
 | 
					  Stencil.HaloExchange(in,compressor);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  uint64_t Nsite = Umu.Grid()->oSites();
 | 
				
			||||||
 | 
					  Kernels::DhopDirKernel(Stencil,Umu,Stencil.CommBuf(),Ls,Nsite,in,out,dirdisp,gamma);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					template<class Impl>
 | 
				
			||||||
 | 
					void TwoSpinWilsonFermion3plus1D<Impl>::DhopDirAll(const FermionField &in, std::vector<FermionField> &out)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  Compressor compressor(DaggerNo);
 | 
				
			||||||
 | 
					  Stencil.HaloExchange(in,compressor);
 | 
				
			||||||
 | 
					  uint64_t Nsite = Umu.Grid()->oSites();
 | 
				
			||||||
 | 
					  Kernels::DhopDirAll(Stencil,Umu,Stencil.CommBuf(),Ls,Nsite,in,out);
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class Impl>
 | 
				
			||||||
 | 
					void TwoSpinWilsonFermion3plus1D<Impl>::DerivInternal(StencilImpl & st,
 | 
				
			||||||
 | 
										  DoubledGaugeField & U,
 | 
				
			||||||
 | 
										  GaugeField &mat,
 | 
				
			||||||
 | 
										  const FermionField &A,
 | 
				
			||||||
 | 
										  const FermionField &B,
 | 
				
			||||||
 | 
										  int dag)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  assert((dag==DaggerNo) ||(dag==DaggerYes));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  conformable(st.Grid(),A.Grid());
 | 
				
			||||||
 | 
					  conformable(st.Grid(),B.Grid());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Compressor compressor(dag);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  FermionField Btilde(B.Grid());
 | 
				
			||||||
 | 
					  FermionField Atilde(B.Grid());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  st.HaloExchange(B,compressor);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Atilde=A;
 | 
				
			||||||
 | 
					  int LLs = B.Grid()->_rdimensions[0];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for (int mu = 0; mu < Nd; mu++) {
 | 
				
			||||||
 | 
					    ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    // Flip gamma if dag
 | 
				
			||||||
 | 
					    ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    int gamma = mu;
 | 
				
			||||||
 | 
					    if (!dag) gamma += Nd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ////////////////////////
 | 
				
			||||||
 | 
					    // Call the single hop
 | 
				
			||||||
 | 
					    ////////////////////////
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    int Usites = U.Grid()->oSites();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Kernels::DhopDirKernel(st, U, st.CommBuf(), Ls, Usites, B, Btilde, mu,gamma);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ////////////////////////////
 | 
				
			||||||
 | 
					    // spin trace outer product
 | 
				
			||||||
 | 
					    ////////////////////////////
 | 
				
			||||||
 | 
					    Impl::InsertForce5D(mat, Btilde, Atilde, mu);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class Impl>
 | 
				
			||||||
 | 
					void TwoSpinWilsonFermion3plus1D<Impl>::DhopDeriv(GaugeField &mat,
 | 
				
			||||||
 | 
					                                      const FermionField &A,
 | 
				
			||||||
 | 
					                                      const FermionField &B,
 | 
				
			||||||
 | 
					                                      int dag)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  conformable(A.Grid(),FermionGrid());  
 | 
				
			||||||
 | 
					  conformable(A.Grid(),B.Grid());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //conformable(GaugeGrid(),mat.Grid());// this is not general! leaving as a comment
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  mat.Checkerboard() = A.Checkerboard();
 | 
				
			||||||
 | 
					  //  mat.checkerboard = A.checkerboard;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  DerivInternal(Stencil,Umu,mat,A,B,dag);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class Impl>
 | 
				
			||||||
 | 
					void TwoSpinWilsonFermion3plus1D<Impl>::DhopDerivEO(GaugeField &mat,
 | 
				
			||||||
 | 
					                                        const FermionField &A,
 | 
				
			||||||
 | 
					                                        const FermionField &B,
 | 
				
			||||||
 | 
					                                        int dag)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  conformable(A.Grid(),FermionRedBlackGrid());
 | 
				
			||||||
 | 
					  conformable(A.Grid(),B.Grid());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  assert(B.Checkerboard()==Odd);
 | 
				
			||||||
 | 
					  assert(A.Checkerboard()==Even);
 | 
				
			||||||
 | 
					  mat.Checkerboard() = Even;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  DerivInternal(StencilOdd,UmuEven,mat,A,B,dag);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class Impl>
 | 
				
			||||||
 | 
					void TwoSpinWilsonFermion3plus1D<Impl>::DhopDerivOE(GaugeField &mat,
 | 
				
			||||||
 | 
					                                        const FermionField &A,
 | 
				
			||||||
 | 
					                                        const FermionField &B,
 | 
				
			||||||
 | 
					                                        int dag)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  conformable(A.Grid(),FermionRedBlackGrid());
 | 
				
			||||||
 | 
					  conformable(A.Grid(),B.Grid());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  assert(B.Checkerboard()==Even);
 | 
				
			||||||
 | 
					  assert(A.Checkerboard()==Odd);
 | 
				
			||||||
 | 
					  mat.Checkerboard() = Odd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  DerivInternal(StencilEven,UmuOdd,mat,A,B,dag);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class Impl>
 | 
				
			||||||
 | 
					void TwoSpinWilsonFermion3plus1D<Impl>::DhopInternal(StencilImpl & st,
 | 
				
			||||||
 | 
					                                         DoubledGaugeField & U,
 | 
				
			||||||
 | 
					                                         const FermionField &in, FermionField &out,int dag)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  DhopInternalSerialComms(st,U,in,out,dag);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class Impl>
 | 
				
			||||||
 | 
					void TwoSpinWilsonFermion3plus1D<Impl>::DhopInternalOverlappedComms(StencilImpl & st,
 | 
				
			||||||
 | 
												DoubledGaugeField & U,
 | 
				
			||||||
 | 
												const FermionField &in, FermionField &out,int dag)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  GRID_TRACE("DhopInternalOverlappedComms");
 | 
				
			||||||
 | 
					  Compressor compressor(dag);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int LLs = in.Grid()->_rdimensions[0];
 | 
				
			||||||
 | 
					  int len =  U.Grid()->oSites();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /////////////////////////////
 | 
				
			||||||
 | 
					  // Start comms  // Gather intranode and extra node differentiated??
 | 
				
			||||||
 | 
					  /////////////////////////////
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    //    std::cout << " TwoSpinWilsonFermion3plus1D gather " <<std::endl;
 | 
				
			||||||
 | 
					    GRID_TRACE("Gather");
 | 
				
			||||||
 | 
					    st.HaloExchangeOptGather(in,compressor); // Put the barrier in the routine
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  //  std::cout << " TwoSpinWilsonFermion3plus1D Communicate Begin " <<std::endl;
 | 
				
			||||||
 | 
					  std::vector<std::vector<CommsRequest_t> > requests;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if 1
 | 
				
			||||||
 | 
					  /////////////////////////////
 | 
				
			||||||
 | 
					  // Overlap with comms
 | 
				
			||||||
 | 
					  /////////////////////////////
 | 
				
			||||||
 | 
					  st.CommunicateBegin(requests);
 | 
				
			||||||
 | 
					  st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms 
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /////////////////////////////
 | 
				
			||||||
 | 
					  // do the compute interior
 | 
				
			||||||
 | 
					  /////////////////////////////
 | 
				
			||||||
 | 
					  if (dag == DaggerYes) {
 | 
				
			||||||
 | 
					    GRID_TRACE("DhopDagInterior");
 | 
				
			||||||
 | 
					    Kernels::DhopDagKernel(st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0);
 | 
				
			||||||
 | 
					  } else {
 | 
				
			||||||
 | 
					    GRID_TRACE("DhopInterior");
 | 
				
			||||||
 | 
					    Kernels::DhopKernel   (st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  //ifdef GRID_ACCELERATED
 | 
				
			||||||
 | 
					#if 0
 | 
				
			||||||
 | 
					  /////////////////////////////
 | 
				
			||||||
 | 
					  // Overlap with comms -- on GPU the interior kernel call is nonblocking
 | 
				
			||||||
 | 
					  /////////////////////////////
 | 
				
			||||||
 | 
					  st.CommunicateBegin(requests);
 | 
				
			||||||
 | 
					  st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  /////////////////////////////
 | 
				
			||||||
 | 
					  // Complete comms
 | 
				
			||||||
 | 
					  /////////////////////////////
 | 
				
			||||||
 | 
					  //  std::cout << " TwoSpinWilsonFermion3plus1D Comms Complete " <<std::endl;
 | 
				
			||||||
 | 
					  st.CommunicateComplete(requests);
 | 
				
			||||||
 | 
					  //  traceStop(id);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /////////////////////////////
 | 
				
			||||||
 | 
					  // do the compute exterior
 | 
				
			||||||
 | 
					  /////////////////////////////
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    //    std::cout << " TwoSpinWilsonFermion3plus1D Comms Merge " <<std::endl;
 | 
				
			||||||
 | 
					    GRID_TRACE("Merge");
 | 
				
			||||||
 | 
					    st.CommsMerge(compressor);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //  std::cout << " TwoSpinWilsonFermion3plus1D Exterior " <<std::endl;
 | 
				
			||||||
 | 
					  if (dag == DaggerYes) {
 | 
				
			||||||
 | 
					    GRID_TRACE("DhopDagExterior");
 | 
				
			||||||
 | 
					    Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1);
 | 
				
			||||||
 | 
					  } else {
 | 
				
			||||||
 | 
					    GRID_TRACE("DhopExterior");
 | 
				
			||||||
 | 
					    Kernels::DhopKernel   (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  //  std::cout << " TwoSpinWilsonFermion3plus1D Done " <<std::endl;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class Impl>
 | 
				
			||||||
 | 
					void TwoSpinWilsonFermion3plus1D<Impl>::DhopInternalSerialComms(StencilImpl & st, 
 | 
				
			||||||
 | 
											    DoubledGaugeField & U,
 | 
				
			||||||
 | 
											    const FermionField &in, 
 | 
				
			||||||
 | 
											    FermionField &out,int dag)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  GRID_TRACE("DhopInternalSerialComms");
 | 
				
			||||||
 | 
					  Compressor compressor(dag);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int LLs = in.Grid()->_rdimensions[0];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //  std::cout << " TwoSpinWilsonFermion3plus1D Halo exch " <<std::endl;
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    GRID_TRACE("HaloExchange");
 | 
				
			||||||
 | 
					    st.HaloExchangeOpt(in,compressor);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  //  std::cout << " TwoSpinWilsonFermion3plus1D Dhop " <<std::endl;
 | 
				
			||||||
 | 
					  if (dag == DaggerYes) {
 | 
				
			||||||
 | 
					    GRID_TRACE("DhopDag");
 | 
				
			||||||
 | 
					    Kernels::DhopDagKernel(st,U,st.CommBuf(),LLs,U.oSites(),in,out);
 | 
				
			||||||
 | 
					  } else {
 | 
				
			||||||
 | 
					    GRID_TRACE("Dhop");
 | 
				
			||||||
 | 
					    Kernels::DhopKernel(st,U,st.CommBuf(),LLs,U.oSites(),in,out);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  //  std::cout << " TwoSpinWilsonFermion3plus1D Done " <<std::endl;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class Impl>
 | 
				
			||||||
 | 
					void TwoSpinWilsonFermion3plus1D<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  conformable(in.Grid(),FermionRedBlackGrid());    // verifies half grid
 | 
				
			||||||
 | 
					  conformable(in.Grid(),out.Grid()); // drops the cb check
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  assert(in.Checkerboard()==Even);
 | 
				
			||||||
 | 
					  out.Checkerboard() = Odd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  DhopInternal(StencilEven,UmuOdd,in,out,dag);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					template<class Impl>
 | 
				
			||||||
 | 
					void TwoSpinWilsonFermion3plus1D<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  conformable(in.Grid(),FermionRedBlackGrid());    // verifies half grid
 | 
				
			||||||
 | 
					  conformable(in.Grid(),out.Grid()); // drops the cb check
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  assert(in.Checkerboard()==Odd);
 | 
				
			||||||
 | 
					  out.Checkerboard() = Even;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  DhopInternal(StencilOdd,UmuEven,in,out,dag);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					template<class Impl>
 | 
				
			||||||
 | 
					void TwoSpinWilsonFermion3plus1D<Impl>::DhopComms(const FermionField &in, FermionField &out)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  int dag =0 ;
 | 
				
			||||||
 | 
					  conformable(in.Grid(),FermionGrid()); // verifies full grid
 | 
				
			||||||
 | 
					  conformable(in.Grid(),out.Grid());
 | 
				
			||||||
 | 
					  out.Checkerboard() = in.Checkerboard();
 | 
				
			||||||
 | 
					  Compressor compressor(dag);
 | 
				
			||||||
 | 
					  Stencil.HaloExchangeOpt(in,compressor);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					template<class Impl>
 | 
				
			||||||
 | 
					void TwoSpinWilsonFermion3plus1D<Impl>::DhopCalc(const FermionField &in, FermionField &out,uint64_t *ids)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  conformable(in.Grid(),FermionGrid()); // verifies full grid
 | 
				
			||||||
 | 
					  conformable(in.Grid(),out.Grid());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  out.Checkerboard() = in.Checkerboard();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int LLs = in.Grid()->_rdimensions[0];
 | 
				
			||||||
 | 
					  Kernels::DhopKernel(Stencil,Umu,Stencil.CommBuf(),LLs,Umu.oSites(),in,out,ids);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class Impl>
 | 
				
			||||||
 | 
					void TwoSpinWilsonFermion3plus1D<Impl>::Dhop(const FermionField &in, FermionField &out,int dag)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  conformable(in.Grid(),FermionGrid()); // verifies full grid
 | 
				
			||||||
 | 
					  conformable(in.Grid(),out.Grid());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  out.Checkerboard() = in.Checkerboard();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  DhopInternal(Stencil,Umu,in,out,dag);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					template<class Impl>
 | 
				
			||||||
 | 
					void TwoSpinWilsonFermion3plus1D<Impl>::DW(const FermionField &in, FermionField &out,int dag)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  out.Checkerboard()=in.Checkerboard();
 | 
				
			||||||
 | 
					  Dhop(in,out,dag); // -0.5 is included
 | 
				
			||||||
 | 
					  axpy(out,Nd*1.0-M5,in,out);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					template <class Impl>
 | 
				
			||||||
 | 
					void TwoSpinWilsonFermion3plus1D<Impl>::Meooe(const FermionField &in, FermionField &out)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  if (in.Checkerboard() == Odd) {
 | 
				
			||||||
 | 
					    DhopEO(in, out, DaggerNo);
 | 
				
			||||||
 | 
					  } else {
 | 
				
			||||||
 | 
					    DhopOE(in, out, DaggerNo);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <class Impl>
 | 
				
			||||||
 | 
					void TwoSpinWilsonFermion3plus1D<Impl>::MeooeDag(const FermionField &in, FermionField &out)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  if (in.Checkerboard() == Odd) {
 | 
				
			||||||
 | 
					    DhopEO(in, out, DaggerYes);
 | 
				
			||||||
 | 
					  } else {
 | 
				
			||||||
 | 
					    DhopOE(in, out, DaggerYes);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <class Impl>
 | 
				
			||||||
 | 
					void TwoSpinWilsonFermion3plus1D<Impl>::Mooee(const FermionField &in, FermionField &out)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  out.Checkerboard() = in.Checkerboard();
 | 
				
			||||||
 | 
					  typename FermionField::scalar_type scal(Nd*1.0 + M5);
 | 
				
			||||||
 | 
					  out = scal * in;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <class Impl>
 | 
				
			||||||
 | 
					void TwoSpinWilsonFermion3plus1D<Impl>::MooeeDag(const FermionField &in, FermionField &out)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  out.Checkerboard() = in.Checkerboard();
 | 
				
			||||||
 | 
					  Mooee(in, out);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class Impl>
 | 
				
			||||||
 | 
					void TwoSpinWilsonFermion3plus1D<Impl>::MooeeInv(const FermionField &in, FermionField &out)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  out.Checkerboard() = in.Checkerboard();
 | 
				
			||||||
 | 
					  out = (1.0/(Nd*1.0 + M5))*in;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class Impl>
 | 
				
			||||||
 | 
					void TwoSpinWilsonFermion3plus1D<Impl>::MooeeInvDag(const FermionField &in, FermionField &out)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  out.Checkerboard() = in.Checkerboard();
 | 
				
			||||||
 | 
					  MooeeInv(in,out);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -0,0 +1,441 @@
 | 
				
			|||||||
 | 
					/*************************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Grid physics library, www.github.com/paboyle/Grid
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Source file: ./lib/qcd/action/fermion/TwoSpinWilsonKernels.cc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Copyright (C) 2015
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					the Free Software Foundation; either version 2 of the License, or
 | 
				
			||||||
 | 
					(at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					You should have received a copy of the GNU General Public License along
 | 
				
			||||||
 | 
					with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
 | 
					51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					See the full license in the file "LICENSE" in the top level distribution
 | 
				
			||||||
 | 
					directory
 | 
				
			||||||
 | 
					*************************************************************************************/
 | 
				
			||||||
 | 
					/*  END LEGAL */
 | 
				
			||||||
 | 
					#pragma once
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <Grid/qcd/action/fermion/FermionCore.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					////////////////////////////////////////////
 | 
				
			||||||
 | 
					// Generic implementation; move to different file?
 | 
				
			||||||
 | 
					////////////////////////////////////////////
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define GENERIC_STENCIL_LEG(Dir,spProj,Recon)			\
 | 
				
			||||||
 | 
					  SE = st.GetEntry(ptype, Dir, sF);				\
 | 
				
			||||||
 | 
					  if (SE->_is_local) {						\
 | 
				
			||||||
 | 
					    int perm= SE->_permute;					\
 | 
				
			||||||
 | 
					    auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane);	\
 | 
				
			||||||
 | 
					    spProj(chi,tmp);						\
 | 
				
			||||||
 | 
					  } else {							\
 | 
				
			||||||
 | 
					    chi = coalescedRead(buf[SE->_offset],lane);			\
 | 
				
			||||||
 | 
					  }								\
 | 
				
			||||||
 | 
					  acceleratorSynchronise();					\
 | 
				
			||||||
 | 
					  Impl::multLink(Uchi, U[sU], chi, Dir, SE, st);		\
 | 
				
			||||||
 | 
					  Recon(result, Uchi);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define GENERIC_STENCIL_LEG_INT(Dir,spProj,Recon)		\
 | 
				
			||||||
 | 
					  SE = st.GetEntry(ptype, Dir, sF);				\
 | 
				
			||||||
 | 
					  if (SE->_is_local) {						\
 | 
				
			||||||
 | 
					    int perm= SE->_permute;					\
 | 
				
			||||||
 | 
					    auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane);	\
 | 
				
			||||||
 | 
					    spProj(chi,tmp);							\
 | 
				
			||||||
 | 
					    Impl::multLink(Uchi, U[sU], chi, Dir, SE, st);			\
 | 
				
			||||||
 | 
					    Recon(result, Uchi);						\
 | 
				
			||||||
 | 
					  }									\
 | 
				
			||||||
 | 
					  acceleratorSynchronise();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define GENERIC_STENCIL_LEG_EXT(Dir,spProj,Recon)		\
 | 
				
			||||||
 | 
					  SE = st.GetEntry(ptype, Dir, sF);				\
 | 
				
			||||||
 | 
					  if (!SE->_is_local ) {		\
 | 
				
			||||||
 | 
					    auto chi = coalescedRead(buf[SE->_offset],lane);		\
 | 
				
			||||||
 | 
					    Impl::multLink(Uchi, U[sU], chi, Dir, SE, st);		\
 | 
				
			||||||
 | 
					    Recon(result, Uchi);					\
 | 
				
			||||||
 | 
					    nmu++;							\
 | 
				
			||||||
 | 
					  }								\
 | 
				
			||||||
 | 
					  acceleratorSynchronise();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,Recon)		\
 | 
				
			||||||
 | 
					    if (SE->_is_local ) {					\
 | 
				
			||||||
 | 
					      int perm= SE->_permute;					\
 | 
				
			||||||
 | 
					      auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane);	\
 | 
				
			||||||
 | 
					      spProj(chi,tmp);						\
 | 
				
			||||||
 | 
					    } else {							\
 | 
				
			||||||
 | 
					      chi = coalescedRead(buf[SE->_offset],lane);		\
 | 
				
			||||||
 | 
					    }								\
 | 
				
			||||||
 | 
					    acceleratorSynchronise();					\
 | 
				
			||||||
 | 
					    Impl::multLink(Uchi, U[sU], chi, dir, SE, st);		\
 | 
				
			||||||
 | 
					    Recon(result, Uchi);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define GENERIC_DHOPDIR_LEG(Dir,spProj,Recon)			\
 | 
				
			||||||
 | 
					  if (gamma == Dir) {						\
 | 
				
			||||||
 | 
					    GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,Recon);			\
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					// All legs kernels ; comms then compute
 | 
				
			||||||
 | 
					////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					template <class Impl> accelerator_inline
 | 
				
			||||||
 | 
					void TwoSpinWilsonKernels<Impl>::DhopSiteDag(StencilView &st, DoubledGaugeFieldView &U,
 | 
				
			||||||
 | 
										     SiteSpinor *buf, int sF,
 | 
				
			||||||
 | 
										     int sU, const FermionFieldView &in, FermionFieldView &out)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  typedef decltype(coalescedRead(in[0])) calcSpinor;
 | 
				
			||||||
 | 
					  calcSpinor chi;
 | 
				
			||||||
 | 
					  calcSpinor Uchi;
 | 
				
			||||||
 | 
					  calcSpinor result;
 | 
				
			||||||
 | 
					  StencilEntry *SE;
 | 
				
			||||||
 | 
					  int ptype;
 | 
				
			||||||
 | 
					  const int Nsimd = SiteSpinor::Nsimd();
 | 
				
			||||||
 | 
					  const int lane=acceleratorSIMTlane(Nsimd);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG(Xp,pauliProjXp,pauliAssign);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG(Yp,pauliProjYp,pauliAdd);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG(Zp,pauliProjZp,pauliAdd);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG(Xm,pauliProjXm,pauliAdd);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG(Ym,pauliProjYm,pauliAdd);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG(Zm,pauliProjZm,pauliAdd);
 | 
				
			||||||
 | 
					  coalescedWrite(out[sF],result,lane);
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <class Impl> accelerator_inline
 | 
				
			||||||
 | 
					void TwoSpinWilsonKernels<Impl>::GenericDhopSite(StencilView &st, DoubledGaugeFieldView &U,
 | 
				
			||||||
 | 
										  SiteSpinor *buf, int sF,
 | 
				
			||||||
 | 
										  int sU, const FermionFieldView &in, FermionFieldView &out)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  typedef decltype(coalescedRead(in[0]))  calcSpinor;
 | 
				
			||||||
 | 
					  calcSpinor chi;
 | 
				
			||||||
 | 
					  //  calcSpinor *chi_p;
 | 
				
			||||||
 | 
					  calcSpinor Uchi;
 | 
				
			||||||
 | 
					  calcSpinor result;
 | 
				
			||||||
 | 
					  StencilEntry *SE;
 | 
				
			||||||
 | 
					  int ptype;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const int Nsimd = SiteSpinor::Nsimd();
 | 
				
			||||||
 | 
					  const int lane=acceleratorSIMTlane(Nsimd);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG(Xm,pauliProjXp,pauliAssign);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG(Ym,pauliProjYp,pauliAdd);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG(Zm,pauliProjZp,pauliAdd);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG(Xp,pauliProjXm,pauliAdd);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG(Yp,pauliProjYm,pauliAdd);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG(Zp,pauliProjZm,pauliAdd);
 | 
				
			||||||
 | 
					  coalescedWrite(out[sF], result,lane);
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Interior kernels
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					template <class Impl> accelerator_inline
 | 
				
			||||||
 | 
					void TwoSpinWilsonKernels<Impl>::GenericDhopSiteDagInt(StencilView &st,  DoubledGaugeFieldView &U,
 | 
				
			||||||
 | 
											       SiteSpinor *buf, int sF,
 | 
				
			||||||
 | 
											       int sU, const FermionFieldView &in, FermionFieldView &out)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  typedef decltype(coalescedRead(in[0]))  calcSpinor;
 | 
				
			||||||
 | 
					  calcSpinor chi;
 | 
				
			||||||
 | 
					  //  calcSpinor *chi_p;
 | 
				
			||||||
 | 
					  calcSpinor Uchi;
 | 
				
			||||||
 | 
					  calcSpinor result;
 | 
				
			||||||
 | 
					  StencilEntry *SE;
 | 
				
			||||||
 | 
					  int ptype;
 | 
				
			||||||
 | 
					  const int Nsimd = SiteSpinor::Nsimd();
 | 
				
			||||||
 | 
					  const int lane=acceleratorSIMTlane(Nsimd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  result=Zero();
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG_INT(Xp,pauliProjXp,pauliAdd);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG_INT(Yp,pauliProjYp,pauliAdd);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG_INT(Zp,pauliProjZp,pauliAdd);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG_INT(Xm,pauliProjXm,pauliAdd);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG_INT(Ym,pauliProjYm,pauliAdd);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG_INT(Zm,pauliProjZm,pauliAdd);
 | 
				
			||||||
 | 
					  coalescedWrite(out[sF], result,lane);
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <class Impl> accelerator_inline
 | 
				
			||||||
 | 
					void TwoSpinWilsonKernels<Impl>::GenericDhopSiteInt(StencilView &st,  DoubledGaugeFieldView &U,
 | 
				
			||||||
 | 
											    SiteSpinor *buf, int sF,
 | 
				
			||||||
 | 
											    int sU, const FermionFieldView &in, FermionFieldView &out)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  typedef decltype(coalescedRead(in[0]))  calcSpinor;
 | 
				
			||||||
 | 
					  const int Nsimd = SiteSpinor::Nsimd();
 | 
				
			||||||
 | 
					  const int lane=acceleratorSIMTlane(Nsimd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  calcSpinor chi;
 | 
				
			||||||
 | 
					  //  calcSpinor *chi_p;
 | 
				
			||||||
 | 
					  calcSpinor Uchi;
 | 
				
			||||||
 | 
					  calcSpinor result;
 | 
				
			||||||
 | 
					  StencilEntry *SE;
 | 
				
			||||||
 | 
					  int ptype;
 | 
				
			||||||
 | 
					  result=Zero();
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG_INT(Xm,pauliProjXp,pauliAdd);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG_INT(Ym,pauliProjYp,pauliAdd);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG_INT(Zm,pauliProjZp,pauliAdd);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG_INT(Xp,pauliProjXm,pauliAdd);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG_INT(Yp,pauliProjYm,pauliAdd);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG_INT(Zp,pauliProjZm,pauliAdd);
 | 
				
			||||||
 | 
					  coalescedWrite(out[sF], result,lane);
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					// Exterior kernels
 | 
				
			||||||
 | 
					////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					template <class Impl> accelerator_inline
 | 
				
			||||||
 | 
					void TwoSpinWilsonKernels<Impl>::GenericDhopSiteDagExt(StencilView &st,  DoubledGaugeFieldView &U,
 | 
				
			||||||
 | 
											SiteSpinor *buf, int sF,
 | 
				
			||||||
 | 
											int sU, const FermionFieldView &in, FermionFieldView &out)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  typedef decltype(coalescedRead(in[0]))  calcSpinor;
 | 
				
			||||||
 | 
					  //  calcSpinor *chi_p;
 | 
				
			||||||
 | 
					  calcSpinor Uchi;
 | 
				
			||||||
 | 
					  calcSpinor result;
 | 
				
			||||||
 | 
					  StencilEntry *SE;
 | 
				
			||||||
 | 
					  int ptype;
 | 
				
			||||||
 | 
					  int nmu=0;
 | 
				
			||||||
 | 
					  const int Nsimd = SiteSpinor::Nsimd();
 | 
				
			||||||
 | 
					  const int lane=acceleratorSIMTlane(Nsimd);
 | 
				
			||||||
 | 
					  result=Zero();
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG_EXT(Xp,pauliProjXp,pauliAdd);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG_EXT(Yp,pauliProjYp,pauliAdd);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG_EXT(Zp,pauliProjZp,pauliAdd);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG_EXT(Xm,pauliProjXm,pauliAdd);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG_EXT(Ym,pauliProjYm,pauliAdd);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG_EXT(Zm,pauliProjZm,pauliAdd);
 | 
				
			||||||
 | 
					  if ( nmu ) {
 | 
				
			||||||
 | 
					    auto out_t = coalescedRead(out[sF],lane);
 | 
				
			||||||
 | 
					    out_t = out_t + result;
 | 
				
			||||||
 | 
					    coalescedWrite(out[sF],out_t,lane);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <class Impl> accelerator_inline
 | 
				
			||||||
 | 
					void TwoSpinWilsonKernels<Impl>::GenericDhopSiteExt(StencilView &st,  DoubledGaugeFieldView &U,
 | 
				
			||||||
 | 
										     SiteSpinor *buf, int sF,
 | 
				
			||||||
 | 
										     int sU, const FermionFieldView &in, FermionFieldView &out)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  typedef decltype(coalescedRead(in[0]))  calcSpinor;
 | 
				
			||||||
 | 
					  //  calcSpinor *chi_p;
 | 
				
			||||||
 | 
					  calcSpinor Uchi;
 | 
				
			||||||
 | 
					  calcSpinor result;
 | 
				
			||||||
 | 
					  StencilEntry *SE;
 | 
				
			||||||
 | 
					  int ptype;
 | 
				
			||||||
 | 
					  int nmu=0;
 | 
				
			||||||
 | 
					  const int Nsimd = SiteSpinor::Nsimd();
 | 
				
			||||||
 | 
					  const int lane=acceleratorSIMTlane(Nsimd);
 | 
				
			||||||
 | 
					  result=Zero();
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG_EXT(Xm,pauliProjXp,pauliAdd);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG_EXT(Ym,pauliProjYp,pauliAdd);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG_EXT(Zm,pauliProjZp,pauliAdd);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG_EXT(Xp,pauliProjXm,pauliAdd);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG_EXT(Yp,pauliProjYm,pauliAdd);
 | 
				
			||||||
 | 
					  GENERIC_STENCIL_LEG_EXT(Zp,pauliProjZm,pauliAdd);
 | 
				
			||||||
 | 
					  if ( nmu ) {
 | 
				
			||||||
 | 
					    auto out_t = coalescedRead(out[sF],lane);
 | 
				
			||||||
 | 
					    out_t = out_t + result;
 | 
				
			||||||
 | 
					    coalescedWrite(out[sF],out_t,lane);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define DhopDirMacro(Dir,spProj,spRecon)	\
 | 
				
			||||||
 | 
					  template <class Impl> accelerator_inline				\
 | 
				
			||||||
 | 
					  void TwoSpinWilsonKernels<Impl>::DhopDir##Dir(StencilView &st, DoubledGaugeFieldView &U,SiteSpinor *buf, int sF, \
 | 
				
			||||||
 | 
										 int sU, const FermionFieldView &in, FermionFieldView &out, int dir) \
 | 
				
			||||||
 | 
					  {									\
 | 
				
			||||||
 | 
					  typedef decltype(coalescedRead(in[0]))  calcSpinor;			\
 | 
				
			||||||
 | 
					  calcSpinor chi;							\
 | 
				
			||||||
 | 
					  calcSpinor result;							\
 | 
				
			||||||
 | 
					  calcSpinor Uchi;							\
 | 
				
			||||||
 | 
					  StencilEntry *SE;							\
 | 
				
			||||||
 | 
					  int ptype;								\
 | 
				
			||||||
 | 
					  const int Nsimd = SiteSpinor::Nsimd();				\
 | 
				
			||||||
 | 
					  const int lane=acceleratorSIMTlane(Nsimd);					\
 | 
				
			||||||
 | 
														\
 | 
				
			||||||
 | 
					  SE = st.GetEntry(ptype, dir, sF);					\
 | 
				
			||||||
 | 
					  GENERIC_DHOPDIR_LEG_BODY(Dir,spProj,spRecon);				\
 | 
				
			||||||
 | 
					  coalescedWrite(out[sF], result,lane);					\
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					DhopDirMacro(Xp,pauliProjXp,pauliAssign);
 | 
				
			||||||
 | 
					DhopDirMacro(Yp,pauliProjYp,pauliAssign);
 | 
				
			||||||
 | 
					DhopDirMacro(Zp,pauliProjZp,pauliAssign);
 | 
				
			||||||
 | 
					DhopDirMacro(Xm,pauliProjXm,pauliAssign);
 | 
				
			||||||
 | 
					DhopDirMacro(Ym,pauliProjYm,pauliAssign);
 | 
				
			||||||
 | 
					DhopDirMacro(Zm,pauliProjZm,pauliAssign);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <class Impl> accelerator_inline
 | 
				
			||||||
 | 
					void TwoSpinWilsonKernels<Impl>::DhopDirK( StencilView &st, DoubledGaugeFieldView &U,SiteSpinor *buf, int sF,
 | 
				
			||||||
 | 
									    int sU, const FermionFieldView &in, FermionFieldView &out, int dir, int gamma)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  typedef decltype(coalescedRead(in[0]))  calcSpinor;
 | 
				
			||||||
 | 
					  calcSpinor chi;
 | 
				
			||||||
 | 
					  calcSpinor result;
 | 
				
			||||||
 | 
					  calcSpinor Uchi;
 | 
				
			||||||
 | 
					  StencilEntry *SE;
 | 
				
			||||||
 | 
					  int ptype;
 | 
				
			||||||
 | 
					  const int Nsimd = SiteSpinor::Nsimd();
 | 
				
			||||||
 | 
					  const int lane=acceleratorSIMTlane(Nsimd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  SE = st.GetEntry(ptype, dir, sF);
 | 
				
			||||||
 | 
					  GENERIC_DHOPDIR_LEG(Xp,pauliProjXp,pauliAssign);
 | 
				
			||||||
 | 
					  GENERIC_DHOPDIR_LEG(Yp,pauliProjYp,pauliAssign);
 | 
				
			||||||
 | 
					  GENERIC_DHOPDIR_LEG(Zp,pauliProjZp,pauliAssign);
 | 
				
			||||||
 | 
					  GENERIC_DHOPDIR_LEG(Xm,pauliProjXm,pauliAssign);
 | 
				
			||||||
 | 
					  GENERIC_DHOPDIR_LEG(Ym,pauliProjYm,pauliAssign);
 | 
				
			||||||
 | 
					  GENERIC_DHOPDIR_LEG(Zm,pauliProjZm,pauliAssign);
 | 
				
			||||||
 | 
					  coalescedWrite(out[sF], result,lane);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <class Impl>
 | 
				
			||||||
 | 
					void TwoSpinWilsonKernels<Impl>::DhopDirAll( StencilImpl &st, DoubledGaugeField &U,SiteSpinor *buf, int Ls,
 | 
				
			||||||
 | 
									      int Nsite, const FermionField &in, std::vector<FermionField> &out)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					   autoView(U_v  ,U,AcceleratorRead);
 | 
				
			||||||
 | 
					   autoView(in_v ,in,AcceleratorRead);
 | 
				
			||||||
 | 
					   autoView(st_v ,st,AcceleratorRead);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   autoView(out_Xm,out[0],AcceleratorWrite);
 | 
				
			||||||
 | 
					   autoView(out_Ym,out[1],AcceleratorWrite);
 | 
				
			||||||
 | 
					   autoView(out_Zm,out[2],AcceleratorWrite);
 | 
				
			||||||
 | 
					   autoView(out_Xp,out[4],AcceleratorWrite);
 | 
				
			||||||
 | 
					   autoView(out_Yp,out[5],AcceleratorWrite);
 | 
				
			||||||
 | 
					   autoView(out_Zp,out[6],AcceleratorWrite);
 | 
				
			||||||
 | 
					   auto CBp=st.CommBuf();
 | 
				
			||||||
 | 
					   accelerator_for(sss,Nsite*Ls,Simd::Nsimd(),{
 | 
				
			||||||
 | 
					      int sU=sss/Ls;
 | 
				
			||||||
 | 
					      int sF =sss;
 | 
				
			||||||
 | 
					      DhopDirXm(st_v,U_v,CBp,sF,sU,in_v,out_Xm,0);
 | 
				
			||||||
 | 
					      DhopDirYm(st_v,U_v,CBp,sF,sU,in_v,out_Ym,1);
 | 
				
			||||||
 | 
					      DhopDirZm(st_v,U_v,CBp,sF,sU,in_v,out_Zm,2);
 | 
				
			||||||
 | 
					      DhopDirXp(st_v,U_v,CBp,sF,sU,in_v,out_Xp,3);
 | 
				
			||||||
 | 
					      DhopDirYp(st_v,U_v,CBp,sF,sU,in_v,out_Yp,4);
 | 
				
			||||||
 | 
					      DhopDirZp(st_v,U_v,CBp,sF,sU,in_v,out_Zp,5);
 | 
				
			||||||
 | 
					   });
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <class Impl>
 | 
				
			||||||
 | 
					void TwoSpinWilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,SiteSpinor *buf, int Ls,
 | 
				
			||||||
 | 
										 int Nsite, const FermionField &in, FermionField &out, int dirdisp, int gamma)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  assert(dirdisp<=5);
 | 
				
			||||||
 | 
					  assert(dirdisp>=0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   autoView(U_v  ,U  ,AcceleratorRead);
 | 
				
			||||||
 | 
					   autoView(in_v ,in ,AcceleratorRead);
 | 
				
			||||||
 | 
					   autoView(out_v,out,AcceleratorWrite);
 | 
				
			||||||
 | 
					   autoView(st_v ,st ,AcceleratorRead);
 | 
				
			||||||
 | 
					   auto CBp=st.CommBuf();
 | 
				
			||||||
 | 
					#define LoopBody(Dir)				\
 | 
				
			||||||
 | 
					   case Dir :					\
 | 
				
			||||||
 | 
					     accelerator_for(ss,Nsite,Simd::Nsimd(),{	\
 | 
				
			||||||
 | 
					       for(int s=0;s<Ls;s++){			\
 | 
				
			||||||
 | 
						 int sU=ss;				\
 | 
				
			||||||
 | 
						 int sF = s+Ls*sU;						\
 | 
				
			||||||
 | 
						 DhopDir##Dir(st_v,U_v,CBp,sF,sU,in_v,out_v,dirdisp);\
 | 
				
			||||||
 | 
					       }							       \
 | 
				
			||||||
 | 
					       });							       \
 | 
				
			||||||
 | 
					     break;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   switch(gamma){
 | 
				
			||||||
 | 
					   LoopBody(Xp);
 | 
				
			||||||
 | 
					   LoopBody(Yp);
 | 
				
			||||||
 | 
					   LoopBody(Zp);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   LoopBody(Xm);
 | 
				
			||||||
 | 
					   LoopBody(Ym);
 | 
				
			||||||
 | 
					   LoopBody(Zm);
 | 
				
			||||||
 | 
					   default:
 | 
				
			||||||
 | 
					     assert(0);
 | 
				
			||||||
 | 
					     break;
 | 
				
			||||||
 | 
					   }
 | 
				
			||||||
 | 
					#undef LoopBody
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define KERNEL_CALLNB(A)						\
 | 
				
			||||||
 | 
					  const uint64_t    NN = Nsite*Ls;					\
 | 
				
			||||||
 | 
					  accelerator_forNB( ss, NN, Simd::Nsimd(), {				\
 | 
				
			||||||
 | 
					      int sF = ss;							\
 | 
				
			||||||
 | 
					      int sU = ss/Ls;							\
 | 
				
			||||||
 | 
					      TwoSpinWilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v);		\
 | 
				
			||||||
 | 
					    });
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define KERNEL_CALL_EXT(A)						\
 | 
				
			||||||
 | 
					  const uint64_t    sz = st.surface_list.size();			\
 | 
				
			||||||
 | 
					  auto ptr = &st.surface_list[0];					\
 | 
				
			||||||
 | 
					  accelerator_forNB( ss, sz, Simd::Nsimd(), {				\
 | 
				
			||||||
 | 
					      int sF = ptr[ss];							\
 | 
				
			||||||
 | 
					      int sU = sF/Ls;							\
 | 
				
			||||||
 | 
					      TwoSpinWilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v);		\
 | 
				
			||||||
 | 
					    });									\
 | 
				
			||||||
 | 
					  accelerator_barrier();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <class Impl>
 | 
				
			||||||
 | 
					void TwoSpinWilsonKernels<Impl>::DhopKernel(StencilImpl &st,  DoubledGaugeField &U, SiteSpinor * buf,
 | 
				
			||||||
 | 
										    int Ls, int Nsite, const FermionField &in, FermionField &out,
 | 
				
			||||||
 | 
										    int interior,int exterior)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  autoView(U_v  ,  U,AcceleratorRead);
 | 
				
			||||||
 | 
					  autoView(in_v , in,AcceleratorRead);
 | 
				
			||||||
 | 
					  autoView(out_v,out,AcceleratorWrite);
 | 
				
			||||||
 | 
					  autoView(st_v , st,AcceleratorRead);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  if( interior && exterior ) {
 | 
				
			||||||
 | 
					    acceleratorFenceComputeStream();
 | 
				
			||||||
 | 
					    KERNEL_CALL(GenericDhopSite);
 | 
				
			||||||
 | 
					    return;
 | 
				
			||||||
 | 
					  } else if( interior ) {
 | 
				
			||||||
 | 
					    KERNEL_CALLNB(GenericDhopSiteInt);
 | 
				
			||||||
 | 
					    return;
 | 
				
			||||||
 | 
					  } else if( exterior ) {
 | 
				
			||||||
 | 
					    //     // dependent on result of merge
 | 
				
			||||||
 | 
					    acceleratorFenceComputeStream();
 | 
				
			||||||
 | 
					    KERNEL_CALL_EXT(GenericDhopSiteExt);
 | 
				
			||||||
 | 
					    return;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  assert(0 && " Kernel optimisation case not covered ");
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <class Impl>
 | 
				
			||||||
 | 
					void TwoSpinWilsonKernels<Impl>::DhopDagKernel(StencilImpl &st,  DoubledGaugeField &U, SiteSpinor * buf,
 | 
				
			||||||
 | 
										       int Ls, int Nsite, const FermionField &in, FermionField &out,
 | 
				
			||||||
 | 
										       int interior,int exterior)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  autoView(U_v  ,U,AcceleratorRead);
 | 
				
			||||||
 | 
					  autoView(in_v ,in,AcceleratorRead);
 | 
				
			||||||
 | 
					  autoView(out_v,out,AcceleratorWrite);
 | 
				
			||||||
 | 
					  autoView(st_v ,st,AcceleratorRead);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  if( interior && exterior ) {
 | 
				
			||||||
 | 
					    acceleratorFenceComputeStream();
 | 
				
			||||||
 | 
					    KERNEL_CALL(GenericDhopSiteDag);
 | 
				
			||||||
 | 
					    return;
 | 
				
			||||||
 | 
					  } else if( interior ) {
 | 
				
			||||||
 | 
					    KERNEL_CALLNB(GenericDhopSiteDagInt); return;
 | 
				
			||||||
 | 
					  } else if( exterior ) {
 | 
				
			||||||
 | 
					    // Dependent on result of merge
 | 
				
			||||||
 | 
					    acceleratorFenceComputeStream();
 | 
				
			||||||
 | 
					    KERNEL_CALL_EXT(GenericDhopSiteDagExt); return;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  assert(0 && " Kernel optimisation case not covered ");
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#undef KERNEL_CALLNB
 | 
				
			||||||
 | 
					#undef KERNEL_CALL
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
@@ -61,7 +61,7 @@ WilsonCloverFermion<Impl, CloverHelpers>::WilsonCloverFermion(GaugeField&
 | 
				
			|||||||
    diag_mass = _mass + 1.0 + (Nd - 1) * (clover_anisotropy.nu / clover_anisotropy.xi_0);
 | 
					    diag_mass = _mass + 1.0 + (Nd - 1) * (clover_anisotropy.nu / clover_anisotropy.xi_0);
 | 
				
			||||||
  } else {
 | 
					  } else {
 | 
				
			||||||
    csw_r     = _csw_r * 0.5;
 | 
					    csw_r     = _csw_r * 0.5;
 | 
				
			||||||
    diag_mass = 4.0 + _mass;
 | 
					    diag_mass = Nd*1.0 + _mass;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  csw_t = _csw_t * 0.5;
 | 
					  csw_t = _csw_t * 0.5;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -299,7 +299,7 @@ void WilsonCloverFermion<Impl, CloverHelpers>::MDeriv(GaugeField &force, const F
 | 
				
			|||||||
      continue;
 | 
					      continue;
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
      RealD factor;
 | 
					      RealD factor;
 | 
				
			||||||
      if (nu == 4 || mu == 4)
 | 
					      if (nu == (Nd-1) || mu == (Nd-1)) // This was a bug - surely mu/nu is NEVER 4 but rather (Nd-1)=3 ??
 | 
				
			||||||
      {
 | 
					      {
 | 
				
			||||||
        factor = 2.0 * csw_t;
 | 
					        factor = 2.0 * csw_t;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
@@ -307,9 +307,11 @@ void WilsonCloverFermion<Impl, CloverHelpers>::MDeriv(GaugeField &force, const F
 | 
				
			|||||||
      {
 | 
					      {
 | 
				
			||||||
        factor = 2.0 * csw_r;
 | 
					        factor = 2.0 * csw_r;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
      PropagatorField Slambda = Gamma(sigma[count]) * Lambda; // sigma checked
 | 
					      if ( mu < Nd && nu < Nd ) { // Allow to restrict range to Nd=3, but preserve orders of SigmaMuNu in table by counting ALL
 | 
				
			||||||
      Impl::TraceSpinImpl(lambda, Slambda);                   // traceSpin ok
 | 
						PropagatorField Slambda = Gamma(sigma[count]) * Lambda; // sigma checked
 | 
				
			||||||
      force_mu -= factor*CloverHelpers::Cmunu(U, lambda, mu, nu);                   // checked
 | 
						Impl::TraceSpinImpl(lambda, Slambda);                   // traceSpin ok
 | 
				
			||||||
 | 
						force_mu -= factor*CloverHelpers::Cmunu(U, lambda, mu, nu);                   // checked
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
      count++;
 | 
					      count++;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -63,10 +63,10 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
 | 
				
			|||||||
  Dirichlet(0)
 | 
					  Dirichlet(0)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  // some assertions
 | 
					  // some assertions
 | 
				
			||||||
  assert(FiveDimGrid._ndimension==5);
 | 
					  assert(FiveDimGrid._ndimension==Nd+1);
 | 
				
			||||||
  assert(FourDimGrid._ndimension==4);
 | 
					  assert(FourDimGrid._ndimension==Nd);
 | 
				
			||||||
  assert(FourDimRedBlackGrid._ndimension==4);
 | 
					  assert(FourDimRedBlackGrid._ndimension==Nd);
 | 
				
			||||||
  assert(FiveDimRedBlackGrid._ndimension==5);
 | 
					  assert(FiveDimRedBlackGrid._ndimension==Nd+1);
 | 
				
			||||||
  assert(FiveDimRedBlackGrid._checker_dim==1); // Don't checker the s direction
 | 
					  assert(FiveDimRedBlackGrid._checker_dim==1); // Don't checker the s direction
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // extent of fifth dim and not spread out
 | 
					  // extent of fifth dim and not spread out
 | 
				
			||||||
@@ -76,7 +76,7 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
 | 
				
			|||||||
  assert(FiveDimRedBlackGrid._processors[0] ==1);
 | 
					  assert(FiveDimRedBlackGrid._processors[0] ==1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // Other dimensions must match the decomposition of the four-D fields 
 | 
					  // Other dimensions must match the decomposition of the four-D fields 
 | 
				
			||||||
  for(int d=0;d<4;d++){
 | 
					  for(int d=0;d<Nd;d++){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    assert(FiveDimGrid._processors[d+1]         ==FourDimGrid._processors[d]);
 | 
					    assert(FiveDimGrid._processors[d+1]         ==FourDimGrid._processors[d]);
 | 
				
			||||||
    assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]);
 | 
					    assert(FiveDimRedBlackGrid._processors[d+1] ==FourDimGrid._processors[d]);
 | 
				
			||||||
@@ -93,11 +93,13 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  if ( p.dirichlet.size() == Nd+1) {
 | 
					  if ( p.dirichlet.size() == Nd+1) {
 | 
				
			||||||
    Coordinate block = p.dirichlet;
 | 
					    Coordinate block = p.dirichlet;
 | 
				
			||||||
    if ( block[0] || block[1] || block[2] || block[3] || block[4] ){
 | 
					    for(int d=0;d<Nd+1;d++) {
 | 
				
			||||||
      Dirichlet = 1;
 | 
					      if ( block[d] ){
 | 
				
			||||||
      std::cout << GridLogMessage << " WilsonFermion: non-trivial Dirichlet condition "<< block << std::endl;
 | 
						Dirichlet = 1;
 | 
				
			||||||
      std::cout << GridLogMessage << " WilsonFermion: partial Dirichlet "<< p.partialDirichlet << std::endl;
 | 
						std::cout << GridLogMessage << " WilsonFermion: non-trivial Dirichlet condition "<< block << std::endl;
 | 
				
			||||||
      Block = block;
 | 
						std::cout << GridLogMessage << " WilsonFermion: partial Dirichlet "<< p.partialDirichlet << std::endl;
 | 
				
			||||||
 | 
						Block = block;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  } else {
 | 
					  } else {
 | 
				
			||||||
    Coordinate block(Nd+1,0);
 | 
					    Coordinate block(Nd+1,0);
 | 
				
			||||||
@@ -112,7 +114,7 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
 | 
				
			|||||||
    assert(FiveDimGrid._simd_layout[0]        ==nsimd);
 | 
					    assert(FiveDimGrid._simd_layout[0]        ==nsimd);
 | 
				
			||||||
    assert(FiveDimRedBlackGrid._simd_layout[0]==nsimd);
 | 
					    assert(FiveDimRedBlackGrid._simd_layout[0]==nsimd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for(int d=0;d<4;d++){
 | 
					    for(int d=0;d<Nd;d++){
 | 
				
			||||||
      assert(FourDimGrid._simd_layout[d]==1);
 | 
					      assert(FourDimGrid._simd_layout[d]==1);
 | 
				
			||||||
      assert(FourDimRedBlackGrid._simd_layout[d]==1);
 | 
					      assert(FourDimRedBlackGrid._simd_layout[d]==1);
 | 
				
			||||||
      assert(FiveDimRedBlackGrid._simd_layout[d+1]==1);
 | 
					      assert(FiveDimRedBlackGrid._simd_layout[d+1]==1);
 | 
				
			||||||
@@ -183,8 +185,8 @@ void WilsonFermion5D<Impl>::DhopDir(const FermionField &in, FermionField &out,in
 | 
				
			|||||||
  //  assert( (dir>=0)&&(dir<4) ); //must do x,y,z or t;
 | 
					  //  assert( (dir>=0)&&(dir<4) ); //must do x,y,z or t;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  int skip = (disp==1) ? 0 : 1;
 | 
					  int skip = (disp==1) ? 0 : 1;
 | 
				
			||||||
  int dirdisp = dir+skip*4;
 | 
					  int dirdisp = dir+skip*Nd;
 | 
				
			||||||
  int gamma   = dir+(1-skip)*4;
 | 
					  int gamma   = dir+(1-skip)*Nd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  Compressor compressor(DaggerNo);
 | 
					  Compressor compressor(DaggerNo);
 | 
				
			||||||
  Stencil.HaloExchange(in,compressor);
 | 
					  Stencil.HaloExchange(in,compressor);
 | 
				
			||||||
@@ -483,7 +485,7 @@ void WilsonFermion5D<Impl>::DW(const FermionField &in, FermionField &out,int dag
 | 
				
			|||||||
{
 | 
					{
 | 
				
			||||||
  out.Checkerboard()=in.Checkerboard();
 | 
					  out.Checkerboard()=in.Checkerboard();
 | 
				
			||||||
  Dhop(in,out,dag); // -0.5 is included
 | 
					  Dhop(in,out,dag); // -0.5 is included
 | 
				
			||||||
  axpy(out,4.0-M5,in,out);
 | 
					  axpy(out,Nd*1.0-M5,in,out);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
template <class Impl>
 | 
					template <class Impl>
 | 
				
			||||||
void WilsonFermion5D<Impl>::Meooe(const FermionField &in, FermionField &out)
 | 
					void WilsonFermion5D<Impl>::Meooe(const FermionField &in, FermionField &out)
 | 
				
			||||||
@@ -509,7 +511,7 @@ template <class Impl>
 | 
				
			|||||||
void WilsonFermion5D<Impl>::Mooee(const FermionField &in, FermionField &out)
 | 
					void WilsonFermion5D<Impl>::Mooee(const FermionField &in, FermionField &out)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  out.Checkerboard() = in.Checkerboard();
 | 
					  out.Checkerboard() = in.Checkerboard();
 | 
				
			||||||
  typename FermionField::scalar_type scal(4.0 + M5);
 | 
					  typename FermionField::scalar_type scal(Nd*1.0 + M5);
 | 
				
			||||||
  out = scal * in;
 | 
					  out = scal * in;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -524,7 +526,7 @@ template<class Impl>
 | 
				
			|||||||
void WilsonFermion5D<Impl>::MooeeInv(const FermionField &in, FermionField &out)
 | 
					void WilsonFermion5D<Impl>::MooeeInv(const FermionField &in, FermionField &out)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  out.Checkerboard() = in.Checkerboard();
 | 
					  out.Checkerboard() = in.Checkerboard();
 | 
				
			||||||
  out = (1.0/(4.0 + M5))*in;
 | 
					  out = (1.0/(Nd*1.0 + M5))*in;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class Impl>
 | 
					template<class Impl>
 | 
				
			||||||
@@ -635,7 +637,7 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const
 | 
				
			|||||||
  A = one / (abs(W) * sinha * 2.0) * one / (sinhaLs * 2.0);
 | 
					  A = one / (abs(W) * sinha * 2.0) * one / (sinhaLs * 2.0);
 | 
				
			||||||
  F = eaLs * (one - Wea + (Wema - one) * mass*mass);
 | 
					  F = eaLs * (one - Wea + (Wema - one) * mass*mass);
 | 
				
			||||||
  F = F + emaLs * (Wema - one + (one - Wea) * mass*mass);
 | 
					  F = F + emaLs * (Wema - one + (one - Wea) * mass*mass);
 | 
				
			||||||
  F = F - abs(W) * sinha * 4.0 * mass;
 | 
					  F = F - abs(W) * sinha * (Nd* 1.0) * mass;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  Bpp =  (A/F) * (ema2Ls - one) * (one - Wema) * (one - mass*mass * one);
 | 
					  Bpp =  (A/F) * (ema2Ls - one) * (one - Wema) * (one - mass*mass * one);
 | 
				
			||||||
  Bmm =  (A/F) * (one - ea2Ls)  * (one - Wea) * (one - mass*mass * one);
 | 
					  Bmm =  (A/F) * (one - ea2Ls)  * (one - Wea) * (one - mass*mass * one);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -63,7 +63,7 @@ WilsonFermion<Impl>::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid,
 | 
				
			|||||||
  if  (anisotropyCoeff.isAnisotropic){
 | 
					  if  (anisotropyCoeff.isAnisotropic){
 | 
				
			||||||
    diag_mass = mass + 1.0 + (Nd-1)*(anisotropyCoeff.nu / anisotropyCoeff.xi_0);
 | 
					    diag_mass = mass + 1.0 + (Nd-1)*(anisotropyCoeff.nu / anisotropyCoeff.xi_0);
 | 
				
			||||||
  } else {
 | 
					  } else {
 | 
				
			||||||
    diag_mass = 4.0 + mass;
 | 
					    diag_mass = Nd*1.0 + mass;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  int vol4;
 | 
					  int vol4;
 | 
				
			||||||
@@ -354,8 +354,8 @@ void WilsonFermion<Impl>::DhopDir(const FermionField &in, FermionField &out, int
 | 
				
			|||||||
  Stencil.HaloExchange(in, compressor);
 | 
					  Stencil.HaloExchange(in, compressor);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  int skip = (disp == 1) ? 0 : 1;
 | 
					  int skip = (disp == 1) ? 0 : 1;
 | 
				
			||||||
  int dirdisp = dir + skip * 4;
 | 
					  int dirdisp = dir + skip * Nd;
 | 
				
			||||||
  int gamma = dir + (1 - skip) * 4;
 | 
					  int gamma = dir + (1 - skip) * Nd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  DhopDirCalc(in, out, dirdisp, gamma, DaggerNo);
 | 
					  DhopDirCalc(in, out, dirdisp, gamma, DaggerNo);
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
@@ -370,8 +370,8 @@ void WilsonFermion<Impl>::DhopDirAll(const FermionField &in, std::vector<Fermion
 | 
				
			|||||||
    for(int disp=-1;disp<=1;disp+=2){
 | 
					    for(int disp=-1;disp<=1;disp+=2){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      int skip = (disp == 1) ? 0 : 1;
 | 
					      int skip = (disp == 1) ? 0 : 1;
 | 
				
			||||||
      int dirdisp = dir + skip * 4;
 | 
					      int dirdisp = dir + skip * Nd;
 | 
				
			||||||
      int gamma = dir + (1 - skip) * 4;
 | 
					      int gamma = dir + (1 - skip) * Nd;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      DhopDirCalc(in, out[dirdisp], dirdisp, gamma, DaggerNo);
 | 
					      DhopDirCalc(in, out[dirdisp], dirdisp, gamma, DaggerNo);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -97,7 +97,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
				
			|||||||
  distance = st._distances[DIR];				\
 | 
					  distance = st._distances[DIR];				\
 | 
				
			||||||
  sl = st._simd_layout[direction];			        \
 | 
					  sl = st._simd_layout[direction];			        \
 | 
				
			||||||
  inplace_twist = 0;						\
 | 
					  inplace_twist = 0;						\
 | 
				
			||||||
  if(SE->_around_the_world && st.parameters.twists[DIR % 4]){		\
 | 
					  if(SE->_around_the_world && st.parameters.twists[DIR % Nd]){		\
 | 
				
			||||||
    if(sl == 1){							\
 | 
					    if(sl == 1){							\
 | 
				
			||||||
      g = (F+1) % 2;							\
 | 
					      g = (F+1) % 2;							\
 | 
				
			||||||
    }else{								\
 | 
					    }else{								\
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -32,8 +32,30 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			|||||||
NAMESPACE_BEGIN(Grid);
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
// S-direction is INNERMOST and takes no part in the parity.
 | 
					// S-direction is INNERMOST and takes no part in the parity.
 | 
				
			||||||
const std::vector<int> ImprovedStaggeredFermion5DStatic::directions({1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4});
 | 
					const std::vector<int> ImprovedStaggeredFermion5DStatic::directions(ImprovedStaggeredFermion5DStatic::MakeDirections());
 | 
				
			||||||
const std::vector<int> ImprovedStaggeredFermion5DStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1, 3, 3, 3, 3, -3, -3, -3, -3});
 | 
					const std::vector<int> ImprovedStaggeredFermion5DStatic::displacements(ImprovedStaggeredFermion5DStatic::MakeDisplacements());
 | 
				
			||||||
 | 
					std::vector<int> ImprovedStaggeredFermion5DStatic::MakeDirections(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  std::vector<int> directions(4*Nd);
 | 
				
			||||||
 | 
					  for(int d=0;d<Nd;d++){
 | 
				
			||||||
 | 
					    directions[d+Nd*0] = d+1;
 | 
				
			||||||
 | 
					    directions[d+Nd*1] = d+1;
 | 
				
			||||||
 | 
					    directions[d+Nd*2] = d+1;
 | 
				
			||||||
 | 
					    directions[d+Nd*3] = d+1;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  return directions;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					std::vector<int> ImprovedStaggeredFermion5DStatic::MakeDisplacements(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  std::vector<int> displacements(4*Nd);
 | 
				
			||||||
 | 
					  for(int d=0;d<Nd;d++){
 | 
				
			||||||
 | 
					    displacements[d+Nd*0] =+1;
 | 
				
			||||||
 | 
					    displacements[d+Nd*1] =-1;
 | 
				
			||||||
 | 
					    displacements[d+Nd*2] =+3;
 | 
				
			||||||
 | 
					    displacements[d+Nd*3] =-3;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  return displacements;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
NAMESPACE_END(Grid);
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -32,5 +32,26 @@ NAMESPACE_BEGIN(Grid);
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
const std::vector<int> ImprovedStaggeredFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3});
 | 
					const std::vector<int> ImprovedStaggeredFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3});
 | 
				
			||||||
const std::vector<int> ImprovedStaggeredFermionStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1, 3, 3, 3, 3, -3, -3, -3, -3});
 | 
					const std::vector<int> ImprovedStaggeredFermionStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1, 3, 3, 3, 3, -3, -3, -3, -3});
 | 
				
			||||||
 | 
					std::vector<int> ImprovedStaggeredFermionStatic::MakeDirections(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  std::vector<int> directions(4*Nd);
 | 
				
			||||||
 | 
					  for(int d=0;d<Nd;d++){
 | 
				
			||||||
 | 
					    directions[d+Nd*0] = d;
 | 
				
			||||||
 | 
					    directions[d+Nd*1] = d;
 | 
				
			||||||
 | 
					    directions[d+Nd*2] = d;
 | 
				
			||||||
 | 
					    directions[d+Nd*3] = d;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  return directions;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					std::vector<int> ImprovedStaggeredFermionStatic::MakeDisplacements(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  std::vector<int> displacements(4*Nd);
 | 
				
			||||||
 | 
					  for(int d=0;d<Nd;d++){
 | 
				
			||||||
 | 
					    displacements[d+Nd*0] =+1;
 | 
				
			||||||
 | 
					    displacements[d+Nd*1] =-1;
 | 
				
			||||||
 | 
					    displacements[d+Nd*2] =+3;
 | 
				
			||||||
 | 
					    displacements[d+Nd*3] =-3;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  return displacements;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
NAMESPACE_END(Grid);
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -30,7 +30,27 @@ directory
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
NAMESPACE_BEGIN(Grid);
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
const std::vector<int> NaiveStaggeredFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, 3});
 | 
					//const std::vector<int> NaiveStaggeredFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, 3});
 | 
				
			||||||
const std::vector<int> NaiveStaggeredFermionStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1});
 | 
					//const std::vector<int> NaiveStaggeredFermionStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1});
 | 
				
			||||||
 | 
					const std::vector<int> NaiveStaggeredFermionStatic::directions(NaiveStaggeredFermionStatic::MakeDirections());
 | 
				
			||||||
 | 
					const std::vector<int> NaiveStaggeredFermionStatic::displacements(NaiveStaggeredFermionStatic::MakeDisplacements());
 | 
				
			||||||
 | 
					std::vector<int> NaiveStaggeredFermionStatic::MakeDirections(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  std::vector<int> directions(4*Nd);
 | 
				
			||||||
 | 
					  for(int d=0;d<Nd;d++){
 | 
				
			||||||
 | 
					    directions[d+Nd*0] = d;
 | 
				
			||||||
 | 
					    directions[d+Nd*1] = d;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  return directions;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					std::vector<int> NaiveStaggeredFermionStatic::MakeDisplacements(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  std::vector<int> displacements(4*Nd);
 | 
				
			||||||
 | 
					  for(int d=0;d<Nd;d++){
 | 
				
			||||||
 | 
					    displacements[d+Nd*0] =+1;
 | 
				
			||||||
 | 
					    displacements[d+Nd*1] =-1;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  return displacements;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
NAMESPACE_END(Grid);
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -0,0 +1,61 @@
 | 
				
			|||||||
 | 
					/*************************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Grid physics library, www.github.com/paboyle/Grid
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Copyright (C) 2015
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
				
			||||||
 | 
					Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					the Free Software Foundation; either version 2 of the License, or
 | 
				
			||||||
 | 
					(at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					You should have received a copy of the GNU General Public License along
 | 
				
			||||||
 | 
					with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
 | 
					51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					See the full license in the file "LICENSE" in the top level distribution
 | 
				
			||||||
 | 
					directory
 | 
				
			||||||
 | 
					*************************************************************************************/
 | 
				
			||||||
 | 
					/*  END LEGAL */
 | 
				
			||||||
 | 
					#include <Grid/qcd/action/fermion/FermionCore.h>
 | 
				
			||||||
 | 
					#include <Grid/qcd/action/fermion/TwoSpinWilsonFermion3plus1D.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// S-direction is INNERMOST and takes no part in the parity.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					const std::vector<int> TwoSpinWilsonFermion3plus1DStatic::directions   (TwoSpinWilsonFermion3plus1DStatic::MakeDirections());
 | 
				
			||||||
 | 
					const std::vector<int> TwoSpinWilsonFermion3plus1DStatic::displacements(TwoSpinWilsonFermion3plus1DStatic::MakeDisplacements());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					std::vector<int> TwoSpinWilsonFermion3plus1DStatic::MakeDirections (void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  std::vector<int> directions(2*Nd);
 | 
				
			||||||
 | 
					  for(int d=0;d<Nd;d++){
 | 
				
			||||||
 | 
					    directions[d]    = d+1;
 | 
				
			||||||
 | 
					    directions[d+Nd] = d+1;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  return directions;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					std::vector<int> TwoSpinWilsonFermion3plus1DStatic::MakeDisplacements(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  std::vector<int> displacements(2*Nd);
 | 
				
			||||||
 | 
					  for(int d=0;d<Nd;d++){
 | 
				
			||||||
 | 
					    displacements[d]    = +1;
 | 
				
			||||||
 | 
					    displacements[d+Nd] = -1;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  return displacements;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -0,0 +1,40 @@
 | 
				
			|||||||
 | 
					/*************************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Grid physics library, www.github.com/paboyle/Grid
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Copyright (C) 2015
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
				
			||||||
 | 
					Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					the Free Software Foundation; either version 2 of the License, or
 | 
				
			||||||
 | 
					(at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					You should have received a copy of the GNU General Public License along
 | 
				
			||||||
 | 
					with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
 | 
					51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					See the full license in the file "LICENSE" in the top level distribution
 | 
				
			||||||
 | 
					directory
 | 
				
			||||||
 | 
					*************************************************************************************/
 | 
				
			||||||
 | 
					/*  END LEGAL */
 | 
				
			||||||
 | 
					#include <Grid/qcd/action/fermion/FermionCore.h>
 | 
				
			||||||
 | 
					#include <Grid/qcd/action/fermion/implementation/TwoSpinWilsonFermion3plus1DImplementation.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "impl.h"
 | 
				
			||||||
 | 
					template class TwoSpinWilsonFermion3plus1D<IMPLEMENTATION>; 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -0,0 +1,40 @@
 | 
				
			|||||||
 | 
					/*************************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Grid physics library, www.github.com/paboyle/Grid
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Source file: ./lib/qcd/action/fermion/WilsonKernels.cc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Copyright (C) 2015, 2020
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					Author: Peter Boyle <peterboyle@Peters-MacBook-Pro-2.local>
 | 
				
			||||||
 | 
					Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					Author: Nils Meyer <nils.meyer@ur.de> Regensburg University
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					the Free Software Foundation; either version 2 of the License, or
 | 
				
			||||||
 | 
					(at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					You should have received a copy of the GNU General Public License along
 | 
				
			||||||
 | 
					with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
 | 
					51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					See the full license in the file "LICENSE" in the top level distribution
 | 
				
			||||||
 | 
					directory
 | 
				
			||||||
 | 
					*************************************************************************************/
 | 
				
			||||||
 | 
					/*  END LEGAL */
 | 
				
			||||||
 | 
					#include <Grid/qcd/action/fermion/FermionCore.h>
 | 
				
			||||||
 | 
					#include <Grid/qcd/action/fermion/implementation/TwoSpinWilsonKernelsImplementation.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "impl.h"
 | 
				
			||||||
 | 
					template class TwoSpinWilsonKernels<IMPLEMENTATION>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
@@ -34,8 +34,28 @@ directory
 | 
				
			|||||||
NAMESPACE_BEGIN(Grid);
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// S-direction is INNERMOST and takes no part in the parity.
 | 
					// S-direction is INNERMOST and takes no part in the parity.
 | 
				
			||||||
const std::vector<int> WilsonFermion5DStatic::directions   ({1,2,3,4, 1, 2, 3, 4});
 | 
					
 | 
				
			||||||
const std::vector<int> WilsonFermion5DStatic::displacements({1,1,1,1,-1,-1,-1,-1});
 | 
					const std::vector<int> WilsonFermion5DStatic::directions   (WilsonFermion5DStatic::MakeDirections());
 | 
				
			||||||
 | 
					const std::vector<int> WilsonFermion5DStatic::displacements(WilsonFermion5DStatic::MakeDisplacements());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					std::vector<int> WilsonFermion5DStatic::MakeDirections (void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  std::vector<int> directions(2*Nd);
 | 
				
			||||||
 | 
					  for(int d=0;d<Nd;d++){
 | 
				
			||||||
 | 
					    directions[d]    = d+1;
 | 
				
			||||||
 | 
					    directions[d+Nd] = d+1;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  return directions;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					std::vector<int> WilsonFermion5DStatic::MakeDisplacements(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  std::vector<int> displacements(2*Nd);
 | 
				
			||||||
 | 
					  for(int d=0;d<Nd;d++){
 | 
				
			||||||
 | 
					    displacements[d]    = +1;
 | 
				
			||||||
 | 
					    displacements[d+Nd] = -1;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  return displacements;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
NAMESPACE_END(Grid);
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -33,9 +33,27 @@ directory
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
NAMESPACE_BEGIN(Grid);
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
const std::vector<int> WilsonFermionStatic::directions({0, 1, 2, 3, 0, 1, 2, 3});
 | 
					const std::vector<int> WilsonFermionStatic::directions(WilsonFermionStatic::MakeDirections());
 | 
				
			||||||
const std::vector<int> WilsonFermionStatic::displacements({1, 1, 1, 1, -1, -1, -1, -1});
 | 
					const std::vector<int> WilsonFermionStatic::displacements(WilsonFermionStatic::MakeDisplacements());
 | 
				
			||||||
int WilsonFermionStatic::HandOptDslash;
 | 
					int WilsonFermionStatic::HandOptDslash;
 | 
				
			||||||
 | 
					std::vector<int> WilsonFermionStatic::MakeDirections (void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  std::vector<int> directions(2*Nd);
 | 
				
			||||||
 | 
					  for(int d=0;d<Nd;d++){
 | 
				
			||||||
 | 
					    directions[d]    = d;
 | 
				
			||||||
 | 
					    directions[d+Nd] = d;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  return directions;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					std::vector<int> WilsonFermionStatic::MakeDisplacements(void)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  std::vector<int> displacements(2*Nd);
 | 
				
			||||||
 | 
					  for(int d=0;d<Nd;d++){
 | 
				
			||||||
 | 
					    displacements[d]    = +1;
 | 
				
			||||||
 | 
					    displacements[d+Nd] = -1;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  return displacements;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
NAMESPACE_END(Grid);
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -36,11 +36,16 @@ DWF_IMPL_LIST=" \
 | 
				
			|||||||
	   ZWilsonImplF \
 | 
						   ZWilsonImplF \
 | 
				
			||||||
	   ZWilsonImplD2 "
 | 
						   ZWilsonImplD2 "
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TWOSPIN_WILSON_IMPL_LIST=" \
 | 
				
			||||||
 | 
						   TwoSpinWilsonImplF \
 | 
				
			||||||
 | 
						   TwoSpinWilsonImplD "
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
GDWF_IMPL_LIST=" \
 | 
					GDWF_IMPL_LIST=" \
 | 
				
			||||||
	   GparityWilsonImplF \
 | 
						   GparityWilsonImplF \
 | 
				
			||||||
	   GparityWilsonImplD "
 | 
						   GparityWilsonImplD "
 | 
				
			||||||
 | 
					
 | 
				
			||||||
IMPL_LIST="$STAG_IMPL_LIST  $WILSON_IMPL_LIST $DWF_IMPL_LIST $GDWF_IMPL_LIST"
 | 
					IMPL_LIST="$STAG_IMPL_LIST  $WILSON_IMPL_LIST $DWF_IMPL_LIST $GDWF_IMPL_LIST $TWOSPIN_WILSON_IMPL_LIST"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
for impl in $IMPL_LIST
 | 
					for impl in $IMPL_LIST
 | 
				
			||||||
do
 | 
					do
 | 
				
			||||||
@@ -110,7 +115,12 @@ do
 | 
				
			|||||||
done
 | 
					done
 | 
				
			||||||
done
 | 
					done
 | 
				
			||||||
 | 
					
 | 
				
			||||||
CC_LIST=" \
 | 
					CC_LIST="TwoSpinWilsonFermion3plus1DInstantiation.cc.master	TwoSpinWilsonKernelsInstantiation.cc.master"
 | 
				
			||||||
  ImprovedStaggeredFermion5DInstantiation \
 | 
					 | 
				
			||||||
  StaggeredKernelsInstantiation "
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					for impl in $TWOSPIN_WILSON_IMPL_LIST
 | 
				
			||||||
 | 
					do
 | 
				
			||||||
 | 
					for f in $CC_LIST
 | 
				
			||||||
 | 
					do
 | 
				
			||||||
 | 
					  ln -f -s ../$f.cc.master $impl/$f$impl.cc
 | 
				
			||||||
 | 
					done
 | 
				
			||||||
 | 
					done
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -158,8 +158,8 @@ RealD WilsonFlowBase<Gimpl>::energyDensityCloverleaf(const RealD t, const GaugeF
 | 
				
			|||||||
  LatticeComplexD R(U.Grid());
 | 
					  LatticeComplexD R(U.Grid());
 | 
				
			||||||
  R = Zero();
 | 
					  R = Zero();
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  for(int mu=0;mu<3;mu++){
 | 
					  for(int mu=0;mu<Nd-1;mu++){
 | 
				
			||||||
    for(int nu=mu+1;nu<4;nu++){
 | 
					    for(int nu=mu+1;nu<Nd;nu++){
 | 
				
			||||||
      WilsonLoops<Gimpl>::FieldStrength(F, U, mu, nu);
 | 
					      WilsonLoops<Gimpl>::FieldStrength(F, U, mu, nu);
 | 
				
			||||||
      R = R + trace(F*F);
 | 
					      R = R + trace(F*F);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										220
									
								
								Grid/qcd/spin/Pauli.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										220
									
								
								Grid/qcd/spin/Pauli.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,220 @@
 | 
				
			|||||||
 | 
					#ifndef GRID_QCD_PAULI_H
 | 
				
			||||||
 | 
					#define GRID_QCD_PAULI_H
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <array>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_BEGIN(Grid);
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Pauli basis
 | 
				
			||||||
 | 
					 * sx        sy       sz       ident
 | 
				
			||||||
 | 
					 * (0 1)  , (0 -i) , ( 1 0 )
 | 
				
			||||||
 | 
					 * (1 0)    (i  0)   ( 0 -1)
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * These are hermitian.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Also supply wilson "projectors" (1+/-sx), (1+/-sy), (1+/-sz)
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * spPauliProjXm
 | 
				
			||||||
 | 
					 * spPauliProjYm etc...
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					class Pauli {
 | 
				
			||||||
 | 
					  public:
 | 
				
			||||||
 | 
					    GRID_SERIALIZABLE_ENUM(Algebra, undef,
 | 
				
			||||||
 | 
					                           SigmaX           , 0,
 | 
				
			||||||
 | 
					                           MinusSigmaX      , 1,
 | 
				
			||||||
 | 
					                           SigmaY           , 2,
 | 
				
			||||||
 | 
					                           MinusSigmaY      , 3,
 | 
				
			||||||
 | 
					                           SigmaZ           , 4,
 | 
				
			||||||
 | 
					                           MinusSigmaZ      , 5,
 | 
				
			||||||
 | 
					                           Identity         , 6,
 | 
				
			||||||
 | 
								   MinusIdentity    , 7);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					    static constexpr unsigned int nPauli = 8;
 | 
				
			||||||
 | 
					    static const std::array<const char *, nPauli>                name;
 | 
				
			||||||
 | 
					    static const std::array<std::array<Algebra, nPauli>, nPauli> mul;
 | 
				
			||||||
 | 
					    static const std::array<Algebra, nPauli>                     adj;
 | 
				
			||||||
 | 
					    static const std::array<const Pauli, 4>                      gmu;
 | 
				
			||||||
 | 
					    static const std::array<const Pauli, 16>                     gall;
 | 
				
			||||||
 | 
					    Algebra                                                      g;
 | 
				
			||||||
 | 
					  public:
 | 
				
			||||||
 | 
					  accelerator Pauli(Algebra initg): g(initg) {}  
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define CopyImplementation(iTemplate,multPauli,multFlavour)	\
 | 
				
			||||||
 | 
					  template<class vtype>							\
 | 
				
			||||||
 | 
					  accelerator_inline void multPauli(iTemplate<vtype, Nhs> &ret, const iTemplate<vtype, Nhs> &rhs) {	\
 | 
				
			||||||
 | 
					    multFlavour(ret,rhs);						\
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					CopyImplementation(iVector,multPauliSigmaX,multFlavourSigmaX);
 | 
				
			||||||
 | 
					CopyImplementation(iMatrix,lmultPauliSigmaX,lmultFlavourSigmaX);
 | 
				
			||||||
 | 
					CopyImplementation(iMatrix,rmultPauliSigmaX,rmultFlavourSigmaX);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					CopyImplementation(iVector,multPauliMinusSigmaX ,multFlavourMinusSigmaX);
 | 
				
			||||||
 | 
					CopyImplementation(iMatrix,lmultPauliMinusSigmaX,lmultFlavourMinusSigmaX);
 | 
				
			||||||
 | 
					CopyImplementation(iMatrix,rmultPauliMinusSigmaX,rmultFlavourMinusSigmaX);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					CopyImplementation(iVector,multPauliSigmaY,multFlavourSigmaY);
 | 
				
			||||||
 | 
					CopyImplementation(iMatrix,lmultPauliSigmaY,lmultFlavourSigmaY);
 | 
				
			||||||
 | 
					CopyImplementation(iMatrix,rmultPauliSigmaY,rmultFlavourSigmaY);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					CopyImplementation(iVector,multPauliMinusSigmaY ,multFlavourMinusSigmaY);
 | 
				
			||||||
 | 
					CopyImplementation(iMatrix,lmultPauliMinusSigmaY,lmultFlavourMinusSigmaY);
 | 
				
			||||||
 | 
					CopyImplementation(iMatrix,rmultPauliMinusSigmaY,rmultFlavourMinusSigmaY);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					CopyImplementation(iVector,multPauliSigmaZ,multFlavourSigmaZ);
 | 
				
			||||||
 | 
					CopyImplementation(iMatrix,lmultPauliSigmaZ,lmultFlavourSigmaZ);
 | 
				
			||||||
 | 
					CopyImplementation(iMatrix,rmultPauliSigmaZ,rmultFlavourSigmaZ);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					CopyImplementation(iVector,multPauliMinusSigmaZ ,multFlavourMinusSigmaZ);
 | 
				
			||||||
 | 
					CopyImplementation(iMatrix,lmultPauliMinusSigmaZ,lmultFlavourMinusSigmaZ);
 | 
				
			||||||
 | 
					CopyImplementation(iMatrix,rmultPauliMinusSigmaZ,rmultFlavourMinusSigmaZ);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					CopyImplementation(iVector,multPauliIdentity,multFlavourIdentity);
 | 
				
			||||||
 | 
					CopyImplementation(iMatrix,lmultPauliIdentity,lmultFlavourIdentity);
 | 
				
			||||||
 | 
					CopyImplementation(iMatrix,rmultPauliIdentity,rmultFlavourIdentity);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					CopyImplementation(iVector,multPauliMinusIdentity ,multFlavourMinusIdentity);
 | 
				
			||||||
 | 
					CopyImplementation(iMatrix,lmultPauliMinusIdentity,lmultFlavourMinusIdentity);
 | 
				
			||||||
 | 
					CopyImplementation(iMatrix,rmultPauliMinusIdentity,rmultFlavourMinusIdentity);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * sx        sy       sz       ident
 | 
				
			||||||
 | 
					 * (0 1)  , (0 -i) , ( 1 0 )
 | 
				
			||||||
 | 
					 * (1 0)    (i  0)   ( 0 -1)
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0> accelerator_inline void pauliProjXp (iVector<vtype,Nhs> &hspin,const iVector<vtype,Nhs> &fspin)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  hspin(0)=fspin(0)+fspin(1);
 | 
				
			||||||
 | 
					  hspin(1)=fspin(1)+fspin(0);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0> accelerator_inline void pauliProjXm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Nhs> &fspin)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  hspin(0)=fspin(0)-fspin(1);
 | 
				
			||||||
 | 
					  hspin(1)=fspin(1)-fspin(0);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0> accelerator_inline void pauliProjYp (iVector<vtype,Nhs> &hspin,const iVector<vtype,Nhs> &fspin)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  hspin(0)=fspin(0)-timesI(fspin(1));
 | 
				
			||||||
 | 
					  hspin(1)=fspin(1)+timesI(fspin(0));
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0> accelerator_inline void pauliProjYm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Nhs> &fspin)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  hspin(0)=fspin(0)+timesI(fspin(1));
 | 
				
			||||||
 | 
					  hspin(1)=fspin(1)-timesI(fspin(0));
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0> accelerator_inline void pauliProjZp (iVector<vtype,Nhs> &hspin,const iVector<vtype,Nhs> &fspin)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  hspin(0)=fspin(0)+fspin(0);
 | 
				
			||||||
 | 
					  hspin(1)=Zero();
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0> accelerator_inline void pauliProjZm (iVector<vtype,Nhs> &hspin,const iVector<vtype,Nhs> &fspin)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  hspin(0)=Zero();
 | 
				
			||||||
 | 
					  hspin(1)=fspin(1)+fspin(1);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0> accelerator_inline void pauliAssign(iVector<vtype,Nhs> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  fspin = hspin;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					template<class vtype,IfSpinor<iVector<vtype,Nhs> > = 0> accelerator_inline void pauliAdd   (iVector<vtype,Nhs> &fspin,const iVector<vtype,Nhs> &hspin)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  fspin = fspin + hspin;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class vtype> 
 | 
				
			||||||
 | 
					accelerator_inline auto operator*(const Pauli &G, const iVector<vtype, Nhs> &arg)
 | 
				
			||||||
 | 
					->typename std::enable_if<matchGridTensorIndex<iVector<vtype, Nhs>, PauliIndex>::value, iVector<vtype, Nhs>>::type
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  iVector<vtype, Nhs> ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  switch (G.g) 
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					  case Pauli::Algebra::SigmaX:
 | 
				
			||||||
 | 
					    multPauliSigmaX(ret, arg); break;
 | 
				
			||||||
 | 
					  case Pauli::Algebra::MinusSigmaX:
 | 
				
			||||||
 | 
					    multPauliMinusSigmaX(ret, arg); break;
 | 
				
			||||||
 | 
					  case Pauli::Algebra::SigmaY:
 | 
				
			||||||
 | 
					    multPauliSigmaY(ret, arg); break;
 | 
				
			||||||
 | 
					  case Pauli::Algebra::MinusSigmaY:
 | 
				
			||||||
 | 
					    multPauliMinusSigmaY(ret, arg); break;
 | 
				
			||||||
 | 
					  case Pauli::Algebra::SigmaZ:
 | 
				
			||||||
 | 
					    multPauliSigmaZ(ret, arg); break;
 | 
				
			||||||
 | 
					  case Pauli::Algebra::MinusSigmaZ:
 | 
				
			||||||
 | 
					    multPauliMinusSigmaZ(ret, arg); break;
 | 
				
			||||||
 | 
					  case Pauli::Algebra::Identity:
 | 
				
			||||||
 | 
					    multPauliIdentity(ret, arg); break;
 | 
				
			||||||
 | 
					  case Pauli::Algebra::MinusIdentity:
 | 
				
			||||||
 | 
					    multPauliMinusIdentity(ret, arg); break;
 | 
				
			||||||
 | 
					  default: assert(0);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					  return ret;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class vtype> 
 | 
				
			||||||
 | 
					accelerator_inline auto operator*(const Pauli &G, const iMatrix<vtype, Nhs> &arg)
 | 
				
			||||||
 | 
					->typename std::enable_if<matchGridTensorIndex<iMatrix<vtype, Nhs>, PauliIndex>::value, iMatrix<vtype, Nhs>>::type
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  iMatrix<vtype, Nhs> ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  switch (G.g) 
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					  case Pauli::Algebra::SigmaX:
 | 
				
			||||||
 | 
					    lmultPauliSigmaX(ret, arg); break;
 | 
				
			||||||
 | 
					  case Pauli::Algebra::MinusSigmaX:
 | 
				
			||||||
 | 
					    lmultPauliMinusSigmaX(ret, arg); break;
 | 
				
			||||||
 | 
					  case Pauli::Algebra::SigmaY:
 | 
				
			||||||
 | 
					    lmultPauliSigmaY(ret, arg); break;
 | 
				
			||||||
 | 
					  case Pauli::Algebra::MinusSigmaY:
 | 
				
			||||||
 | 
					    lmultPauliMinusSigmaY(ret, arg); break;
 | 
				
			||||||
 | 
					  case Pauli::Algebra::SigmaZ:
 | 
				
			||||||
 | 
					    lmultPauliSigmaZ(ret, arg); break;
 | 
				
			||||||
 | 
					  case Pauli::Algebra::MinusSigmaZ:
 | 
				
			||||||
 | 
					    lmultPauliMinusSigmaZ(ret, arg); break;
 | 
				
			||||||
 | 
					  case Pauli::Algebra::Identity:
 | 
				
			||||||
 | 
					    lmultPauliIdentity(ret, arg); break;
 | 
				
			||||||
 | 
					  case Pauli::Algebra::MinusIdentity:
 | 
				
			||||||
 | 
					    lmultPauliMinusIdentity(ret, arg); break;
 | 
				
			||||||
 | 
					  default: assert(0);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  return ret;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class vtype> 
 | 
				
			||||||
 | 
					accelerator_inline auto operator*(const iMatrix<vtype, Nhs> &arg, const Pauli &G)
 | 
				
			||||||
 | 
					->typename std::enable_if<matchGridTensorIndex<iMatrix<vtype, Nhs>, PauliIndex>::value, iMatrix<vtype, Nhs>>::type
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  iMatrix<vtype, Nhs> ret;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  switch (G.g) 
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					  case Pauli::Algebra::SigmaX:
 | 
				
			||||||
 | 
					    rmultPauliSigmaX(ret, arg); break;
 | 
				
			||||||
 | 
					  case Pauli::Algebra::MinusSigmaX:
 | 
				
			||||||
 | 
					    rmultPauliMinusSigmaX(ret, arg); break;
 | 
				
			||||||
 | 
					  case Pauli::Algebra::SigmaY:
 | 
				
			||||||
 | 
					    rmultPauliSigmaY(ret, arg); break;
 | 
				
			||||||
 | 
					  case Pauli::Algebra::MinusSigmaY:
 | 
				
			||||||
 | 
					    rmultPauliMinusSigmaY(ret, arg); break;
 | 
				
			||||||
 | 
					  case Pauli::Algebra::SigmaZ:
 | 
				
			||||||
 | 
					    rmultPauliSigmaZ(ret, arg); break;
 | 
				
			||||||
 | 
					  case Pauli::Algebra::MinusSigmaZ:
 | 
				
			||||||
 | 
					    rmultPauliMinusSigmaZ(ret, arg); break;
 | 
				
			||||||
 | 
					  case Pauli::Algebra::Identity:
 | 
				
			||||||
 | 
					    rmultPauliIdentity(ret, arg); break;
 | 
				
			||||||
 | 
					  case Pauli::Algebra::MinusIdentity:
 | 
				
			||||||
 | 
					    rmultPauliMinusIdentity(ret, arg); break;
 | 
				
			||||||
 | 
					  default: assert(0);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  return ret;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					NAMESPACE_END(Grid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif // GRID_QCD_GAMMA_H
 | 
				
			||||||
@@ -179,20 +179,17 @@ public:
 | 
				
			|||||||
  //////////////////////////////////////////////////
 | 
					  //////////////////////////////////////////////////
 | 
				
			||||||
  // average over all x,y,z the temporal loop
 | 
					  // average over all x,y,z the temporal loop
 | 
				
			||||||
  //////////////////////////////////////////////////
 | 
					  //////////////////////////////////////////////////
 | 
				
			||||||
  static ComplexD avgPolyakovLoop(const GaugeField &Umu) {  //assume Nd=4
 | 
					  static ComplexD avgPolyakovLoop(const GaugeField &Umu) {  
 | 
				
			||||||
    GaugeMat Ut(Umu.Grid()), P(Umu.Grid());
 | 
					    GaugeMat Ut(Umu.Grid()), P(Umu.Grid());
 | 
				
			||||||
    ComplexD out;
 | 
					    ComplexD out;
 | 
				
			||||||
    int T = Umu.Grid()->GlobalDimensions()[3];
 | 
					    uint64_t vol = Umu.Grid()->gSites();
 | 
				
			||||||
    int X = Umu.Grid()->GlobalDimensions()[0];
 | 
					    int T = Umu.Grid()->GlobalDimensions()[Nd-1];
 | 
				
			||||||
    int Y = Umu.Grid()->GlobalDimensions()[1];
 | 
					    Ut = peekLorentz(Umu,Nd-1); //Select temporal direction
 | 
				
			||||||
    int Z = Umu.Grid()->GlobalDimensions()[2];
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    Ut = peekLorentz(Umu,3); //Select temporal direction
 | 
					 | 
				
			||||||
    P = Ut;
 | 
					    P = Ut;
 | 
				
			||||||
    for (int t=1;t<T;t++){ 
 | 
					    for (int t=1;t<T;t++){ 
 | 
				
			||||||
      P = Gimpl::CovShiftForward(Ut,3,P);
 | 
					      P = Gimpl::CovShiftForward(Ut,Nd-1,P);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
   RealD norm = 1.0/(Nc*X*Y*Z*T);
 | 
					   RealD norm = 1.0/(Nc*vol);
 | 
				
			||||||
   out = sum(trace(P))*norm;
 | 
					   out = sum(trace(P))*norm;
 | 
				
			||||||
   return out;   
 | 
					   return out;   
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -215,7 +212,7 @@ public:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    double vol = Umu.Grid()->gSites();
 | 
					    double vol = Umu.Grid()->gSites();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return p.real() / vol / (4.0 * Nc ) ;
 | 
					    return p.real() / vol / (Nd * Nc ) ;
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  //////////////////////////////////////////////////
 | 
					  //////////////////////////////////////////////////
 | 
				
			||||||
@@ -740,6 +737,7 @@ public:
 | 
				
			|||||||
  //cf  https://arxiv.org/pdf/hep-lat/9701012.pdf  Eq 6
 | 
					  //cf  https://arxiv.org/pdf/hep-lat/9701012.pdf  Eq 6
 | 
				
			||||||
  //output is the charge by timeslice: sum over timeslices to obtain the total
 | 
					  //output is the charge by timeslice: sum over timeslices to obtain the total
 | 
				
			||||||
  static std::vector<Real> TimesliceTopologicalChargeMxN(const GaugeLorentz &U, int M, int N){
 | 
					  static std::vector<Real> TimesliceTopologicalChargeMxN(const GaugeLorentz &U, int M, int N){
 | 
				
			||||||
 | 
					    // Audit: 4D epsilon is hard coded
 | 
				
			||||||
    assert(Nd == 4);
 | 
					    assert(Nd == 4);
 | 
				
			||||||
    std::vector<std::vector<GaugeMat*> > F(Nd,std::vector<GaugeMat*>(Nd,nullptr));
 | 
					    std::vector<std::vector<GaugeMat*> > F(Nd,std::vector<GaugeMat*>(Nd,nullptr));
 | 
				
			||||||
    //Note F_numu = - F_munu
 | 
					    //Note F_numu = - F_munu
 | 
				
			||||||
@@ -829,6 +827,25 @@ public:
 | 
				
			|||||||
    return out;
 | 
					    return out;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //Compute the 5Li topological charge density
 | 
				
			||||||
 | 
					  static std::vector<Real> TopologicalChargeDensity5Li(const GaugeLorentz &U){
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    static const int exts[5][2] = { {1,1}, {2,2}, {1,2}, {1,3}, {3,3} };
 | 
				
			||||||
 | 
					    std::vector<std::vector<Real> > loops = TimesliceTopologicalCharge5LiContributions(U);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    double c5=1./20.;
 | 
				
			||||||
 | 
					    double c4=1./5.-2.*c5;
 | 
				
			||||||
 | 
					    double c3=(-64.+640.*c5)/45.;
 | 
				
			||||||
 | 
					    double c2=(1-64.*c5)/9.;
 | 
				
			||||||
 | 
					    double c1=(19.-55.*c5)/9.;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    int Lt = loops[0].size();
 | 
				
			||||||
 | 
					    std::vector<Real> out(Lt,0.);
 | 
				
			||||||
 | 
					    for(int t=0;t<Lt;t++)
 | 
				
			||||||
 | 
					      out[t] += c1*loops[0][t] + c2*loops[1][t] + c3*loops[2][t] + c4*loops[3][t] + c5*loops[4][t];
 | 
				
			||||||
 | 
					    return out;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
  static Real TopologicalCharge5Li(const GaugeLorentz &U){
 | 
					  static Real TopologicalCharge5Li(const GaugeLorentz &U){
 | 
				
			||||||
    std::vector<Real> Qt = TimesliceTopologicalCharge5Li(U);
 | 
					    std::vector<Real> Qt = TimesliceTopologicalCharge5Li(U);
 | 
				
			||||||
    Real Q = 0.;
 | 
					    Real Q = 0.;
 | 
				
			||||||
@@ -1455,7 +1472,7 @@ public:
 | 
				
			|||||||
  //////////////////////////////////////////////////
 | 
					  //////////////////////////////////////////////////
 | 
				
			||||||
  static Real sumWilsonLoop(const GaugeLorentz &Umu,
 | 
					  static Real sumWilsonLoop(const GaugeLorentz &Umu,
 | 
				
			||||||
                            const int R1, const int R2) {
 | 
					                            const int R1, const int R2) {
 | 
				
			||||||
    std::vector<GaugeMat> U(4, Umu.Grid());
 | 
					    std::vector<GaugeMat> U(Nd, Umu.Grid());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for (int mu = 0; mu < Umu.Grid()->_ndimension; mu++) {
 | 
					    for (int mu = 0; mu < Umu.Grid()->_ndimension; mu++) {
 | 
				
			||||||
      U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
 | 
					      U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
 | 
				
			||||||
@@ -1474,7 +1491,7 @@ public:
 | 
				
			|||||||
  //////////////////////////////////////////////////
 | 
					  //////////////////////////////////////////////////
 | 
				
			||||||
  static Real sumTimelikeWilsonLoop(const GaugeLorentz &Umu,
 | 
					  static Real sumTimelikeWilsonLoop(const GaugeLorentz &Umu,
 | 
				
			||||||
                            const int R1, const int R2) {
 | 
					                            const int R1, const int R2) {
 | 
				
			||||||
    std::vector<GaugeMat> U(4, Umu.Grid());
 | 
					    std::vector<GaugeMat> U(Nd, Umu.Grid());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for (int mu = 0; mu < Umu.Grid()->_ndimension; mu++) {
 | 
					    for (int mu = 0; mu < Umu.Grid()->_ndimension; mu++) {
 | 
				
			||||||
      U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
 | 
					      U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
 | 
				
			||||||
@@ -1492,8 +1509,8 @@ public:
 | 
				
			|||||||
  // sum over all x,y,z,t and over all planes of spatial Wilson loop
 | 
					  // sum over all x,y,z,t and over all planes of spatial Wilson loop
 | 
				
			||||||
  //////////////////////////////////////////////////
 | 
					  //////////////////////////////////////////////////
 | 
				
			||||||
  static Real sumSpatialWilsonLoop(const GaugeLorentz &Umu,
 | 
					  static Real sumSpatialWilsonLoop(const GaugeLorentz &Umu,
 | 
				
			||||||
                            const int R1, const int R2) {
 | 
									   const int R1, const int R2) {
 | 
				
			||||||
    std::vector<GaugeMat> U(4, Umu.Grid());
 | 
					    std::vector<GaugeMat> U(Nd, Umu.Grid());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for (int mu = 0; mu < Umu.Grid()->_ndimension; mu++) {
 | 
					    for (int mu = 0; mu < Umu.Grid()->_ndimension; mu++) {
 | 
				
			||||||
      U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
 | 
					      U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -252,7 +252,7 @@ inline std::ostream& operator<< (std::ostream& stream, const vComplexF &o){
 | 
				
			|||||||
 
 | 
					 
 | 
				
			||||||
inline std::ostream& operator<< (std::ostream& stream, const vComplexD &o){
 | 
					inline std::ostream& operator<< (std::ostream& stream, const vComplexD &o){
 | 
				
			||||||
  int nn=vComplexD::Nsimd();
 | 
					  int nn=vComplexD::Nsimd();
 | 
				
			||||||
  std::vector<ComplexD,alignedAllocator<ComplexD> > buf(nn);
 | 
					  std::vector<ComplexD> buf(nn);
 | 
				
			||||||
  vstore(o,&buf[0]);
 | 
					  vstore(o,&buf[0]);
 | 
				
			||||||
  stream<<"<";
 | 
					  stream<<"<";
 | 
				
			||||||
  for(int i=0;i<nn;i++){
 | 
					  for(int i=0;i<nn;i++){
 | 
				
			||||||
@@ -272,7 +272,7 @@ inline std::ostream& operator<< (std::ostream& stream, const vComplexD2 &o){
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
inline std::ostream& operator<< (std::ostream& stream, const vRealF &o){
 | 
					inline std::ostream& operator<< (std::ostream& stream, const vRealF &o){
 | 
				
			||||||
  int nn=vRealF::Nsimd();
 | 
					  int nn=vRealF::Nsimd();
 | 
				
			||||||
  std::vector<RealF,alignedAllocator<RealF> > buf(nn);
 | 
					  std::vector<RealF> buf(nn);
 | 
				
			||||||
  vstore(o,&buf[0]);
 | 
					  vstore(o,&buf[0]);
 | 
				
			||||||
  stream<<"<";
 | 
					  stream<<"<";
 | 
				
			||||||
  for(int i=0;i<nn;i++){
 | 
					  for(int i=0;i<nn;i++){
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										1009
									
								
								Grid/stencil/IcosahedralStencil.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1009
									
								
								Grid/stencil/IcosahedralStencil.h
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@@ -187,8 +187,9 @@ void GridParseLayout(char **argv,int argc,
 | 
				
			|||||||
		     Coordinate &latt_c,
 | 
							     Coordinate &latt_c,
 | 
				
			||||||
		     Coordinate &mpi_c)
 | 
							     Coordinate &mpi_c)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  auto mpi =std::vector<int>({1,1,1,1});
 | 
					  auto mpi =std::vector<int>(Nd,1);
 | 
				
			||||||
  auto latt=std::vector<int>({8,8,8,8});
 | 
					  auto latt=std::vector<int>(Nd,8);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  GridThread::SetMaxThreads();
 | 
					  GridThread::SetMaxThreads();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -228,6 +229,9 @@ void GridParseLayout(char **argv,int argc,
 | 
				
			|||||||
  }
 | 
					  }
 | 
				
			||||||
  // Copy back into coordinate format
 | 
					  // Copy back into coordinate format
 | 
				
			||||||
  int nd = mpi.size();
 | 
					  int nd = mpi.size();
 | 
				
			||||||
 | 
					  //  std::cout << "mpi.size() "<<nd<<std::endl;
 | 
				
			||||||
 | 
					  //  std::cout << "latt.size() "<<latt.size()<<std::endl;
 | 
				
			||||||
 | 
					  //  std::cout << "Nd "<<Nd<<std::endl;
 | 
				
			||||||
  assert(latt.size()==nd);
 | 
					  assert(latt.size()==nd);
 | 
				
			||||||
  latt_c.resize(nd);
 | 
					  latt_c.resize(nd);
 | 
				
			||||||
   mpi_c.resize(nd);
 | 
					   mpi_c.resize(nd);
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										5
									
								
								TODO
									
									
									
									
									
								
							
							
						
						
									
										5
									
								
								TODO
									
									
									
									
									
								
							@@ -1,3 +1,8 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					* Clean up the extract merge and replace with insertLane/extractLane
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					-----
 | 
				
			||||||
 | 
					
 | 
				
			||||||
i)    Refine subspace with HDCG & recompute
 | 
					i)    Refine subspace with HDCG & recompute
 | 
				
			||||||
ii)   Block Lanczos in coarse space
 | 
					ii)   Block Lanczos in coarse space
 | 
				
			||||||
iii)  Batched block project in the operator computation
 | 
					iii)  Batched block project in the operator computation
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										18
									
								
								configure.ac
									
									
									
									
									
								
							
							
						
						
									
										18
									
								
								configure.ac
									
									
									
									
									
								
							@@ -198,6 +198,8 @@ AC_ARG_ENABLE([Nc],
 | 
				
			|||||||
    [ac_Nc=${enable_Nc}], [ac_Nc=3])
 | 
					    [ac_Nc=${enable_Nc}], [ac_Nc=3])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
case ${ac_Nc} in
 | 
					case ${ac_Nc} in
 | 
				
			||||||
 | 
					     1)
 | 
				
			||||||
 | 
					        AC_DEFINE([Config_Nc],[1],[Gauge group Nc]);;
 | 
				
			||||||
    2)
 | 
					    2)
 | 
				
			||||||
        AC_DEFINE([Config_Nc],[2],[Gauge group Nc]);;
 | 
					        AC_DEFINE([Config_Nc],[2],[Gauge group Nc]);;
 | 
				
			||||||
    3)
 | 
					    3)
 | 
				
			||||||
@@ -211,6 +213,21 @@ case ${ac_Nc} in
 | 
				
			|||||||
    *)
 | 
					    *)
 | 
				
			||||||
      AC_MSG_ERROR(["Unsupport gauge group choice Nc = ${ac_Nc}"]);;
 | 
					      AC_MSG_ERROR(["Unsupport gauge group choice Nc = ${ac_Nc}"]);;
 | 
				
			||||||
esac
 | 
					esac
 | 
				
			||||||
 | 
					############### Nd
 | 
				
			||||||
 | 
					AC_ARG_ENABLE([Nd],
 | 
				
			||||||
 | 
					    [AS_HELP_STRING([--enable-Nd=2|3|4],[enable default LGT dimension])],
 | 
				
			||||||
 | 
					    [ac_Nd=${enable_Nd}], [ac_Nd=4])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					case ${ac_Nd} in
 | 
				
			||||||
 | 
					    2)
 | 
				
			||||||
 | 
					        AC_DEFINE([Config_Nd],[2],[Gauge field dimension Nd]);;
 | 
				
			||||||
 | 
					    3)
 | 
				
			||||||
 | 
					        AC_DEFINE([Config_Nd],[3],[Gauge field dimension Nd]);;
 | 
				
			||||||
 | 
					    4)
 | 
				
			||||||
 | 
					        AC_DEFINE([Config_Nd],[4],[Gauge field dimension Nd]);;
 | 
				
			||||||
 | 
					    *)
 | 
				
			||||||
 | 
					      AC_MSG_ERROR(["Unsupport dimension Nd = ${ac_Nd}"]);;
 | 
				
			||||||
 | 
					esac
 | 
				
			||||||
 | 
					
 | 
				
			||||||
############### Symplectic group
 | 
					############### Symplectic group
 | 
				
			||||||
AC_ARG_ENABLE([Sp],
 | 
					AC_ARG_ENABLE([Sp],
 | 
				
			||||||
@@ -818,6 +835,7 @@ os (target)                 : $target_os
 | 
				
			|||||||
compiler vendor             : ${ax_cv_cxx_compiler_vendor}
 | 
					compiler vendor             : ${ax_cv_cxx_compiler_vendor}
 | 
				
			||||||
compiler version            : ${ax_cv_gxx_version}
 | 
					compiler version            : ${ax_cv_gxx_version}
 | 
				
			||||||
----- BUILD OPTIONS -----------------------------------
 | 
					----- BUILD OPTIONS -----------------------------------
 | 
				
			||||||
 | 
					Nd                          : ${ac_Nd}
 | 
				
			||||||
Nc                          : ${ac_Nc}
 | 
					Nc                          : ${ac_Nc}
 | 
				
			||||||
SIMD                        : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG}
 | 
					SIMD                        : ${ac_SIMD}${SIMD_GEN_WIDTH_MSG}
 | 
				
			||||||
Threading                   : ${ac_openmp}
 | 
					Threading                   : ${ac_openmp}
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										12
									
								
								systems/mac-arm/config-command
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										12
									
								
								systems/mac-arm/config-command
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,12 @@
 | 
				
			|||||||
 | 
					MPICXX=mpicxx CXXFLAGS=-I/opt/local/include LDFLAGS=-L/opt/local/lib/ CXX=clang++ ../../configure \
 | 
				
			||||||
 | 
						      --enable-simd=GEN \
 | 
				
			||||||
 | 
						      --enable-Nc=1 \
 | 
				
			||||||
 | 
						      --enable-comms=mpi-auto \
 | 
				
			||||||
 | 
						      --enable-unified=yes \
 | 
				
			||||||
 | 
						      --prefix $HOME/QCD/GridInstall \
 | 
				
			||||||
 | 
						      --with-lime=/Users/peterboyle/QCD/SciDAC/install/ \
 | 
				
			||||||
 | 
						      --with-openssl=$BREW \
 | 
				
			||||||
 | 
						      --disable-fermion-reps \
 | 
				
			||||||
 | 
						      --disable-gparity \
 | 
				
			||||||
 | 
						      --enable-debug
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										693
									
								
								tests/debug/Test_icosahedron.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										693
									
								
								tests/debug/Test_icosahedron.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,693 @@
 | 
				
			|||||||
 | 
					/*************************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Grid physics library, www.github.com/paboyle/Grid 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Source file: ./tests/debug/Test_icosahedron.cc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Copyright (C) 2015
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					    it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					    the Free Software Foundation; either version 2 of the License, or
 | 
				
			||||||
 | 
					    (at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					    GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    You should have received a copy of the GNU General Public License along
 | 
				
			||||||
 | 
					    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
 | 
					    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    See the full license in the file "LICENSE" in the top level distribution directory
 | 
				
			||||||
 | 
					    *************************************************************************************/
 | 
				
			||||||
 | 
					    /*  END LEGAL */
 | 
				
			||||||
 | 
					#include <Grid/Grid.h>
 | 
				
			||||||
 | 
					#include <Grid/stencil/IcosahedralStencil.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					using namespace std;
 | 
				
			||||||
 | 
					using namespace Grid;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					const int MyNd=3;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class IcosahedralGimpl
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					  typedef LatticeLorentzColourMatrix  GaugeField;
 | 
				
			||||||
 | 
					  typedef LatticeColourMatrix         GaugeLinkField;
 | 
				
			||||||
 | 
					  typedef LatticeDoubledGaugeField    DoubledGaugeField;
 | 
				
			||||||
 | 
					  typedef LatticeComplex              ComplexField;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					template< class Gimpl>
 | 
				
			||||||
 | 
					class IcosahedralSupport {
 | 
				
			||||||
 | 
					public:
 | 
				
			||||||
 | 
					  // Move to inherit types macro as before
 | 
				
			||||||
 | 
					  typedef typename Gimpl::GaugeField GaugeField;
 | 
				
			||||||
 | 
					  typedef typename Gimpl::GaugeLinkField GaugeLinkField;
 | 
				
			||||||
 | 
					  typedef typename Gimpl::ComplexField ComplexField;
 | 
				
			||||||
 | 
					  typedef typename Gimpl::DoubledGaugeField DoubledGaugeField;
 | 
				
			||||||
 | 
					  //
 | 
				
			||||||
 | 
					  GridBase *VertexGrid;
 | 
				
			||||||
 | 
					  GridBase *EdgeGrid;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  IcosahedralStencil FaceStencil;
 | 
				
			||||||
 | 
					  IcosahedralStencil NNee; // edge neighbours with edge domain
 | 
				
			||||||
 | 
					  IcosahedralStencil NNev; // vertex neighbours but in edge domain
 | 
				
			||||||
 | 
					  IcosahedralStencil NNvv; // vertex neighbours with vertex domain
 | 
				
			||||||
 | 
					  IcosahedralStencil NNve; // edge neighbours with vertex domain
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  IcosahedralSupport(GridBase *_VertexGrid,GridBase *_EdgeGrid)
 | 
				
			||||||
 | 
					    : FaceStencil (_EdgeGrid,_VertexGrid),
 | 
				
			||||||
 | 
					      NNee(_EdgeGrid,_VertexGrid),
 | 
				
			||||||
 | 
					      NNev(_EdgeGrid,_VertexGrid),
 | 
				
			||||||
 | 
					      NNve(_EdgeGrid,_VertexGrid),
 | 
				
			||||||
 | 
					      NNvv(_EdgeGrid,_VertexGrid),
 | 
				
			||||||
 | 
					      VertexGrid(_VertexGrid), EdgeGrid(_EdgeGrid)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    FaceStencil.FaceStencil();
 | 
				
			||||||
 | 
					    std::cout << "NNee"<<std::endl;
 | 
				
			||||||
 | 
					    // true/false is "vertexInput, VertexOutput"
 | 
				
			||||||
 | 
					    NNee.NearestNeighbourStencil(false,false);// edge input + output      ; used by face stencil
 | 
				
			||||||
 | 
					    NNev.NearestNeighbourStencil(true,false); // vertex input, edge ouput ; used by gauge transform
 | 
				
			||||||
 | 
					    NNvv.NearestNeighbourStencil(true,true);  // vertex input + output    ; used by Laplacian
 | 
				
			||||||
 | 
					    NNve.NearestNeighbourStencil(false,true); // edge input, vertex output; used by double store
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  // Gauge Link field GT is the gauge transform and lives on the VERTEX field
 | 
				
			||||||
 | 
					  ////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					  void ForwardTriangles(GaugeField &Umu,LatticeComplex &plaq1,LatticeComplex &plaq2)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					      autoView(Umu_v,Umu,AcceleratorRead);
 | 
				
			||||||
 | 
					      autoView(plaq1_v,plaq1,AcceleratorWrite);
 | 
				
			||||||
 | 
					      autoView(plaq2_v,plaq2,AcceleratorWrite);
 | 
				
			||||||
 | 
					      autoView(stencil_v,FaceStencil,AcceleratorRead);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      accelerator_for(ss,EdgeGrid->oSites(),vComplex::Nsimd(),{
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
						auto Lx = Umu_v(ss)(IcosahedronPatchX);
 | 
				
			||||||
 | 
						auto Ly = Umu_v(ss)(IcosahedronPatchY);
 | 
				
			||||||
 | 
						auto Ld = Umu_v(ss)(IcosahedronPatchDiagonal);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// for trace [ U_x(z) U_y(z+\hat x) adj(U_d(z)) ]
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
						  auto SE1 = stencil_v.GetEntry(0,ss);
 | 
				
			||||||
 | 
						  auto doAdj = SE1->_adjoint;
 | 
				
			||||||
 | 
						  auto pol   = SE1->_polarisation;
 | 
				
			||||||
 | 
						  auto s1    = SE1->_offset;
 | 
				
			||||||
 | 
						  auto L1 = Umu_v(s1)(pol);
 | 
				
			||||||
 | 
						  if(doAdj)
 | 
				
			||||||
 | 
						    L1 = adj(L1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						  coalescedWrite(plaq1_v[ss](),trace(Lx*L1*adj(Ld) ) );
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// for trace [  U_y(z) U_x(z+\hat y) adj(U_d(z))   ]
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
						  auto SE2 = stencil_v.GetEntry(1,ss);
 | 
				
			||||||
 | 
						  auto doAdj = SE2->_adjoint;
 | 
				
			||||||
 | 
						  auto pol   = SE2->_polarisation;
 | 
				
			||||||
 | 
						  auto s2    = SE2->_offset;
 | 
				
			||||||
 | 
						  auto L2 = Umu_v(s2)(pol);
 | 
				
			||||||
 | 
						  if(doAdj)
 | 
				
			||||||
 | 
						    L2 = adj(L2);
 | 
				
			||||||
 | 
						  coalescedWrite(plaq2_v[ss](),trace(Ly*L2*adj(Ld) ) );
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      });
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Staples for gauge force
 | 
				
			||||||
 | 
					  void IcosahedralStaples(GaugeField &Umu,
 | 
				
			||||||
 | 
								  GaugeLinkField &stapleXY,
 | 
				
			||||||
 | 
								  GaugeLinkField &stapleYX,
 | 
				
			||||||
 | 
								  GaugeLinkField &stapleXD,
 | 
				
			||||||
 | 
								  GaugeLinkField &stapleDX,
 | 
				
			||||||
 | 
								  GaugeLinkField &stapleYD,
 | 
				
			||||||
 | 
								  GaugeLinkField &stapleDY)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					      autoView(Umu_v,Umu,AcceleratorRead);
 | 
				
			||||||
 | 
					      autoView(stapleXY_v,stapleXY,AcceleratorWrite);
 | 
				
			||||||
 | 
					      autoView(stapleYX_v,stapleYX,AcceleratorWrite);
 | 
				
			||||||
 | 
					      autoView(stapleXD_v,stapleXD,AcceleratorWrite);
 | 
				
			||||||
 | 
					      autoView(stapleDX_v,stapleDX,AcceleratorWrite);
 | 
				
			||||||
 | 
					      autoView(stapleYD_v,stapleYD,AcceleratorWrite);
 | 
				
			||||||
 | 
					      autoView(stapleDY_v,stapleDY,AcceleratorWrite);
 | 
				
			||||||
 | 
					      autoView(stencil_v,NNee,AcceleratorRead);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      const int np = NNee._npoints;
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      const int ent_Xp = 0;
 | 
				
			||||||
 | 
					      const int ent_Yp = 1;
 | 
				
			||||||
 | 
					      const int ent_Xm = 3;
 | 
				
			||||||
 | 
					      const int ent_Ym = 4;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      accelerator_for(ss,EdgeGrid->oSites(),vComplex::Nsimd(),{
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// Three forward links from this site
 | 
				
			||||||
 | 
						auto Lx = Umu_v(ss)(IcosahedronPatchX);
 | 
				
			||||||
 | 
						auto Ly = Umu_v(ss)(IcosahedronPatchY);
 | 
				
			||||||
 | 
						auto Ld = Umu_v(ss)(IcosahedronPatchDiagonal);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						///////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
						// Terms for the staple orthog to PlusDiagonal
 | 
				
			||||||
 | 
						///////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
						// adj( U_y(z+\hat x)) adj(U_x(z)) 
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
						  auto SE1 = stencil_v.GetEntry(ent_Xp,ss);
 | 
				
			||||||
 | 
						  auto doAdj = SE1->_adjoint;
 | 
				
			||||||
 | 
						  auto pol   = SE1->_polarisation;
 | 
				
			||||||
 | 
						  auto s1    = SE1->_offset;
 | 
				
			||||||
 | 
						  auto Ly_at_xp = Umu_v(s1)(pol);
 | 
				
			||||||
 | 
						  if(doAdj)
 | 
				
			||||||
 | 
						    Ly_at_xp = adj(Ly_at_xp);
 | 
				
			||||||
 | 
						  coalescedWrite(stapleXY_v[ss](),adj(Ly_at_xp)*adj(Lx)  );
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						// adj( U_y(z) )  adj(U_x(z+\hat y))
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
						  auto SE2 = stencil_v.GetEntry(ent_Yp,ss);
 | 
				
			||||||
 | 
						  auto doAdj = SE2->_adjoint;
 | 
				
			||||||
 | 
						  auto pol   = SE2->_polarisation;
 | 
				
			||||||
 | 
						  auto s2    = SE2->_offset;
 | 
				
			||||||
 | 
						  auto Lx_at_yp = Umu_v(s2)(pol);
 | 
				
			||||||
 | 
						  if(doAdj)
 | 
				
			||||||
 | 
						    Lx_at_yp = adj(Lx_at_yp);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						  coalescedWrite(stapleYX_v[ss](),adj(Lx_at_yp)*adj(Ly) );
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						///////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
						// Terms for the staple covering Xp : Dp Yp(x++) and Yp(y--) Dp(y--)
 | 
				
			||||||
 | 
						///////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
						// U_y(z+\hat x)*adj(U_d(z)) 
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
						  auto SE1 = stencil_v.GetEntry(ent_Xp,ss);
 | 
				
			||||||
 | 
						  auto doAdj = SE1->_adjoint;
 | 
				
			||||||
 | 
						  auto pol   = SE1->_polarisation;
 | 
				
			||||||
 | 
						  auto s1    = SE1->_offset;
 | 
				
			||||||
 | 
						  auto Ly_at_xp = Umu_v(s1)(pol);
 | 
				
			||||||
 | 
						  if(doAdj)
 | 
				
			||||||
 | 
						    Ly_at_xp = adj(Ly_at_xp);
 | 
				
			||||||
 | 
						  coalescedWrite(stapleDY_v[ss](), Ly_at_xp *adj(Ld));
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						//  adj(U_d(z-\hat y)) U_y(z-\hat y)
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
						  auto SE2 = stencil_v.GetEntry(ent_Ym,ss);
 | 
				
			||||||
 | 
						  auto doAdj = SE2->_adjoint;
 | 
				
			||||||
 | 
						  auto pol   = SE2->_polarisation;
 | 
				
			||||||
 | 
						  auto s2    = SE2->_offset;
 | 
				
			||||||
 | 
						  int pol1 = IcosahedronPatchY;
 | 
				
			||||||
 | 
						  int pol2 = IcosahedronPatchDiagonal;
 | 
				
			||||||
 | 
						  if ( pol != IcosahedronPatchDiagonal ) {
 | 
				
			||||||
 | 
						    pol1 = IcosahedronPatchDiagonal;
 | 
				
			||||||
 | 
						    pol2 = IcosahedronPatchX;
 | 
				
			||||||
 | 
						  }
 | 
				
			||||||
 | 
						  auto Ly_at_ym = Umu_v(s2)(pol1);
 | 
				
			||||||
 | 
						  auto Ld_at_ym = Umu_v(s2)(pol2);
 | 
				
			||||||
 | 
						  coalescedWrite(stapleYD_v[ss](),adj(Ld_at_ym)*Ly_at_ym );
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						///////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
						// Terms for the staple covering Yp : Dp Xp(y++) and Xp(x--) Dp(x--)
 | 
				
			||||||
 | 
						///////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
						//  U_x(z+\hat y)adj(U_d(z))
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
						  auto SE1 = stencil_v.GetEntry(ent_Yp,ss);
 | 
				
			||||||
 | 
						  auto doAdj = SE1->_adjoint;
 | 
				
			||||||
 | 
						  auto pol   = SE1->_polarisation;
 | 
				
			||||||
 | 
						  auto s1    = SE1->_offset;
 | 
				
			||||||
 | 
						  auto Lx_at_yp = Umu_v(s1)(pol);
 | 
				
			||||||
 | 
						  if(doAdj)
 | 
				
			||||||
 | 
						    Lx_at_yp = adj(Lx_at_yp);
 | 
				
			||||||
 | 
						  coalescedWrite(stapleDX_v[ss](),Lx_at_yp*adj(Ld) );
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						//  adj(U_d(z-\hat x))U_x(z-\hat x)
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
						  auto SE2 = stencil_v.GetEntry(ent_Xm,ss);
 | 
				
			||||||
 | 
						  auto doAdj = SE2->_adjoint;
 | 
				
			||||||
 | 
						  auto pol   = SE2->_polarisation;
 | 
				
			||||||
 | 
						  auto s2    = SE2->_offset;
 | 
				
			||||||
 | 
						  int pol1 = IcosahedronPatchX;
 | 
				
			||||||
 | 
						  int pol2 = IcosahedronPatchDiagonal;
 | 
				
			||||||
 | 
						  if ( pol != IcosahedronPatchDiagonal ) {
 | 
				
			||||||
 | 
						    pol1 = IcosahedronPatchDiagonal;
 | 
				
			||||||
 | 
						    pol2 = IcosahedronPatchY;
 | 
				
			||||||
 | 
						  }
 | 
				
			||||||
 | 
						  auto Ly = Umu_v(ss)(IcosahedronPatchY);
 | 
				
			||||||
 | 
						  auto Lx_at_xm = Umu_v(s2)(pol1);
 | 
				
			||||||
 | 
						  auto Ld_at_xm = Umu_v(s2)(pol2);
 | 
				
			||||||
 | 
						  coalescedWrite(stapleXD_v[ss](),adj(Ld_at_xm)*Lx_at_xm );
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      });
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  template<class MatterField>
 | 
				
			||||||
 | 
					  void Laplacian(MatterField &in,MatterField &out)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    autoView(out_v,out,AcceleratorWrite);
 | 
				
			||||||
 | 
					    autoView(in_v,in,AcceleratorRead);
 | 
				
			||||||
 | 
					    autoView(stencil_v,NNvv,AcceleratorRead);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    const int np = NNvv._npoints;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    const int ent_Xp = 0;
 | 
				
			||||||
 | 
					    const int ent_Yp = 1;
 | 
				
			||||||
 | 
					    const int ent_Dp = 2;
 | 
				
			||||||
 | 
					    const int ent_Xm = 3;
 | 
				
			||||||
 | 
					    const int ent_Ym = 4;
 | 
				
			||||||
 | 
					    const int ent_Dm = 5;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    accelerator_for(ss,VertexGrid->oSites(),vComplex::Nsimd(),{
 | 
				
			||||||
 | 
						  
 | 
				
			||||||
 | 
					      auto SE = stencil_v.GetEntry(ent_Xp,ss);
 | 
				
			||||||
 | 
					      uint64_t xp_idx = SE->_offset;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      SE = stencil_v.GetEntry(ent_Yp,ss);
 | 
				
			||||||
 | 
					      uint64_t yp_idx = SE->_offset;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      SE = stencil_v.GetEntry(ent_Dp,ss);
 | 
				
			||||||
 | 
					      uint64_t dp_idx = SE->_offset;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      SE = stencil_v.GetEntry(ent_Xm,ss);
 | 
				
			||||||
 | 
					      uint64_t xm_idx = SE->_offset;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      SE = stencil_v.GetEntry(ent_Ym,ss);
 | 
				
			||||||
 | 
					      uint64_t ym_idx = SE->_offset;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      SE = stencil_v.GetEntry(ent_Dm,ss);
 | 
				
			||||||
 | 
					      uint64_t dm_idx = SE->_offset;
 | 
				
			||||||
 | 
					      int missingLink = SE->_missing_link;
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      auto i    = in_v(ss)();
 | 
				
			||||||
 | 
					      auto inxp = in_v(xp_idx)();
 | 
				
			||||||
 | 
					      auto inyp = in_v(yp_idx)();
 | 
				
			||||||
 | 
					      auto indp = in_v(dp_idx)();
 | 
				
			||||||
 | 
					      auto inxm = in_v(xm_idx)();
 | 
				
			||||||
 | 
					      auto inym = in_v(ym_idx)();
 | 
				
			||||||
 | 
					      auto indm = in_v(dm_idx)();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      auto o    = out_v(ss)();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      if ( missingLink ) {
 | 
				
			||||||
 | 
						o = (1.0/5.0)*(inxp+inyp+indp+inxm+inym)-o;
 | 
				
			||||||
 | 
					      } else {
 | 
				
			||||||
 | 
						o = (1.0/6.0)*(inxp+inyp+indp+inxm+inym+indm)-o;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      coalescedWrite(out_v[ss](),o);
 | 
				
			||||||
 | 
					      });
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  template<class MatterField>
 | 
				
			||||||
 | 
					  void CovariantLaplacian(MatterField &in,MatterField &out,DoubledGaugeField &Uds)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    autoView(out_v,out,AcceleratorWrite);
 | 
				
			||||||
 | 
					    autoView(in_v,in,AcceleratorRead);
 | 
				
			||||||
 | 
					    autoView(U_v,Uds,AcceleratorRead);
 | 
				
			||||||
 | 
					    autoView(stencil_v,NNvv,AcceleratorRead);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    const int np = NNvv._npoints;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    const int ent_Xp = 0;
 | 
				
			||||||
 | 
					    const int ent_Yp = 1;
 | 
				
			||||||
 | 
					    const int ent_Dp = 2;
 | 
				
			||||||
 | 
					    const int ent_Xm = 3;
 | 
				
			||||||
 | 
					    const int ent_Ym = 4;
 | 
				
			||||||
 | 
					    const int ent_Dm = 5;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    accelerator_for(ss,VertexGrid->oSites(),vComplex::Nsimd(),{
 | 
				
			||||||
 | 
						  
 | 
				
			||||||
 | 
					      auto SE = stencil_v.GetEntry(ent_Xp,ss);
 | 
				
			||||||
 | 
					      uint64_t xp_idx = SE->_offset;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      SE = stencil_v.GetEntry(ent_Yp,ss);
 | 
				
			||||||
 | 
					      uint64_t yp_idx = SE->_offset;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      SE = stencil_v.GetEntry(ent_Dp,ss);
 | 
				
			||||||
 | 
					      uint64_t dp_idx = SE->_offset;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      SE = stencil_v.GetEntry(ent_Xm,ss);
 | 
				
			||||||
 | 
					      uint64_t xm_idx = SE->_offset;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      SE = stencil_v.GetEntry(ent_Ym,ss);
 | 
				
			||||||
 | 
					      uint64_t ym_idx = SE->_offset;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      SE = stencil_v.GetEntry(ent_Dm,ss);
 | 
				
			||||||
 | 
					      uint64_t dm_idx = SE->_offset;
 | 
				
			||||||
 | 
					      int missingLink = SE->_missing_link;
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      auto i    = in_v(ss)();
 | 
				
			||||||
 | 
					      auto inxp = in_v(xp_idx)();
 | 
				
			||||||
 | 
					      auto inyp = in_v(yp_idx)();
 | 
				
			||||||
 | 
					      auto indp = in_v(dp_idx)();
 | 
				
			||||||
 | 
					      auto inxm = in_v(xm_idx)();
 | 
				
			||||||
 | 
					      auto inym = in_v(ym_idx)();
 | 
				
			||||||
 | 
					      auto indm = in_v(dm_idx)();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      inxp = U_v(ss)(0)*inxp;
 | 
				
			||||||
 | 
					      inyp = U_v(ss)(1)*inyp;
 | 
				
			||||||
 | 
					      indp = U_v(ss)(2)*indp;
 | 
				
			||||||
 | 
					      inxm = U_v(ss)(3)*inxm;
 | 
				
			||||||
 | 
					      inym = U_v(ss)(4)*inym;
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      auto o    = i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      if ( missingLink ) {
 | 
				
			||||||
 | 
						o = (1.0/5.0)*(inxp+inyp+indp+inxm+inym)-i;
 | 
				
			||||||
 | 
					      } else {
 | 
				
			||||||
 | 
						indm = U_v(ss)(5)*indm;
 | 
				
			||||||
 | 
						o = (1.0/6.0)*(inxp+inyp+indp+inxm+inym+indm)-i;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      coalescedWrite(out_v[ss](),o);
 | 
				
			||||||
 | 
					      });
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  void DoubleStore(GaugeField &U,DoubledGaugeField &Uds)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    assert(U.Grid()==EdgeGrid);
 | 
				
			||||||
 | 
					    assert(Uds.Grid()==VertexGrid);
 | 
				
			||||||
 | 
					    autoView(Uds_v,Uds,AcceleratorWrite);
 | 
				
			||||||
 | 
					    autoView(U_v,U,AcceleratorRead);
 | 
				
			||||||
 | 
					    autoView(stencil_v,NNvv,AcceleratorRead);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Vertex result
 | 
				
			||||||
 | 
					    // Edge valued input
 | 
				
			||||||
 | 
					    // Might need an extra case (?)
 | 
				
			||||||
 | 
					    const int np = NNvv._npoints;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    const int ent_Xm = 3;
 | 
				
			||||||
 | 
					    const int ent_Ym = 4;
 | 
				
			||||||
 | 
					    const int ent_Dm = 5;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    accelerator_for(ss,VertexGrid->CartesianOsites(),vComplex::Nsimd(),{
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
					      // Three local links
 | 
				
			||||||
 | 
					      {
 | 
				
			||||||
 | 
						auto Lx = U_v(ss)(IcosahedronPatchX);
 | 
				
			||||||
 | 
						auto Ly = U_v(ss)(IcosahedronPatchY);
 | 
				
			||||||
 | 
						auto Ld = U_v(ss)(IcosahedronPatchDiagonal);
 | 
				
			||||||
 | 
						coalescedWrite(Uds_v[ss](IcosahedronPatchX),Lx);
 | 
				
			||||||
 | 
						coalescedWrite(Uds_v[ss](IcosahedronPatchY),Ly);
 | 
				
			||||||
 | 
						coalescedWrite(Uds_v[ss](IcosahedronPatchDiagonal),Ld);
 | 
				
			||||||
 | 
					      }	
 | 
				
			||||||
 | 
					      // Three backwards links
 | 
				
			||||||
 | 
					      {
 | 
				
			||||||
 | 
						auto SE = stencil_v.GetEntry(ent_Xm,ss);
 | 
				
			||||||
 | 
						auto pol = SE->_polarisation;
 | 
				
			||||||
 | 
						auto s   = SE->_offset;
 | 
				
			||||||
 | 
						int pol1 = IcosahedronPatchX;
 | 
				
			||||||
 | 
						//
 | 
				
			||||||
 | 
						// Xm link is given diagonal unless HemiPatch changes in Northern hemisphere
 | 
				
			||||||
 | 
						// But here for double storing we need the reverse link so
 | 
				
			||||||
 | 
						// return either the Xplus or Diag plus direction
 | 
				
			||||||
 | 
						//
 | 
				
			||||||
 | 
						if ( pol != IcosahedronPatchDiagonal ) {
 | 
				
			||||||
 | 
						  pol1 = IcosahedronPatchDiagonal;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						auto Lx_at_xm = U_v(s)(pol1);
 | 
				
			||||||
 | 
						Lx_at_xm = adj(Lx_at_xm);
 | 
				
			||||||
 | 
						coalescedWrite(Uds_v[ss](3),Lx_at_xm );
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      {
 | 
				
			||||||
 | 
						auto SE   = stencil_v.GetEntry(ent_Ym,ss);
 | 
				
			||||||
 | 
						auto pol  = SE->_polarisation;
 | 
				
			||||||
 | 
						auto s    = SE->_offset;
 | 
				
			||||||
 | 
						//
 | 
				
			||||||
 | 
						// Ym link is given diagonal unless HemiPatch changes in the Southern hemisphere
 | 
				
			||||||
 | 
						// But here for double storing we need the reverse link so
 | 
				
			||||||
 | 
						// return either the Yplus or Diag plus direction
 | 
				
			||||||
 | 
						// 
 | 
				
			||||||
 | 
						int pol1 = IcosahedronPatchY;
 | 
				
			||||||
 | 
						if ( pol != IcosahedronPatchDiagonal ) {
 | 
				
			||||||
 | 
						  pol1 = IcosahedronPatchDiagonal;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						auto Ly_at_ym = U_v(s)(pol1);
 | 
				
			||||||
 | 
						Ly_at_ym = adj(Ly_at_ym);
 | 
				
			||||||
 | 
						coalescedWrite(Uds_v[ss](4),Ly_at_ym );
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      int missingLink;
 | 
				
			||||||
 | 
					      {      
 | 
				
			||||||
 | 
						auto SE = stencil_v.GetEntry(ent_Dm,ss);
 | 
				
			||||||
 | 
						auto s  = SE->_offset;
 | 
				
			||||||
 | 
						// Dm link given the correct value for this use case
 | 
				
			||||||
 | 
						auto pol= SE->_polarisation;
 | 
				
			||||||
 | 
						missingLink = SE->_missing_link;
 | 
				
			||||||
 | 
						if ( ! missingLink ) {
 | 
				
			||||||
 | 
						  auto Ld_at_dm  =  U_v(s)(pol);
 | 
				
			||||||
 | 
						  Ld_at_dm = adj(Ld_at_dm);
 | 
				
			||||||
 | 
						  coalescedWrite(Uds_v[ss](5),Ld_at_dm );
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    });
 | 
				
			||||||
 | 
					    auto pole_sites = VertexGrid->oSites() - VertexGrid->CartesianOsites();
 | 
				
			||||||
 | 
					    auto pole_offset= VertexGrid->CartesianOsites();
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    accelerator_for(ss,pole_sites,vComplex::Nsimd(),{
 | 
				
			||||||
 | 
						for(int p=0;p<HemiPatches;p++){ // Neighbours of each pole
 | 
				
			||||||
 | 
						auto SE = stencil_v.GetEntry(p,pole_offset+ss);
 | 
				
			||||||
 | 
						auto s  = SE->_offset;
 | 
				
			||||||
 | 
						auto pol= SE->_polarisation;
 | 
				
			||||||
 | 
						auto Link =  adj(U_v(s)(pol));
 | 
				
			||||||
 | 
						coalescedWrite(Uds_v[pole_offset+ss](p),Link);
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    });
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  void  GaugeTransform(GaugeLinkField >, GaugeField &Umu)
 | 
				
			||||||
 | 
					  {
 | 
				
			||||||
 | 
					    autoView(Umu_v,Umu,AcceleratorWrite);
 | 
				
			||||||
 | 
					    autoView(g_v,gt,AcceleratorRead);
 | 
				
			||||||
 | 
					    autoView(stencil_v,NNev,AcceleratorRead);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    const int np = NNev._npoints;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    const int ent_Xp = 0;
 | 
				
			||||||
 | 
					    const int ent_Yp = 1;
 | 
				
			||||||
 | 
					    const int ent_Dp = 2;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    accelerator_for(ss,EdgeGrid->oSites(),vComplex::Nsimd(),{
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      // Three forward links from this site
 | 
				
			||||||
 | 
					      auto Lx = Umu_v(ss)(IcosahedronPatchX);
 | 
				
			||||||
 | 
					      auto Ly = Umu_v(ss)(IcosahedronPatchY);
 | 
				
			||||||
 | 
					      auto Ld = Umu_v(ss)(IcosahedronPatchDiagonal);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      auto SE = stencil_v.GetEntry(ent_Xp,ss);
 | 
				
			||||||
 | 
					      uint64_t xp_idx = SE->_offset;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      SE = stencil_v.GetEntry(ent_Yp,ss);
 | 
				
			||||||
 | 
					      uint64_t yp_idx = SE->_offset;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      SE = stencil_v.GetEntry(ent_Dp,ss);
 | 
				
			||||||
 | 
					      uint64_t dp_idx = SE->_offset;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      auto g  = g_v(ss)();
 | 
				
			||||||
 | 
					      auto gx = g_v(xp_idx)();
 | 
				
			||||||
 | 
					      auto gy = g_v(yp_idx)();
 | 
				
			||||||
 | 
					      auto gd = g_v(dp_idx)();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      auto lx = Umu_v(ss)(IcosahedronPatchX);
 | 
				
			||||||
 | 
					      auto ly = Umu_v(ss)(IcosahedronPatchY);
 | 
				
			||||||
 | 
					      auto ld = Umu_v(ss)(IcosahedronPatchDiagonal);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      lx = g*lx*adj(gx);
 | 
				
			||||||
 | 
					      ly = g*ly*adj(gy);
 | 
				
			||||||
 | 
					      ld = g*ld*adj(gd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      coalescedWrite(Umu_v[ss](IcosahedronPatchX),lx);
 | 
				
			||||||
 | 
					      coalescedWrite(Umu_v[ss](IcosahedronPatchY),ly);
 | 
				
			||||||
 | 
					      coalescedWrite(Umu_v[ss](IcosahedronPatchDiagonal),ld);
 | 
				
			||||||
 | 
					      });
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int main (int argc, char ** argv)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  Grid_init(&argc,&argv);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const int L=8;
 | 
				
			||||||
 | 
					  const int Npatch = IcosahedralPatches;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Put SIMD all in time direction
 | 
				
			||||||
 | 
					  Coordinate latt_size = GridDefaultLatt();
 | 
				
			||||||
 | 
					  Coordinate simd_layout({1,1,vComplex::Nsimd(),1});
 | 
				
			||||||
 | 
					  Coordinate mpi_layout = GridDefaultMpi();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " mpi "<<mpi_layout<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " simd "<<simd_layout<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " latt "<<latt_size<<std::endl;
 | 
				
			||||||
 | 
					  GridCartesianCrossIcosahedron EdgeGrid(latt_size,simd_layout,mpi_layout,IcosahedralEdges);
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " Created edge grid "<<std::endl;
 | 
				
			||||||
 | 
					  GridCartesianCrossIcosahedron VertexGrid(latt_size,simd_layout,mpi_layout,IcosahedralVertices);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " Created vertex grid "<<std::endl;
 | 
				
			||||||
 | 
					  LatticeLorentzColourMatrix Umu(&EdgeGrid);
 | 
				
			||||||
 | 
					  LatticeLorentzColourMatrix Umuck(&EdgeGrid);
 | 
				
			||||||
 | 
					  LatticeComplex Phi(&VertexGrid);
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " Created two fields "<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Phi = Zero();
 | 
				
			||||||
 | 
					  Umu = Zero();
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " Zeroed two fields "<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Complex one (1.0);
 | 
				
			||||||
 | 
					  Phi = one;
 | 
				
			||||||
 | 
					  Umu = one;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " V = "<<norm2(Phi)<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " Expect "<<latt_size[0]*latt_size[1]*latt_size[2]*10+2*latt_size[2]<<std::endl;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " E = "<<norm2(Umu)<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " Expect "<<latt_size[0]*latt_size[1]*latt_size[2]*10*4<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  //  std::cout << " Umu "<<Umu<<std::endl; // debugged, so comment out to reduce verbose
 | 
				
			||||||
 | 
					  //  std::cout << " Phi "<<Phi<<std::endl; // debugged, so comment out to reduce verbose
 | 
				
			||||||
 | 
					  LatticePole(Phi,South);
 | 
				
			||||||
 | 
					  //  std::cout << " Phi South Pole set\n"<<Phi<<std::endl; // debugged, so comment out to reduce verbose
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  LatticePole(Phi,North);
 | 
				
			||||||
 | 
					  //  std::cout << " Phi North Pole set\n"<<Phi<<std::endl; // debugged, so comment out to reduce verbose
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  for(int mu=0;mu<VertexGrid._ndimension;mu++){
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << " Calling lattice coordinate mu="<<mu<<std::endl;
 | 
				
			||||||
 | 
					    LatticeCoordinate(Phi,mu);
 | 
				
			||||||
 | 
					    //    std::cout << GridLogMessage << " Phi coor mu="<<mu<<"\n"<<Phi<<std::endl; // debugged, so comment out to reduce verbose
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << "Creating face stencil"<<std::endl;
 | 
				
			||||||
 | 
					  IcosahedralSupport<IcosahedralGimpl> Support(&VertexGrid,&EdgeGrid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " Calling Test Geometry "<<std::endl;
 | 
				
			||||||
 | 
					  Support.FaceStencil.TestGeometry();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Umu=one;
 | 
				
			||||||
 | 
					  LatticeComplex plaq1(&EdgeGrid);
 | 
				
			||||||
 | 
					  LatticeComplex plaq2(&EdgeGrid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  LatticeComplex plaq_ref(&EdgeGrid);
 | 
				
			||||||
 | 
					  plaq_ref=1.0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Support.ForwardTriangles(Umu,plaq1,plaq2);
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " plaq1 "<< norm2(plaq1)<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " plaq2 "<< norm2(plaq2)<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " plaq1 err "<< norm2(plaq1-plaq_ref)<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " plaq2 err "<< norm2(plaq2-plaq_ref)<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Random gauge xform
 | 
				
			||||||
 | 
					  std::vector<int> seeds({1,2,3,4});
 | 
				
			||||||
 | 
					  GridParallelRNG  vRNG(&EdgeGrid);   vRNG.SeedFixedIntegers(seeds);
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  LatticeColourMatrix g(&VertexGrid);
 | 
				
			||||||
 | 
					  LatticeReal gr(&VertexGrid);
 | 
				
			||||||
 | 
					  LatticeComplex gc(&VertexGrid);
 | 
				
			||||||
 | 
					  //  gr = Zero();
 | 
				
			||||||
 | 
					  gaussian(vRNG,gr);
 | 
				
			||||||
 | 
					  Complex ci(0.0,1.0);
 | 
				
			||||||
 | 
					  gc = toComplex(gr);
 | 
				
			||||||
 | 
					  g=one;
 | 
				
			||||||
 | 
					  g = g * exp(ci*gc);
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << "****************************************"<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " Check plaquette is gauge invariant "<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << "****************************************"<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " applying gauge transform"<<std::endl;
 | 
				
			||||||
 | 
					  Support.GaugeTransform    (g,Umu);
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " applied gauge transform "<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " recalculating plaquette "<<std::endl;
 | 
				
			||||||
 | 
					  Support.ForwardTriangles(Umu,plaq1,plaq2);
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " plaq1 "<< norm2(plaq1)<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " plaq2 "<< norm2(plaq2)<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " plaq1 err "<< norm2(plaq1-plaq_ref)<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " plaq2 err "<< norm2(plaq2-plaq_ref)<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  typedef IcosahedralGimpl::GaugeLinkField GaugeLinkField;
 | 
				
			||||||
 | 
					  typedef IcosahedralGimpl::GaugeField     GaugeField;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  GaugeLinkField stapleXY(&EdgeGrid);
 | 
				
			||||||
 | 
					  GaugeLinkField stapleYX(&EdgeGrid);
 | 
				
			||||||
 | 
					  GaugeLinkField stapleXD(&EdgeGrid);
 | 
				
			||||||
 | 
					  GaugeLinkField stapleDX(&EdgeGrid);
 | 
				
			||||||
 | 
					  GaugeLinkField stapleYD(&EdgeGrid);
 | 
				
			||||||
 | 
					  GaugeLinkField stapleDY(&EdgeGrid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  GaugeLinkField linkX(&EdgeGrid);
 | 
				
			||||||
 | 
					  GaugeLinkField linkY(&EdgeGrid);
 | 
				
			||||||
 | 
					  GaugeLinkField linkD(&EdgeGrid);
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << "****************************************"<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " Check triangular staples match plaquette "<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << "****************************************"<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Support.IcosahedralStaples(Umu,stapleXY,stapleYX,
 | 
				
			||||||
 | 
								     stapleXD,stapleDX,
 | 
				
			||||||
 | 
								     stapleYD,stapleDY);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  linkX = peekLorentz(Umu,IcosahedronPatchX);
 | 
				
			||||||
 | 
					  linkY = peekLorentz(Umu,IcosahedronPatchY);
 | 
				
			||||||
 | 
					  linkD = peekLorentz(Umu,IcosahedronPatchDiagonal);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // OK
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " trace D*StapleXY "<<norm2(trace(linkD * stapleXY))<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " err " << norm2(trace(linkD * stapleXY)-plaq_ref)<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // BAD
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " trace D*StapleYX "<<norm2(trace(linkD * stapleYX))<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " err " << norm2(trace(linkD * stapleYX)-plaq_ref)<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " trace X*StapleYD "<<norm2(trace(linkX * stapleYD))<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " err " << norm2(trace(linkX * stapleYD)-plaq_ref)<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " trace X*StapleDY "<<norm2(trace(linkX * stapleDY))<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " err " << norm2(trace(linkX * stapleDY)-plaq_ref)<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " trace Y*StapleXD "<<norm2(trace(linkY * stapleXD))<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " err " << norm2(trace(linkY * stapleXD)-plaq_ref)<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " trace Y*StapleDX "<<norm2(trace(linkY * stapleDX))<<std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << " err " << norm2(trace(linkY * stapleDX)-plaq_ref)<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage<< "Calling Laplacian" <<std::endl;
 | 
				
			||||||
 | 
					  LatticeColourVector in(&VertexGrid);
 | 
				
			||||||
 | 
					  LatticeColourVector out(&VertexGrid);
 | 
				
			||||||
 | 
					  LatticeColourVector gout(&VertexGrid);
 | 
				
			||||||
 | 
					  LatticeColourVector gin(&VertexGrid);
 | 
				
			||||||
 | 
					  gaussian(vRNG,in);
 | 
				
			||||||
 | 
					  Support.Laplacian(in,out);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage<< "Calling double storing gauge field" <<std::endl;
 | 
				
			||||||
 | 
					  LatticeDoubledGaugeField Uds(&VertexGrid);
 | 
				
			||||||
 | 
					  Support.DoubleStore(Umu,Uds);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Support.CovariantLaplacian(in,out,Uds);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  auto ip = innerProduct(out, in);
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage<< "Applied covariant laplacian !" <<std::endl;
 | 
				
			||||||
 | 
					  /*
 | 
				
			||||||
 | 
					   * CovariantLaplacian testing -- check the laplacian is gauge invariant
 | 
				
			||||||
 | 
					   *
 | 
				
			||||||
 | 
					   * D[U_gt](gF) = g D[U] g^dag g F = g D[U] F
 | 
				
			||||||
 | 
					   */
 | 
				
			||||||
 | 
					  gout = g*out;
 | 
				
			||||||
 | 
					  gin  = g*in;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Support.GaugeTransform(g,Umu);
 | 
				
			||||||
 | 
					  Support.DoubleStore(Umu,Uds);
 | 
				
			||||||
 | 
					  Support.CovariantLaplacian(gin,out,Uds);
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage<< "Applied gauge transformed covariant laplacian to transformed vector !" <<std::endl;
 | 
				
			||||||
 | 
					  auto ipgt = innerProduct(out, gin);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << "Testing D[U_gt](gF) = g D[U] F : defect is "<<norm2(out-gout)<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ip = ip - ipgt;
 | 
				
			||||||
 | 
					  std::cout << "Testing F D[U](F) = (gF) D[U_gt] gF : defect is "<<ip<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Grid_finalize();
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		Reference in New Issue
	
	Block a user