mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 05:54:32 +00:00 
			
		
		
		
	Merge branch 'develop' into feature/gpu-port
Conflicts: Grid/qcd/action/fermion/WilsonKernelsAsm.cc Grid/qcd/action/fermion/implementation/ImprovedStaggeredFermionImplementation.h Grid/qcd/action/fermion/implementation/StaggeredKernelsAsm.h benchmarks/Benchmark_comms.cc
This commit is contained in:
		
							
								
								
									
										127
									
								
								Grid/qcd/action/fermion/WilsonKernelsAsm.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										127
									
								
								Grid/qcd/action/fermion/WilsonKernelsAsm.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,127 @@
 | 
				
			|||||||
 | 
					/*************************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Grid physics library, www.github.com/paboyle/Grid 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Source file: ./lib/qcd/action/fermion/WilsonKernelsAsm.cc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Copyright (C) 2015
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					    it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					    the Free Software Foundation; either version 2 of the License, or
 | 
				
			||||||
 | 
					    (at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					    GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    You should have received a copy of the GNU General Public License along
 | 
				
			||||||
 | 
					    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
 | 
					    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    See the full license in the file "LICENSE" in the top level distribution directory
 | 
				
			||||||
 | 
					*************************************************************************************/
 | 
				
			||||||
 | 
					/*  END LEGAL */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <Grid/qcd/action/fermion/FermionCore.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace Grid {
 | 
				
			||||||
 | 
					namespace QCD {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					///////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					// Default to no assembler implementation
 | 
				
			||||||
 | 
					///////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					template<class Impl> void 
 | 
				
			||||||
 | 
					WilsonKernels<Impl >::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
 | 
				
			||||||
 | 
										  int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  assert(0);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class Impl> void 
 | 
				
			||||||
 | 
					WilsonKernels<Impl >::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
 | 
				
			||||||
 | 
										     int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  assert(0);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class Impl> void 
 | 
				
			||||||
 | 
					WilsonKernels<Impl >::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
 | 
				
			||||||
 | 
										  int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  assert(0);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class Impl> void 
 | 
				
			||||||
 | 
					WilsonKernels<Impl >::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
 | 
				
			||||||
 | 
										     int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  assert(0);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class Impl> void 
 | 
				
			||||||
 | 
					WilsonKernels<Impl >::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
 | 
				
			||||||
 | 
										  int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  assert(0);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class Impl> void 
 | 
				
			||||||
 | 
					WilsonKernels<Impl >::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
 | 
				
			||||||
 | 
										     int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  assert(0);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <Grid/qcd/action/fermion/WilsonKernelsAsmAvx512.h>
 | 
				
			||||||
 | 
					#include <Grid/qcd/action/fermion/WilsonKernelsAsmQPX.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define INSTANTIATE_ASM(A)\
 | 
				
			||||||
 | 
					template void WilsonKernels<A>::AsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
 | 
				
			||||||
 | 
					                                  int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
 | 
				
			||||||
 | 
					 \
 | 
				
			||||||
 | 
					template void WilsonKernels<A>::AsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
 | 
				
			||||||
 | 
					                                  int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
 | 
				
			||||||
 | 
					template void WilsonKernels<A>::AsmDhopSiteInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
 | 
				
			||||||
 | 
					                                  int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
 | 
				
			||||||
 | 
					 \
 | 
				
			||||||
 | 
					template void WilsonKernels<A>::AsmDhopSiteDagInt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
 | 
				
			||||||
 | 
					                                  int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
 | 
				
			||||||
 | 
					template void WilsonKernels<A>::AsmDhopSiteExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
 | 
				
			||||||
 | 
					                                  int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
 | 
				
			||||||
 | 
					 \
 | 
				
			||||||
 | 
					template void WilsonKernels<A>::AsmDhopSiteDagExt(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
 | 
				
			||||||
 | 
					                                  int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out);\
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					INSTANTIATE_ASM(WilsonImplF);
 | 
				
			||||||
 | 
					INSTANTIATE_ASM(WilsonImplD);
 | 
				
			||||||
 | 
					INSTANTIATE_ASM(ZWilsonImplF);
 | 
				
			||||||
 | 
					INSTANTIATE_ASM(ZWilsonImplD);
 | 
				
			||||||
 | 
					INSTANTIATE_ASM(GparityWilsonImplF);
 | 
				
			||||||
 | 
					INSTANTIATE_ASM(GparityWilsonImplD);
 | 
				
			||||||
 | 
					INSTANTIATE_ASM(DomainWallVec5dImplF);
 | 
				
			||||||
 | 
					INSTANTIATE_ASM(DomainWallVec5dImplD);
 | 
				
			||||||
 | 
					INSTANTIATE_ASM(ZDomainWallVec5dImplF);
 | 
				
			||||||
 | 
					INSTANTIATE_ASM(ZDomainWallVec5dImplD);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					INSTANTIATE_ASM(WilsonImplFH);
 | 
				
			||||||
 | 
					INSTANTIATE_ASM(WilsonImplDF);
 | 
				
			||||||
 | 
					INSTANTIATE_ASM(ZWilsonImplFH);
 | 
				
			||||||
 | 
					INSTANTIATE_ASM(ZWilsonImplDF);
 | 
				
			||||||
 | 
					INSTANTIATE_ASM(GparityWilsonImplFH);
 | 
				
			||||||
 | 
					INSTANTIATE_ASM(GparityWilsonImplDF);
 | 
				
			||||||
 | 
					INSTANTIATE_ASM(DomainWallVec5dImplFH);
 | 
				
			||||||
 | 
					INSTANTIATE_ASM(DomainWallVec5dImplDF);
 | 
				
			||||||
 | 
					INSTANTIATE_ASM(ZDomainWallVec5dImplFH);
 | 
				
			||||||
 | 
					INSTANTIATE_ASM(ZDomainWallVec5dImplDF);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -25,8 +25,8 @@ with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			|||||||
See the full license in the file "LICENSE" in the top level distribution
 | 
					See the full license in the file "LICENSE" in the top level distribution
 | 
				
			||||||
directory
 | 
					directory
 | 
				
			||||||
*************************************************************************************/
 | 
					*************************************************************************************/
 | 
				
			||||||
			   /*  END LEGAL */
 | 
					/*  END LEGAL */
 | 
				
			||||||
#include <Grid.h>
 | 
					#include <Grid/Grid.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#pragma once 
 | 
					#pragma once 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -28,11 +28,11 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
				
			|||||||
    /*  END LEGAL */
 | 
					    /*  END LEGAL */
 | 
				
			||||||
#pragma once
 | 
					#pragma once
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <Grid.h>
 | 
					#include <Grid/Grid.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef AVX512
 | 
					#ifdef AVX512
 | 
				
			||||||
#include <simd/Intel512common.h>
 | 
					#include <Grid/simd/Intel512common.h>
 | 
				
			||||||
#include <simd/Intel512avx.h>
 | 
					#include <Grid/simd/Intel512avx.h>
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Interleave operations from two directions
 | 
					// Interleave operations from two directions
 | 
				
			||||||
@@ -681,7 +681,7 @@ void StaggeredKernels<Impl>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo,
 | 
				
			|||||||
  gauge3 =(uint64_t)&UU[sU]( T ); 
 | 
					  gauge3 =(uint64_t)&UU[sU]( T ); 
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  // This is the single precision 5th direction vectorised kernel
 | 
					  // This is the single precision 5th direction vectorised kernel
 | 
				
			||||||
#include <simd/Intel512single.h>
 | 
					#include <Grid/simd/Intel512single.h>
 | 
				
			||||||
template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, 
 | 
					template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, 
 | 
				
			||||||
								    DoubledGaugeFieldView &U,
 | 
													    DoubledGaugeFieldView &U,
 | 
				
			||||||
								    DoubledGaugeFieldView &UUU,
 | 
													    DoubledGaugeFieldView &UUU,
 | 
				
			||||||
@@ -735,7 +735,7 @@ template <> void StaggeredKernels<StaggeredVec5dImplF>::DhopSiteAsm(StencilImpl
 | 
				
			|||||||
   
 | 
					   
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <simd/Intel512double.h>
 | 
					#include <Grid/simd/Intel512double.h>
 | 
				
			||||||
template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, 
 | 
					template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, 
 | 
				
			||||||
								    DoubledGaugeFieldView &U,
 | 
													    DoubledGaugeFieldView &U,
 | 
				
			||||||
								    DoubledGaugeFieldView &UUU,
 | 
													    DoubledGaugeFieldView &UUU,
 | 
				
			||||||
@@ -820,7 +820,7 @@ template <> void StaggeredKernels<StaggeredVec5dImplD>::DhopSiteAsm(StencilImpl
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  // This is the single precision 5th direction vectorised kernel
 | 
					  // This is the single precision 5th direction vectorised kernel
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <simd/Intel512single.h>
 | 
					#include <Grid/simd/Intel512single.h>
 | 
				
			||||||
template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, 
 | 
					template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, 
 | 
				
			||||||
							       DoubledGaugeFieldView &U,
 | 
												       DoubledGaugeFieldView &U,
 | 
				
			||||||
							       DoubledGaugeFieldView &UUU,
 | 
												       DoubledGaugeFieldView &UUU,
 | 
				
			||||||
@@ -889,7 +889,7 @@ template <> void StaggeredKernels<StaggeredImplF>::DhopSiteAsm(StencilImpl &st,
 | 
				
			|||||||
#endif
 | 
					#endif
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <simd/Intel512double.h>
 | 
					#include <Grid/simd/Intel512double.h>
 | 
				
			||||||
template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, 
 | 
					template <> void StaggeredKernels<StaggeredImplD>::DhopSiteAsm(StencilImpl &st, LebesgueOrder &lo, 
 | 
				
			||||||
							       DoubledGaugeFieldView &U,
 | 
												       DoubledGaugeFieldView &U,
 | 
				
			||||||
							       DoubledGaugeFieldView &UUU,
 | 
												       DoubledGaugeFieldView &UUU,
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -26,7 +26,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
				
			|||||||
    See the full license in the file "LICENSE" in the top level distribution directory
 | 
					    See the full license in the file "LICENSE" in the top level distribution directory
 | 
				
			||||||
    *************************************************************************************/
 | 
					    *************************************************************************************/
 | 
				
			||||||
    /*  END LEGAL */
 | 
					    /*  END LEGAL */
 | 
				
			||||||
#include <Grid.h>
 | 
					#include <Grid/Grid.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#pragma once
 | 
					#pragma once
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -29,7 +29,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
using namespace std;
 | 
					using namespace std;
 | 
				
			||||||
using namespace Grid;
 | 
					using namespace Grid;
 | 
				
			||||||
 ;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct time_statistics{
 | 
					struct time_statistics{
 | 
				
			||||||
  double mean;
 | 
					  double mean;
 | 
				
			||||||
@@ -187,9 +186,9 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
	rbuf[mu].resize(lat*lat*lat*Ls);
 | 
						rbuf[mu].resize(lat*lat*lat*Ls);
 | 
				
			||||||
	//	std::cout << " buffers " << std::hex << (uint64_t)&xbuf[mu][0] <<" " << (uint64_t)&rbuf[mu][0] <<std::endl;
 | 
						//	std::cout << " buffers " << std::hex << (uint64_t)&xbuf[mu][0] <<" " << (uint64_t)&rbuf[mu][0] <<std::endl;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					      uint64_t bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      int ncomm;
 | 
					      int ncomm;
 | 
				
			||||||
      int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
      for(int i=0;i<Nloop;i++){
 | 
					      for(int i=0;i<Nloop;i++){
 | 
				
			||||||
      double start=usecond();
 | 
					      double start=usecond();
 | 
				
			||||||
@@ -276,15 +275,22 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
      std::vector<HalfSpinColourVectorD *> xbuf(8);
 | 
					      std::vector<HalfSpinColourVectorD *> xbuf(8);
 | 
				
			||||||
      std::vector<HalfSpinColourVectorD *> rbuf(8);
 | 
					      std::vector<HalfSpinColourVectorD *> rbuf(8);
 | 
				
			||||||
      Grid.ShmBufferFreeAll();
 | 
					      Grid.ShmBufferFreeAll();
 | 
				
			||||||
 | 
					      uint64_t bytes = lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
 | 
				
			||||||
      for(int d=0;d<8;d++){
 | 
					      for(int d=0;d<8;d++){
 | 
				
			||||||
 | 
					<<<<<<< HEAD
 | 
				
			||||||
	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
 | 
						xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
 | 
				
			||||||
	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
 | 
						rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
 | 
				
			||||||
	//	bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
 | 
						//	bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
 | 
				
			||||||
	//	bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
 | 
						//	bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
 | 
				
			||||||
 | 
					=======
 | 
				
			||||||
 | 
						xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes);
 | 
				
			||||||
 | 
						rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes);
 | 
				
			||||||
 | 
						bzero((void *)xbuf[d],bytes);
 | 
				
			||||||
 | 
						bzero((void *)rbuf[d],bytes);
 | 
				
			||||||
 | 
					>>>>>>> develop
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      int ncomm;
 | 
					      int ncomm;
 | 
				
			||||||
      int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
      double dbytes;
 | 
					      double dbytes;
 | 
				
			||||||
      for(int i=0;i<Nloop;i++){
 | 
					      for(int i=0;i<Nloop;i++){
 | 
				
			||||||
@@ -373,15 +379,22 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
      std::vector<HalfSpinColourVectorD *> xbuf(8);
 | 
					      std::vector<HalfSpinColourVectorD *> xbuf(8);
 | 
				
			||||||
      std::vector<HalfSpinColourVectorD *> rbuf(8);
 | 
					      std::vector<HalfSpinColourVectorD *> rbuf(8);
 | 
				
			||||||
      Grid.ShmBufferFreeAll();
 | 
					      Grid.ShmBufferFreeAll();
 | 
				
			||||||
 | 
					      uint64_t bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
 | 
				
			||||||
      for(int d=0;d<8;d++){
 | 
					      for(int d=0;d<8;d++){
 | 
				
			||||||
 | 
					<<<<<<< HEAD
 | 
				
			||||||
	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
 | 
						xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
 | 
				
			||||||
	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
 | 
						rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
 | 
				
			||||||
	//	bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
 | 
						//	bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
 | 
				
			||||||
	//	bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
 | 
						//	bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
 | 
				
			||||||
 | 
					=======
 | 
				
			||||||
 | 
						xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes);
 | 
				
			||||||
 | 
						rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes);
 | 
				
			||||||
 | 
						bzero((void *)xbuf[d],bytes);
 | 
				
			||||||
 | 
						bzero((void *)rbuf[d],bytes);
 | 
				
			||||||
 | 
					>>>>>>> develop
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      int ncomm;
 | 
					      int ncomm;
 | 
				
			||||||
      int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
 | 
					 | 
				
			||||||
      double dbytes;
 | 
					      double dbytes;
 | 
				
			||||||
      for(int i=0;i<Nloop;i++){
 | 
					      for(int i=0;i<Nloop;i++){
 | 
				
			||||||
	double start=usecond();
 | 
						double start=usecond();
 | 
				
			||||||
@@ -471,15 +484,13 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
      std::vector<HalfSpinColourVectorD *> xbuf(8);
 | 
					      std::vector<HalfSpinColourVectorD *> xbuf(8);
 | 
				
			||||||
      std::vector<HalfSpinColourVectorD *> rbuf(8);
 | 
					      std::vector<HalfSpinColourVectorD *> rbuf(8);
 | 
				
			||||||
      Grid.ShmBufferFreeAll();
 | 
					      Grid.ShmBufferFreeAll();
 | 
				
			||||||
 | 
					      uint64_t bytes = lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
 | 
				
			||||||
      for(int d=0;d<8;d++){
 | 
					      for(int d=0;d<8;d++){
 | 
				
			||||||
	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
 | 
						xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes);
 | 
				
			||||||
	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
 | 
						rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(bytes);
 | 
				
			||||||
	//	bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
 | 
					 | 
				
			||||||
	//	bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD));
 | 
					 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      int ncomm;
 | 
					      int ncomm;
 | 
				
			||||||
      int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD);
 | 
					 | 
				
			||||||
      double dbytes;
 | 
					      double dbytes;
 | 
				
			||||||
      for(int i=0;i<Nloop;i++){
 | 
					      for(int i=0;i<Nloop;i++){
 | 
				
			||||||
	double start=usecond();
 | 
						double start=usecond();
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -262,7 +262,6 @@ Set HEADER_SEARCH_PATHS to:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    $Grid/build$(CONFIGURATION)/Grid
 | 
					    $Grid/build$(CONFIGURATION)/Grid
 | 
				
			||||||
    $Grid
 | 
					    $Grid
 | 
				
			||||||
    $Grid/Grid
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
followed by (***the order is important***) the locations reported by `grid-config --cxxflags`, ignoring duplicates, e.g.:
 | 
					followed by (***the order is important***) the locations reported by `grid-config --cxxflags`, ignoring duplicates, e.g.:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -272,7 +271,7 @@ followed by (***the order is important***) the locations reported by `grid-confi
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
**Note: the easiest way to set this value is to put it all on one line, space separated, and edit the text to the right of `HEADER_SEARCH_PATHS`**, i.e.:
 | 
					**Note: the easiest way to set this value is to put it all on one line, space separated, and edit the text to the right of `HEADER_SEARCH_PATHS`**, i.e.:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    $Grid/build$(CONFIGURATION)/Grid $Grid $Grid/Grid $GridPre/openmpi/include $GridPkg/include $GridPre/lime/include
 | 
					    $Grid/build$(CONFIGURATION)/Grid $Grid $GridPre/openmpi/include $GridPkg/include $GridPre/lime/include
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#### LIBRARY_SEARCH_PATHS
 | 
					#### LIBRARY_SEARCH_PATHS
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -298,7 +297,7 @@ The easiest way to link to all required libraries is to obtain a list of all lib
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
and pasting the output ***with `-lGrid -lHadrons ` prepended*** (including the `-l` switches) directly into `OTHER_LDFLAGS`, e.g.:
 | 
					and pasting the output ***with `-lGrid -lHadrons ` prepended*** (including the `-l` switches) directly into `OTHER_LDFLAGS`, e.g.:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    -lGrid -lHadrons -lmpi -lhdf5_cpp -lz -lcrypto -llime -lfftw3f -lfftw3 -lmpfr -lgmp -lstdc++ -lm -lz -lhdf5
 | 
					    -lGrid -lHadrons -lmpi -lhdf5_cpp -lhdf5 -lz -lcrypto -llime -lfftw3f -lfftw3 -lmpfr -lgmp -lm
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## Make additional configurations
 | 
					## Make additional configurations
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user