mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 14:04:32 +00:00 
			
		
		
		
	First version of the double precision for testing purposes
It does not compile the single and double versions at the same time
This commit is contained in:
		@@ -53,12 +53,13 @@ WilsonKernels<Impl >::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if defined(AVX512) 
 | 
					#if defined(AVX512) 
 | 
				
			||||||
 | 
					#include <simd/Intel512wilson.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if defined(GRID_DEFAULT_PRECISION_SINGLE)    
 | 
				
			||||||
    ///////////////////////////////////////////////////////////
 | 
					    ///////////////////////////////////////////////////////////
 | 
				
			||||||
    // If we are AVX512 specialise the single precision routine
 | 
					    // If we are AVX512 specialise the single precision routine
 | 
				
			||||||
    ///////////////////////////////////////////////////////////
 | 
					    ///////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <simd/Intel512wilson.h>
 | 
					 | 
				
			||||||
#include <simd/Intel512single.h>
 | 
					#include <simd/Intel512single.h>
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
static Vector<vComplexF> signs;
 | 
					static Vector<vComplexF> signs;
 | 
				
			||||||
@@ -78,6 +79,7 @@ static Vector<vComplexF> signs;
 | 
				
			|||||||
#define MAYBEPERM(A,perm) if (perm) { A ; }
 | 
					#define MAYBEPERM(A,perm) if (perm) { A ; }
 | 
				
			||||||
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
 | 
					#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
 | 
				
			||||||
#define FX(A) WILSONASM_ ##A
 | 
					#define FX(A) WILSONASM_ ##A
 | 
				
			||||||
 | 
					#define COMPLEX_TYPE vComplexF
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
#undef KERNEL_DAG
 | 
					#undef KERNEL_DAG
 | 
				
			||||||
template<> void 
 | 
					template<> void 
 | 
				
			||||||
@@ -113,8 +115,79 @@ template<> void
 | 
				
			|||||||
WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
 | 
					WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
 | 
				
			||||||
							    int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
												    int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
				
			||||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 | 
					#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 | 
				
			||||||
 | 
					#undef COMPLEX_TYPE
 | 
				
			||||||
	
 | 
						
 | 
				
			||||||
#endif
 | 
					#endif //Single precision			    
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if defined(GRID_DEFAULT_PRECISION_DOUBLE)    
 | 
				
			||||||
 | 
					//temporarily separating the two sections
 | 
				
			||||||
 | 
					//for debug in isolation
 | 
				
			||||||
 | 
					//can be unified
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ///////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    // If we are AVX512 specialise the double precision routine
 | 
				
			||||||
 | 
					    ///////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <simd/Intel512double.h>
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					static Vector<vComplexD> signs;
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					  int setupSigns(void ){
 | 
				
			||||||
 | 
					    Vector<vComplexD> bother(2);
 | 
				
			||||||
 | 
					    signs = bother;
 | 
				
			||||||
 | 
					    vrsign(signs[0]);
 | 
				
			||||||
 | 
					    visign(signs[1]);
 | 
				
			||||||
 | 
					    return 1;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  static int signInit = setupSigns();
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					#define label(A)  ilabel(A)
 | 
				
			||||||
 | 
					#define ilabel(A) ".globl\n"  #A ":\n" 
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					#define MAYBEPERM(A,perm) if (perm) { A ; }
 | 
				
			||||||
 | 
					#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
 | 
				
			||||||
 | 
					#define FX(A) WILSONASM_ ##A
 | 
				
			||||||
 | 
					#define COMPLEX_TYPE vComplexD
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					#undef KERNEL_DAG
 | 
				
			||||||
 | 
					template<> void 
 | 
				
			||||||
 | 
					WilsonKernels<WilsonImplD>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
 | 
				
			||||||
 | 
											int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
				
			||||||
 | 
					#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					#define KERNEL_DAG
 | 
				
			||||||
 | 
					template<> void 
 | 
				
			||||||
 | 
					WilsonKernels<WilsonImplD>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
 | 
				
			||||||
 | 
											   int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
				
			||||||
 | 
					#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 | 
				
			||||||
 | 
									    
 | 
				
			||||||
 | 
					#undef VMOVIDUP
 | 
				
			||||||
 | 
					#undef VMOVRDUP
 | 
				
			||||||
 | 
					#undef MAYBEPERM
 | 
				
			||||||
 | 
					#undef MULT_2SPIN
 | 
				
			||||||
 | 
					#undef FX 
 | 
				
			||||||
 | 
					#define FX(A) DWFASM_ ## A
 | 
				
			||||||
 | 
					#define MAYBEPERM(A,B) 
 | 
				
			||||||
 | 
					#define VMOVIDUP(A,B,C)                                  VBCASTIDUPf(A,B,C)
 | 
				
			||||||
 | 
					#define VMOVRDUP(A,B,C)                                  VBCASTRDUPf(A,B,C)
 | 
				
			||||||
 | 
					#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
 | 
				
			||||||
 | 
									    
 | 
				
			||||||
 | 
					#undef KERNEL_DAG
 | 
				
			||||||
 | 
					template<> void 
 | 
				
			||||||
 | 
					WilsonKernels<DomainWallVec5dImplD>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
 | 
				
			||||||
 | 
												 int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
				
			||||||
 | 
					#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 | 
				
			||||||
 | 
									    
 | 
				
			||||||
 | 
					#define KERNEL_DAG
 | 
				
			||||||
 | 
					template<> void 
 | 
				
			||||||
 | 
					WilsonKernels<DomainWallVec5dImplD>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
 | 
				
			||||||
 | 
												    int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
 | 
				
			||||||
 | 
					#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
					#undef COMPLEX_TYPE
 | 
				
			||||||
 | 
					#endif //Double precision			    
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif //AVX512
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define INSTANTIATE_ASM(A)\
 | 
					#define INSTANTIATE_ASM(A)\
 | 
				
			||||||
template void WilsonKernels<A>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
 | 
					template void WilsonKernels<A>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -5,7 +5,9 @@
 | 
				
			|||||||
  const uint64_t plocal =(uint64_t) & in._odata[0];
 | 
					  const uint64_t plocal =(uint64_t) & in._odata[0];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  //  vComplexF isigns[2] = { signs[0], signs[1] };
 | 
					  //  vComplexF isigns[2] = { signs[0], signs[1] };
 | 
				
			||||||
  vComplexF *isigns = &signs[0];
 | 
					  //COMPLEX_TYPE is vComplexF or vComplexD depending 
 | 
				
			||||||
 | 
					  //on the chosen precision
 | 
				
			||||||
 | 
					  COMPLEX_TYPE *isigns = &signs[0];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  MASK_REGS;
 | 
					  MASK_REGS;
 | 
				
			||||||
  int nmax=U._grid->oSites();
 | 
					  int nmax=U._grid->oSites();
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -371,14 +371,8 @@ namespace Optimization {
 | 
				
			|||||||
  // Some Template specialization
 | 
					  // Some Template specialization
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // Hack for CLANG until mm512_reduce_add_ps etc... are implemented in GCC and Clang releases
 | 
					  // Hack for CLANG until mm512_reduce_add_ps etc... are implemented in GCC and Clang releases
 | 
				
			||||||
<<<<<<< HEAD
 | 
					 | 
				
			||||||
#define GNU_CLANG_COMPILER 
 | 
					 | 
				
			||||||
#ifdef GNU_CLANG_COMPILER
 | 
					 | 
				
			||||||
=======
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#ifndef __INTEL_COMPILER
 | 
					#ifndef __INTEL_COMPILER
 | 
				
			||||||
#warning "Slow reduction due to incomplete reduce intrinsics"
 | 
					#warning "Slow reduction due to incomplete reduce intrinsics"
 | 
				
			||||||
>>>>>>> develop
 | 
					 | 
				
			||||||
  //Complex float Reduce
 | 
					  //Complex float Reduce
 | 
				
			||||||
  template<>
 | 
					  template<>
 | 
				
			||||||
    inline Grid::ComplexF Reduce<Grid::ComplexF, __m512>::operator()(__m512 in){
 | 
					    inline Grid::ComplexF Reduce<Grid::ComplexF, __m512>::operator()(__m512 in){
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user