|  |  |  | @@ -310,7 +310,7 @@ namespace QCD { | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | template<class Impl> | 
		
	
		
			
				|  |  |  |  | int WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U, | 
		
	
		
			
				|  |  |  |  | void WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U, | 
		
	
		
			
				|  |  |  |  | 						   std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf, | 
		
	
		
			
				|  |  |  |  | 						   int ss,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal) | 
		
	
		
			
				|  |  |  |  | { | 
		
	
	
		
			
				
					
					|  |  |  | @@ -318,21 +318,21 @@ int WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeFi | 
		
	
		
			
				|  |  |  |  |   typedef typename Simd::scalar_type S; | 
		
	
		
			
				|  |  |  |  |   typedef typename Simd::vector_type V; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_00 ; zeroit(result_00); // 12 regs on knc | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_01 ; zeroit(result_01); // 12 regs on knc | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_02 ; zeroit(result_02); // 12 regs on knc | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_00; // 12 regs on knc | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_01; | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_02; | 
		
	
		
			
				|  |  |  |  |    | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_10 ; zeroit(result_10); // 12 regs on knc | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_11 ; zeroit(result_11); // 12 regs on knc | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_12 ; zeroit(result_12); // 12 regs on knc | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_10; | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_11; | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_12; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_20 ; zeroit(result_20); // 12 regs on knc | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_21 ; zeroit(result_21); // 12 regs on knc | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_22 ; zeroit(result_22); // 12 regs on knc | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_20; | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_21; | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_22; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_30 ; zeroit(result_30); // 12 regs on knc | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_31 ; zeroit(result_31); // 12 regs on knc | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_32 ; zeroit(result_32); // 12 regs on knc | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_30; | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_31; | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_32; // 20 left | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd Chi_00;    // two spinor; 6 regs | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd Chi_01; | 
		
	
	
		
			
				
					
					|  |  |  | @@ -372,178 +372,172 @@ int WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeFi | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   StencilEntry *SE; | 
		
	
		
			
				|  |  |  |  |   int offset, ptype; | 
		
	
		
			
				|  |  |  |  |   int num = 0; | 
		
	
		
			
				|  |  |  |  |   int offset,local,perm, ptype; | 
		
	
		
			
				|  |  |  |  |    | 
		
	
		
			
				|  |  |  |  |   // Xp | 
		
	
		
			
				|  |  |  |  |   SE=st.GetEntry(ptype,Xp,ss); | 
		
	
		
			
				|  |  |  |  |   offset = SE->_offset; | 
		
	
		
			
				|  |  |  |  |   local  = SE->_is_local; | 
		
	
		
			
				|  |  |  |  |   perm   = SE->_permute; | 
		
	
		
			
				|  |  |  |  |    | 
		
	
		
			
				|  |  |  |  |   if (Local && SE->_is_local ) {  | 
		
	
		
			
				|  |  |  |  |   if ( local ) { | 
		
	
		
			
				|  |  |  |  |     LOAD_CHIMU; | 
		
	
		
			
				|  |  |  |  |     XP_PROJ; | 
		
	
		
			
				|  |  |  |  |     if ( SE->_permute ) { | 
		
	
		
			
				|  |  |  |  |     if ( perm) { | 
		
	
		
			
				|  |  |  |  |       PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   if ( Nonlocal && (!SE->_is_local) ) {  | 
		
	
		
			
				|  |  |  |  |   } else {  | 
		
	
		
			
				|  |  |  |  |     LOAD_CHI; | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { | 
		
	
		
			
				|  |  |  |  |   { | 
		
	
		
			
				|  |  |  |  |     MULT_2SPIN(Xp); | 
		
	
		
			
				|  |  |  |  |     XP_RECON_ACCUM; | 
		
	
		
			
				|  |  |  |  |     num++;   | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   XP_RECON; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   // Yp | 
		
	
		
			
				|  |  |  |  |   SE=st.GetEntry(ptype,Yp,ss); | 
		
	
		
			
				|  |  |  |  |   offset = SE->_offset; | 
		
	
		
			
				|  |  |  |  |   local  = SE->_is_local; | 
		
	
		
			
				|  |  |  |  |   perm   = SE->_permute; | 
		
	
		
			
				|  |  |  |  |    | 
		
	
		
			
				|  |  |  |  |   if (Local && SE->_is_local ) {  | 
		
	
		
			
				|  |  |  |  |   if ( local ) { | 
		
	
		
			
				|  |  |  |  |     LOAD_CHIMU; | 
		
	
		
			
				|  |  |  |  |     YP_PROJ; | 
		
	
		
			
				|  |  |  |  |     if ( SE->_permute ) { | 
		
	
		
			
				|  |  |  |  |     if ( perm) { | 
		
	
		
			
				|  |  |  |  |       PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   if ( Nonlocal && (!SE->_is_local) ) {  | 
		
	
		
			
				|  |  |  |  |   } else {  | 
		
	
		
			
				|  |  |  |  |     LOAD_CHI; | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { | 
		
	
		
			
				|  |  |  |  |   { | 
		
	
		
			
				|  |  |  |  |     MULT_2SPIN(Yp); | 
		
	
		
			
				|  |  |  |  |     YP_RECON_ACCUM; | 
		
	
		
			
				|  |  |  |  |     num++;   | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   YP_RECON_ACCUM; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   // Zp | 
		
	
		
			
				|  |  |  |  |   SE=st.GetEntry(ptype,Zp,ss); | 
		
	
		
			
				|  |  |  |  |   offset = SE->_offset; | 
		
	
		
			
				|  |  |  |  |   local  = SE->_is_local; | 
		
	
		
			
				|  |  |  |  |   perm   = SE->_permute; | 
		
	
		
			
				|  |  |  |  |    | 
		
	
		
			
				|  |  |  |  |   if (Local && SE->_is_local ) {  | 
		
	
		
			
				|  |  |  |  |   if ( local ) { | 
		
	
		
			
				|  |  |  |  |     LOAD_CHIMU; | 
		
	
		
			
				|  |  |  |  |     ZP_PROJ; | 
		
	
		
			
				|  |  |  |  |     if ( SE->_permute ) { | 
		
	
		
			
				|  |  |  |  |     if ( perm) { | 
		
	
		
			
				|  |  |  |  |       PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |   }   | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   if ( Nonlocal && (!SE->_is_local) ) {  | 
		
	
		
			
				|  |  |  |  |   } else {  | 
		
	
		
			
				|  |  |  |  |     LOAD_CHI; | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { | 
		
	
		
			
				|  |  |  |  |   { | 
		
	
		
			
				|  |  |  |  |     MULT_2SPIN(Zp); | 
		
	
		
			
				|  |  |  |  |     ZP_RECON_ACCUM; | 
		
	
		
			
				|  |  |  |  |     num++;   | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   ZP_RECON_ACCUM; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   // Tp | 
		
	
		
			
				|  |  |  |  |   SE=st.GetEntry(ptype,Tp,ss); | 
		
	
		
			
				|  |  |  |  |   offset = SE->_offset; | 
		
	
		
			
				|  |  |  |  |   local  = SE->_is_local; | 
		
	
		
			
				|  |  |  |  |   perm   = SE->_permute; | 
		
	
		
			
				|  |  |  |  |    | 
		
	
		
			
				|  |  |  |  |   if (Local && SE->_is_local ) {  | 
		
	
		
			
				|  |  |  |  |   if ( local ) { | 
		
	
		
			
				|  |  |  |  |     LOAD_CHIMU; | 
		
	
		
			
				|  |  |  |  |     TP_PROJ; | 
		
	
		
			
				|  |  |  |  |     if ( SE->_permute ) { | 
		
	
		
			
				|  |  |  |  |     if ( perm) { | 
		
	
		
			
				|  |  |  |  |       PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   if ( Nonlocal && (!SE->_is_local) ) {  | 
		
	
		
			
				|  |  |  |  |   } else {  | 
		
	
		
			
				|  |  |  |  |     LOAD_CHI; | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { | 
		
	
		
			
				|  |  |  |  |   { | 
		
	
		
			
				|  |  |  |  |     MULT_2SPIN(Tp); | 
		
	
		
			
				|  |  |  |  |     TP_RECON_ACCUM; | 
		
	
		
			
				|  |  |  |  |     num++;   | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   TP_RECON_ACCUM; | 
		
	
		
			
				|  |  |  |  |    | 
		
	
		
			
				|  |  |  |  |   // Xm | 
		
	
		
			
				|  |  |  |  |   SE=st.GetEntry(ptype,Xm,ss); | 
		
	
		
			
				|  |  |  |  |   offset = SE->_offset; | 
		
	
		
			
				|  |  |  |  |   local  = SE->_is_local; | 
		
	
		
			
				|  |  |  |  |   perm   = SE->_permute; | 
		
	
		
			
				|  |  |  |  |    | 
		
	
		
			
				|  |  |  |  |   if (Local && SE->_is_local ) {  | 
		
	
		
			
				|  |  |  |  |   if ( local ) { | 
		
	
		
			
				|  |  |  |  |     LOAD_CHIMU; | 
		
	
		
			
				|  |  |  |  |     XM_PROJ; | 
		
	
		
			
				|  |  |  |  |     if ( SE->_permute ) { | 
		
	
		
			
				|  |  |  |  |     if ( perm) { | 
		
	
		
			
				|  |  |  |  |       PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   if ( Nonlocal && (!SE->_is_local) ) {  | 
		
	
		
			
				|  |  |  |  |   } else {  | 
		
	
		
			
				|  |  |  |  |     LOAD_CHI; | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { | 
		
	
		
			
				|  |  |  |  |   { | 
		
	
		
			
				|  |  |  |  |     MULT_2SPIN(Xm); | 
		
	
		
			
				|  |  |  |  |     XM_RECON_ACCUM; | 
		
	
		
			
				|  |  |  |  |     num++;   | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   XM_RECON_ACCUM; | 
		
	
		
			
				|  |  |  |  |    | 
		
	
		
			
				|  |  |  |  |   // Ym | 
		
	
		
			
				|  |  |  |  |   SE=st.GetEntry(ptype,Ym,ss); | 
		
	
		
			
				|  |  |  |  |   offset = SE->_offset; | 
		
	
		
			
				|  |  |  |  |   local  = SE->_is_local; | 
		
	
		
			
				|  |  |  |  |   perm   = SE->_permute; | 
		
	
		
			
				|  |  |  |  |    | 
		
	
		
			
				|  |  |  |  |   if (Local && SE->_is_local ) {  | 
		
	
		
			
				|  |  |  |  |   if ( local ) { | 
		
	
		
			
				|  |  |  |  |     LOAD_CHIMU; | 
		
	
		
			
				|  |  |  |  |     YM_PROJ; | 
		
	
		
			
				|  |  |  |  |     if ( SE->_permute ) { | 
		
	
		
			
				|  |  |  |  |     if ( perm) { | 
		
	
		
			
				|  |  |  |  |       PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   if ( Nonlocal && (!SE->_is_local) ) {  | 
		
	
		
			
				|  |  |  |  |   } else {  | 
		
	
		
			
				|  |  |  |  |     LOAD_CHI; | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { | 
		
	
		
			
				|  |  |  |  |   { | 
		
	
		
			
				|  |  |  |  |     MULT_2SPIN(Ym); | 
		
	
		
			
				|  |  |  |  |     YM_RECON_ACCUM; | 
		
	
		
			
				|  |  |  |  |     num++;   | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   YM_RECON_ACCUM; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   // Zm | 
		
	
		
			
				|  |  |  |  |   SE=st.GetEntry(ptype,Zm,ss); | 
		
	
		
			
				|  |  |  |  |   offset = SE->_offset; | 
		
	
		
			
				|  |  |  |  |   local  = SE->_is_local; | 
		
	
		
			
				|  |  |  |  |   perm   = SE->_permute; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   if (Local && SE->_is_local ) {  | 
		
	
		
			
				|  |  |  |  |   if ( local ) { | 
		
	
		
			
				|  |  |  |  |     LOAD_CHIMU; | 
		
	
		
			
				|  |  |  |  |     ZM_PROJ; | 
		
	
		
			
				|  |  |  |  |     if ( SE->_permute ) { | 
		
	
		
			
				|  |  |  |  |     if ( perm) { | 
		
	
		
			
				|  |  |  |  |       PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   if ( Nonlocal && (!SE->_is_local) ) {  | 
		
	
		
			
				|  |  |  |  |   } else {  | 
		
	
		
			
				|  |  |  |  |     LOAD_CHI; | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { | 
		
	
		
			
				|  |  |  |  |   { | 
		
	
		
			
				|  |  |  |  |     MULT_2SPIN(Zm); | 
		
	
		
			
				|  |  |  |  |     ZM_RECON_ACCUM; | 
		
	
		
			
				|  |  |  |  |     num++;   | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   ZM_RECON_ACCUM; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   // Tm | 
		
	
		
			
				|  |  |  |  |   SE=st.GetEntry(ptype,Tm,ss); | 
		
	
		
			
				|  |  |  |  |   offset = SE->_offset; | 
		
	
		
			
				|  |  |  |  |   local  = SE->_is_local; | 
		
	
		
			
				|  |  |  |  |   perm   = SE->_permute; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   if (Local && SE->_is_local ) {  | 
		
	
		
			
				|  |  |  |  |   if ( local ) { | 
		
	
		
			
				|  |  |  |  |     LOAD_CHIMU; | 
		
	
		
			
				|  |  |  |  |     TM_PROJ; | 
		
	
		
			
				|  |  |  |  |     if ( SE->_permute ) { | 
		
	
		
			
				|  |  |  |  |     if ( perm) { | 
		
	
		
			
				|  |  |  |  |       PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   if ( Nonlocal && (!SE->_is_local) ) {  | 
		
	
		
			
				|  |  |  |  |   } else {  | 
		
	
		
			
				|  |  |  |  |     LOAD_CHI; | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { | 
		
	
		
			
				|  |  |  |  |   { | 
		
	
		
			
				|  |  |  |  |     MULT_2SPIN(Tm); | 
		
	
		
			
				|  |  |  |  |     TM_RECON_ACCUM; | 
		
	
		
			
				|  |  |  |  |     num++;   | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   TM_RECON_ACCUM; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   { | 
		
	
		
			
				|  |  |  |  |     SiteSpinor & ref (out._odata[ss]); | 
		
	
		
			
				|  |  |  |  |   if ( Local ) { | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(0)(0),result_00*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(0)(1),result_01*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(0)(2),result_02*(-0.5)); | 
		
	
	
		
			
				
					
					|  |  |  | @@ -556,295 +550,9 @@ int WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeFi | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(3)(0),result_30*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(3)(1),result_31*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(3)(2),result_32*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     return 1; | 
		
	
		
			
				|  |  |  |  |   } else if ( num ) {  | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(0)(0),ref()(0)(0)+result_00*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(0)(1),ref()(0)(1)+result_01*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(0)(2),ref()(0)(2)+result_02*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(1)(0),ref()(1)(0)+result_10*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(1)(1),ref()(1)(1)+result_11*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(1)(2),ref()(1)(2)+result_12*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(2)(0),ref()(2)(0)+result_20*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(2)(1),ref()(2)(1)+result_21*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(2)(2),ref()(2)(2)+result_22*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(3)(0),ref()(3)(0)+result_30*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(3)(1),ref()(3)(1)+result_31*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(3)(2),ref()(3)(2)+result_32*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     return 1; | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   return 0; | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | template<class Impl> | 
		
	
		
			
				|  |  |  |  | int WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U, | 
		
	
		
			
				|  |  |  |  | 						std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf, | 
		
	
		
			
				|  |  |  |  | 						int ss,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal) | 
		
	
		
			
				|  |  |  |  | { | 
		
	
		
			
				|  |  |  |  |   //  std::cout << "Hand op Dhop "<<std::endl; | 
		
	
		
			
				|  |  |  |  |   typedef typename Simd::scalar_type S; | 
		
	
		
			
				|  |  |  |  |   typedef typename Simd::vector_type V; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_00 ; zeroit(result_00); // 12 regs on knc | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_01 ; zeroit(result_01); // 12 regs on knc | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_02 ; zeroit(result_02); // 12 regs on knc | 
		
	
		
			
				|  |  |  |  |    | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_10 ; zeroit(result_10); // 12 regs on knc | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_11 ; zeroit(result_11); // 12 regs on knc | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_12 ; zeroit(result_12); // 12 regs on knc | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_20 ; zeroit(result_20); // 12 regs on knc | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_21 ; zeroit(result_21); // 12 regs on knc | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_22 ; zeroit(result_22); // 12 regs on knc | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_30 ; zeroit(result_30); // 12 regs on knc | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_31 ; zeroit(result_31); // 12 regs on knc | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd result_32 ; zeroit(result_32); // 12 regs on knc | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd Chi_00;    // two spinor; 6 regs | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd Chi_01; | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd Chi_02; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd Chi_10; | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd Chi_11; | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd Chi_12;   // 14 left | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd UChi_00;  // two spinor; 6 regs | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd UChi_01; | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd UChi_02; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd UChi_10; | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd UChi_11; | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd UChi_12;  // 8 left | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd U_00;  // two rows of U matrix | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd U_10; | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd U_20;   | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd U_01; | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd U_11; | 
		
	
		
			
				|  |  |  |  |   REGISTER Simd U_21;  // 2 reg left. | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | #define Chimu_00 Chi_00 | 
		
	
		
			
				|  |  |  |  | #define Chimu_01 Chi_01 | 
		
	
		
			
				|  |  |  |  | #define Chimu_02 Chi_02 | 
		
	
		
			
				|  |  |  |  | #define Chimu_10 Chi_10 | 
		
	
		
			
				|  |  |  |  | #define Chimu_11 Chi_11 | 
		
	
		
			
				|  |  |  |  | #define Chimu_12 Chi_12 | 
		
	
		
			
				|  |  |  |  | #define Chimu_20 UChi_00 | 
		
	
		
			
				|  |  |  |  | #define Chimu_21 UChi_01 | 
		
	
		
			
				|  |  |  |  | #define Chimu_22 UChi_02 | 
		
	
		
			
				|  |  |  |  | #define Chimu_30 UChi_10 | 
		
	
		
			
				|  |  |  |  | #define Chimu_31 UChi_11 | 
		
	
		
			
				|  |  |  |  | #define Chimu_32 UChi_12 | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   StencilEntry *SE; | 
		
	
		
			
				|  |  |  |  |   int offset, ptype; | 
		
	
		
			
				|  |  |  |  |   int num = 0; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   // Xp | 
		
	
		
			
				|  |  |  |  |   SE=st.GetEntry(ptype,Xp,ss); | 
		
	
		
			
				|  |  |  |  |   offset = SE->_offset; | 
		
	
		
			
				|  |  |  |  |    | 
		
	
		
			
				|  |  |  |  |   if (Local && SE->_is_local ) {  | 
		
	
		
			
				|  |  |  |  |     LOAD_CHIMU; | 
		
	
		
			
				|  |  |  |  |     XM_PROJ; | 
		
	
		
			
				|  |  |  |  |     if ( SE->_permute ) { | 
		
	
		
			
				|  |  |  |  |       PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   if ( Nonlocal && (!SE->_is_local) ) {  | 
		
	
		
			
				|  |  |  |  |     LOAD_CHI; | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { | 
		
	
		
			
				|  |  |  |  |     MULT_2SPIN(Xp); | 
		
	
		
			
				|  |  |  |  |     XM_RECON_ACCUM; | 
		
	
		
			
				|  |  |  |  |     num++;   | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   // Yp | 
		
	
		
			
				|  |  |  |  |   SE=st.GetEntry(ptype,Yp,ss); | 
		
	
		
			
				|  |  |  |  |   offset = SE->_offset; | 
		
	
		
			
				|  |  |  |  |    | 
		
	
		
			
				|  |  |  |  |   if (Local && SE->_is_local ) {  | 
		
	
		
			
				|  |  |  |  |     LOAD_CHIMU; | 
		
	
		
			
				|  |  |  |  |     YM_PROJ; | 
		
	
		
			
				|  |  |  |  |     if ( SE->_permute ) { | 
		
	
		
			
				|  |  |  |  |       PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   if ( Nonlocal && (!SE->_is_local) ) {  | 
		
	
		
			
				|  |  |  |  |     LOAD_CHI; | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { | 
		
	
		
			
				|  |  |  |  |     MULT_2SPIN(Yp); | 
		
	
		
			
				|  |  |  |  |     YM_RECON_ACCUM; | 
		
	
		
			
				|  |  |  |  |     num++;   | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   // Zp | 
		
	
		
			
				|  |  |  |  |   SE=st.GetEntry(ptype,Zp,ss); | 
		
	
		
			
				|  |  |  |  |   offset = SE->_offset; | 
		
	
		
			
				|  |  |  |  |    | 
		
	
		
			
				|  |  |  |  |   if (Local && SE->_is_local ) {  | 
		
	
		
			
				|  |  |  |  |     LOAD_CHIMU; | 
		
	
		
			
				|  |  |  |  |     ZM_PROJ; | 
		
	
		
			
				|  |  |  |  |     if ( SE->_permute ) { | 
		
	
		
			
				|  |  |  |  |       PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |   }   | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   if ( Nonlocal && (!SE->_is_local) ) {  | 
		
	
		
			
				|  |  |  |  |     LOAD_CHI; | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { | 
		
	
		
			
				|  |  |  |  |     MULT_2SPIN(Zp); | 
		
	
		
			
				|  |  |  |  |     ZM_RECON_ACCUM; | 
		
	
		
			
				|  |  |  |  |     num++;   | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   // Tp | 
		
	
		
			
				|  |  |  |  |   SE=st.GetEntry(ptype,Tp,ss); | 
		
	
		
			
				|  |  |  |  |   offset = SE->_offset; | 
		
	
		
			
				|  |  |  |  |    | 
		
	
		
			
				|  |  |  |  |   if (Local && SE->_is_local ) {  | 
		
	
		
			
				|  |  |  |  |     LOAD_CHIMU; | 
		
	
		
			
				|  |  |  |  |     TM_PROJ; | 
		
	
		
			
				|  |  |  |  |     if ( SE->_permute ) { | 
		
	
		
			
				|  |  |  |  |       PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   if ( Nonlocal && (!SE->_is_local) ) {  | 
		
	
		
			
				|  |  |  |  |     LOAD_CHI; | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { | 
		
	
		
			
				|  |  |  |  |     MULT_2SPIN(Tp); | 
		
	
		
			
				|  |  |  |  |     TM_RECON_ACCUM; | 
		
	
		
			
				|  |  |  |  |     num++;   | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |    | 
		
	
		
			
				|  |  |  |  |   // Xm | 
		
	
		
			
				|  |  |  |  |   SE=st.GetEntry(ptype,Xm,ss); | 
		
	
		
			
				|  |  |  |  |   offset = SE->_offset; | 
		
	
		
			
				|  |  |  |  |    | 
		
	
		
			
				|  |  |  |  |   if (Local && SE->_is_local ) {  | 
		
	
		
			
				|  |  |  |  |     LOAD_CHIMU; | 
		
	
		
			
				|  |  |  |  |     XP_PROJ; | 
		
	
		
			
				|  |  |  |  |     if ( SE->_permute ) { | 
		
	
		
			
				|  |  |  |  |       PERMUTE_DIR(3); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   if ( Nonlocal && (!SE->_is_local) ) {  | 
		
	
		
			
				|  |  |  |  |     LOAD_CHI; | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { | 
		
	
		
			
				|  |  |  |  |     MULT_2SPIN(Xm); | 
		
	
		
			
				|  |  |  |  |     XP_RECON_ACCUM; | 
		
	
		
			
				|  |  |  |  |     num++;   | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |    | 
		
	
		
			
				|  |  |  |  |   // Ym | 
		
	
		
			
				|  |  |  |  |   SE=st.GetEntry(ptype,Ym,ss); | 
		
	
		
			
				|  |  |  |  |   offset = SE->_offset; | 
		
	
		
			
				|  |  |  |  |    | 
		
	
		
			
				|  |  |  |  |   if (Local && SE->_is_local ) {  | 
		
	
		
			
				|  |  |  |  |     LOAD_CHIMU; | 
		
	
		
			
				|  |  |  |  |     YP_PROJ; | 
		
	
		
			
				|  |  |  |  |     if ( SE->_permute ) { | 
		
	
		
			
				|  |  |  |  |       PERMUTE_DIR(2); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   if ( Nonlocal && (!SE->_is_local) ) {  | 
		
	
		
			
				|  |  |  |  |     LOAD_CHI; | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { | 
		
	
		
			
				|  |  |  |  |     MULT_2SPIN(Ym); | 
		
	
		
			
				|  |  |  |  |     YP_RECON_ACCUM; | 
		
	
		
			
				|  |  |  |  |     num++;   | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   // Zm | 
		
	
		
			
				|  |  |  |  |   SE=st.GetEntry(ptype,Zm,ss); | 
		
	
		
			
				|  |  |  |  |   offset = SE->_offset; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   if (Local && SE->_is_local ) {  | 
		
	
		
			
				|  |  |  |  |     LOAD_CHIMU; | 
		
	
		
			
				|  |  |  |  |     ZP_PROJ; | 
		
	
		
			
				|  |  |  |  |     if ( SE->_permute ) { | 
		
	
		
			
				|  |  |  |  |       PERMUTE_DIR(1); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   if ( Nonlocal && (!SE->_is_local) ) {  | 
		
	
		
			
				|  |  |  |  |     LOAD_CHI; | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { | 
		
	
		
			
				|  |  |  |  |     MULT_2SPIN(Zm); | 
		
	
		
			
				|  |  |  |  |     ZP_RECON_ACCUM; | 
		
	
		
			
				|  |  |  |  |     num++;   | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   // Tm | 
		
	
		
			
				|  |  |  |  |   SE=st.GetEntry(ptype,Tm,ss); | 
		
	
		
			
				|  |  |  |  |   offset = SE->_offset; | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   if (Local && SE->_is_local ) {  | 
		
	
		
			
				|  |  |  |  |     LOAD_CHIMU; | 
		
	
		
			
				|  |  |  |  |     TP_PROJ; | 
		
	
		
			
				|  |  |  |  |     if ( SE->_permute ) { | 
		
	
		
			
				|  |  |  |  |       PERMUTE_DIR(0); // T==0, Z==1, Y==2, Z==3 expect 1,2,2,2 simd layout etc... | 
		
	
		
			
				|  |  |  |  |     } | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   if ( Nonlocal && (!SE->_is_local) ) {  | 
		
	
		
			
				|  |  |  |  |     LOAD_CHI; | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   if ( (Local && SE->_is_local) || ( Nonlocal && (!SE->_is_local)) ) { | 
		
	
		
			
				|  |  |  |  |     MULT_2SPIN(Tm); | 
		
	
		
			
				|  |  |  |  |     TP_RECON_ACCUM; | 
		
	
		
			
				|  |  |  |  |     num++;   | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   SiteSpinor & ref (out._odata[ss]); | 
		
	
		
			
				|  |  |  |  |   if ( Local ) { | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(0)(0),result_00*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(0)(1),result_01*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(0)(2),result_02*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(1)(0),result_10*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(1)(1),result_11*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(1)(2),result_12*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(2)(0),result_20*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(2)(1),result_21*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(2)(2),result_22*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(3)(0),result_30*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(3)(1),result_31*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(3)(2),result_32*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     return 1; | 
		
	
		
			
				|  |  |  |  |   } else if ( num ) {  | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(0)(0),ref()(0)(0)+result_00*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(0)(1),ref()(0)(1)+result_01*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(0)(2),ref()(0)(2)+result_02*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(1)(0),ref()(1)(0)+result_10*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(1)(1),ref()(1)(1)+result_11*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(1)(2),ref()(1)(2)+result_12*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(2)(0),ref()(2)(0)+result_20*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(2)(1),ref()(2)(1)+result_21*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(2)(2),ref()(2)(2)+result_22*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(3)(0),ref()(3)(0)+result_30*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(3)(1),ref()(3)(1)+result_31*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(3)(2),ref()(3)(2)+result_32*(-0.5)); | 
		
	
		
			
				|  |  |  |  |     return 1; | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  |   return 0; | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   /* | 
		
	
		
			
				|  |  |  |  | template<class Impl> | 
		
	
		
			
				|  |  |  |  | void WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U, | 
		
	
		
			
				|  |  |  |  | 						std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf, | 
		
	
	
		
			
				
					
					|  |  |  | @@ -1087,21 +795,16 @@ void WilsonKernels<Impl >::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeFiel | 
		
	
		
			
				|  |  |  |  |     vstream(ref()(3)(2),result_32*(-0.5)); | 
		
	
		
			
				|  |  |  |  |   } | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  | */ | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |   //////////////////////////////////////////////// | 
		
	
		
			
				|  |  |  |  |   // Specialise Gparity to simple implementation | 
		
	
		
			
				|  |  |  |  |   //////////////////////////////////////////////// | 
		
	
		
			
				|  |  |  |  | //template<class Impl> | 
		
	
		
			
				|  |  |  |  | //int WilsonKernels<Impl >::DiracOptHandDhopSiteDag(StencilImpl &st,DoubledGaugeField &U, | 
		
	
		
			
				|  |  |  |  | //						   std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf, | 
		
	
		
			
				|  |  |  |  | //						   int ss,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal) | 
		
	
		
			
				|  |  |  |  | #if 0 | 
		
	
		
			
				|  |  |  |  | template | 
		
	
		
			
				|  |  |  |  | template<> | 
		
	
		
			
				|  |  |  |  | void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSite(StencilImpl &st,DoubledGaugeField &U, | 
		
	
		
			
				|  |  |  |  | 							     std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf, | 
		
	
		
			
				|  |  |  |  | 							     int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal) | 
		
	
		
			
				|  |  |  |  | { | 
		
	
		
			
				|  |  |  |  |   DiracOptDhopSite(st,U,buf,sF,sU,in,out,Local,Nonlocal); // will template override for Wilson Nc=3 | 
		
	
		
			
				|  |  |  |  |   DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3 | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | template<> | 
		
	
	
		
			
				
					
					|  |  |  | @@ -1109,7 +812,7 @@ void WilsonKernels<GparityWilsonImplF>::DiracOptHandDhopSiteDag(StencilImpl &st, | 
		
	
		
			
				|  |  |  |  | 								std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf, | 
		
	
		
			
				|  |  |  |  | 								int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal) | 
		
	
		
			
				|  |  |  |  | { | 
		
	
		
			
				|  |  |  |  |   DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out,Local,Nonlocal); // will template override for Wilson Nc=3 | 
		
	
		
			
				|  |  |  |  |   DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3 | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | template<> | 
		
	
	
		
			
				
					
					|  |  |  | @@ -1117,7 +820,7 @@ void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSite(StencilImpl &st,Dou | 
		
	
		
			
				|  |  |  |  | 							     std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf, | 
		
	
		
			
				|  |  |  |  | 							     int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal) | 
		
	
		
			
				|  |  |  |  | { | 
		
	
		
			
				|  |  |  |  |   DiracOptDhopSite(st,U,buf,sF,sU,in,out,Local,Nonlocal); // will template override for Wilson Nc=3 | 
		
	
		
			
				|  |  |  |  |   DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3 | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  | template<> | 
		
	
	
		
			
				
					
					|  |  |  | @@ -1125,9 +828,8 @@ void WilsonKernels<GparityWilsonImplD>::DiracOptHandDhopSiteDag(StencilImpl &st, | 
		
	
		
			
				|  |  |  |  | 								std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf, | 
		
	
		
			
				|  |  |  |  | 								int sF,int sU,const FermionField &in, FermionField &out, bool Local, bool Nonlocal) | 
		
	
		
			
				|  |  |  |  | { | 
		
	
		
			
				|  |  |  |  |   DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out,Local,Nonlocal); // will template override for Wilson Nc=3 | 
		
	
		
			
				|  |  |  |  |   DiracOptDhopSiteDag(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3 | 
		
	
		
			
				|  |  |  |  | } | 
		
	
		
			
				|  |  |  |  | #endif | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |  | 
		
	
		
			
				|  |  |  |  |  | 
		
	
	
		
			
				
					
					|  |  |  |   |