Merge branch 'master' of https://github.com/paboyle/Grid

2025-12-20 04:34:29 +00:00 · 2016-03-30 08:37:55 +01:00
parent c77b7ee897 2ded354403
commit 1ecbf9794d
7 changed files with 55 additions and 10213 deletions
--- a/9988
+++ b/9988
--- a/lib/Config.h.in
+++ b/lib/Config.h.in
@@ -1,180 +0,0 @@
-/* lib/Config.h.in.  Generated from configure.ac by autoheader.  */
-
-/* AVX Intrinsics */
-#undef AVX1
-
-/* AVX2 Intrinsics */
-#undef AVX2
-
-/* AVX512 Intrinsics for Knights Landing */
-#undef AVX512
-
-/* AVX Intrinsics with FMA4 */
-#undef AVXFMA4
-
-/* EMPTY_SIMD only for DEBUGGING */
-#undef EMPTY_SIMD
-
-/* GRID_COMMS_MPI */
-#undef GRID_COMMS_MPI
-
-/* GRID_COMMS_NONE */
-#undef GRID_COMMS_NONE
-
-/* GRID_COMMS_SHMEM */
-#undef GRID_COMMS_SHMEM
-
-/* GRID_DEFAULT_PRECISION is DOUBLE */
-#undef GRID_DEFAULT_PRECISION_DOUBLE
-
-/* GRID_DEFAULT_PRECISION is SINGLE */
-#undef GRID_DEFAULT_PRECISION_SINGLE
-
-/* Support Altivec instructions */
-#undef HAVE_ALTIVEC
-
-/* Support AVX (Advanced Vector Extensions) instructions */
-#undef HAVE_AVX
-
-/* Support AVX2 (Advanced Vector Extensions 2) instructions */
-#undef HAVE_AVX2
-
-/* Define to 1 if you have the declaration of `be64toh', and to 0 if you
-   don't. */
-#undef HAVE_DECL_BE64TOH
-
-/* Define to 1 if you have the declaration of `ntohll', and to 0 if you don't.
-   */
-#undef HAVE_DECL_NTOHLL
-
-/* Define to 1 if you have the <endian.h> header file. */
-#undef HAVE_ENDIAN_H
-
-/* Define to 1 if you have the <execinfo.h> header file. */
-#undef HAVE_EXECINFO_H
-
-/* Support FMA3 (Fused Multiply-Add) instructions */
-#undef HAVE_FMA
-
-/* Define to 1 if you have the `gettimeofday' function. */
-#undef HAVE_GETTIMEOFDAY
-
-/* Define to 1 if you have the <gmp.h> header file. */
-#undef HAVE_GMP_H
-
-/* Define to 1 if you have the <inttypes.h> header file. */
-#undef HAVE_INTTYPES_H
-
-/* Define to 1 if you have the <malloc.h> header file. */
-#undef HAVE_MALLOC_H
-
-/* Define to 1 if you have the <malloc/malloc.h> header file. */
-#undef HAVE_MALLOC_MALLOC_H
-
-/* Define to 1 if you have the <memory.h> header file. */
-#undef HAVE_MEMORY_H
-
-/* Support mmx instructions */
-#undef HAVE_MMX
-
-/* Define to 1 if you have the <mm_malloc.h> header file. */
-#undef HAVE_MM_MALLOC_H
-
-/* Support SSE (Streaming SIMD Extensions) instructions */
-#undef HAVE_SSE
-
-/* Support SSE2 (Streaming SIMD Extensions 2) instructions */
-#undef HAVE_SSE2
-
-/* Support SSE3 (Streaming SIMD Extensions 3) instructions */
-#undef HAVE_SSE3
-
-/* Support SSSE4.1 (Streaming SIMD Extensions 4.1) instructions */
-#undef HAVE_SSE4_1
-
-/* Support SSSE4.2 (Streaming SIMD Extensions 4.2) instructions */
-#undef HAVE_SSE4_2
-
-/* Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions */
-#undef HAVE_SSSE3
-
-/* Define to 1 if you have the <stdint.h> header file. */
-#undef HAVE_STDINT_H
-
-/* Define to 1 if you have the <stdlib.h> header file. */
-#undef HAVE_STDLIB_H
-
-/* Define to 1 if you have the <strings.h> header file. */
-#undef HAVE_STRINGS_H
-
-/* Define to 1 if you have the <string.h> header file. */
-#undef HAVE_STRING_H
-
-/* Define to 1 if you have the <sys/stat.h> header file. */
-#undef HAVE_SYS_STAT_H
-
-/* Define to 1 if you have the <sys/types.h> header file. */
-#undef HAVE_SYS_TYPES_H
-
-/* Define to 1 if you have the <unistd.h> header file. */
-#undef HAVE_UNISTD_H
-
-/* IMCI Intrinsics for Knights Corner */
-#undef IMCI
-
-/* NEON ARMv8 Experimental support */
-#undef NEONv8
-
-/* Name of package */
-#undef PACKAGE
-
-/* Define to the address where bug reports for this package should be sent. */
-#undef PACKAGE_BUGREPORT
-
-/* Define to the full name of this package. */
-#undef PACKAGE_NAME
-
-/* Define to the full name and version of this package. */
-#undef PACKAGE_STRING
-
-/* Define to the one symbol short name of this package. */
-#undef PACKAGE_TARNAME
-
-/* Define to the version of this package. */
-#undef PACKAGE_VERSION
-
-/* RNG_MT19937 */
-#undef RNG_MT19937
-
-/* RNG_RANLUX */
-#undef RNG_RANLUX
-
-/* SSE4 Intrinsics */
-#undef SSE4
-
-/* Define to 1 if you have the ANSI C header files. */
-#undef STDC_HEADERS
-
-/* Version number of package */
-#undef VERSION
-
-/* Define for Solaris 2.5.1 so the uint32_t typedef from <sys/synch.h>,
-   <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
-   #define below would cause a syntax error. */
-#undef _UINT32_T
-
-/* Define for Solaris 2.5.1 so the uint64_t typedef from <sys/synch.h>,
-   <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
-   #define below would cause a syntax error. */
-#undef _UINT64_T
-
-/* Define to `unsigned int' if <sys/types.h> does not define. */
-#undef size_t
-
-/* Define to the type of an unsigned integer type of width exactly 32 bits if
-   such a type exists and the standard includes do not define it. */
-#undef uint32_t
-
-/* Define to the type of an unsigned integer type of width exactly 64 bits if
-   such a type exists and the standard includes do not define it. */
-#undef uint64_t
--- a/lib/algorithms/CoarsenedMatrix.h
+++ b/lib/algorithms/CoarsenedMatrix.h
@@ -152,16 +152,18 @@ namespace Grid {
    {
      // Run a Lanczos with sloppy convergence
 	const int Nstop = nn;
-	const int Nk = nn+10;
-	const int Np = nn+10;
+	const int Nk = nn+20;
+	const int Np = nn+20;
 	const int Nm = Nk+Np;
 	const int MaxIt= 10000;
-	RealD resid = 1.0e-5;
+	RealD resid = 1.0e-3;

-	Chebyshev<FineField> Cheb(0.2,5.,11);
+	Chebyshev<FineField> Cheb(0.5,64.0,21);
 	ImplicitlyRestartedLanczos<FineField> IRL(hermop,Cheb,Nstop,Nk,Nm,resid,MaxIt);
+	//	IRL.lock = 1;

 	FineField noise(FineGrid); gaussian(RNG,noise);
+	FineField tmp(FineGrid); 
 	std::vector<RealD>     eval(Nm);
 	std::vector<FineField> evec(Nm,FineGrid);

@@ -172,16 +174,34 @@ namespace Grid {

    	// pull back nn vectors
 	for(int b=0;b<nn;b++){
+
 	  subspace[b]   = evec[b];
+
+	  std::cout << GridLogMessage <<"subspace["<<b<<"] = "<<norm2(subspace[b])<<std::endl;
+
+	  hermop.Op(subspace[b],tmp); 
+	  std::cout<<GridLogMessage << "filtered["<<b<<"] <f|MdagM|f> "<<norm2(tmp)<<std::endl;
+
+	  noise = tmp -  sqrt(eval[b])*subspace[b] ;
+
+	  std::cout<<GridLogMessage << " lambda_"<<b<<" = "<< eval[b] <<"  ;  [ M - Lambda ]_"<<b<<" vec_"<<b<<"  = " <<norm2(noise)<<std::endl;
+
+	  noise = tmp +  eval[b]*subspace[b] ;
+
+	  std::cout<<GridLogMessage << " lambda_"<<b<<" = "<< eval[b] <<"  ;  [ M - Lambda ]_"<<b<<" vec_"<<b<<"  = " <<norm2(noise)<<std::endl;
+
 	}
 	Orthogonalise();
+	for(int b=0;b<nn;b++){
+	  std::cout << GridLogMessage <<"subspace["<<b<<"] = "<<norm2(subspace[b])<<std::endl;
+	}
    }

    virtual void CreateSubspace(GridParallelRNG  &RNG,LinearOperatorBase<FineField> &hermop,int nn=nbasis) {

      RealD scale;

-      ConjugateGradient<FineField> CG(5.0e-3,10000);
+      ConjugateGradient<FineField> CG(1.0e-2,10000);
      FineField noise(FineGrid);
      FineField Mn(FineGrid);

--- a/lib/algorithms/iterative/EigenSort.h
+++ b/lib/algorithms/iterative/EigenSort.h
@@ -39,42 +39,33 @@ class SortEigen {
 private:
  
 //hacking for testing for now
-#if 0
-  static bool less_lmd(RealD left,RealD right){
-    return fabs(left) < fabs(right);
-  }  
-  static bool less_pair(std::pair<RealD,Field>& left,
-		 std::pair<RealD,Field>& right){
-    return fabs(left.first) < fabs(right.first);
-  }  
-#else
+ private:
  static bool less_lmd(RealD left,RealD right){
    return left > right;
  }  
-  static bool less_pair(std::pair<RealD,Field>& left,
-		 std::pair<RealD,Field>& right){
+  static bool less_pair(std::pair<RealD,Field const*>& left, // each pair carries a pointer to a field, so reordering pairs moves pointers, not Field objects
+                        std::pair<RealD,Field const*>& right){ // ordering predicate: the pair with the larger .first is placed earlier
    return left.first > (right.first);
  }  
-#endif
+  
  
 public:

  void push(DenseVector<RealD>& lmd,
-	    DenseVector<Field>& evec,int N) {
+            DenseVector<Field>& evec,int N) { // rewrites the first N entries of lmd and evec in place, in less_pair order; entries from N onwards are not written
+    DenseVector<Field> cpy(lmd.size(),evec[0]._grid); // snapshot storage built on evec[0]'s grid; NOTE(review): evec[0] is read unconditionally, so evec must be non-empty
+    for(int i=0;i<lmd.size();i++) cpy[i] = evec[i]; // copy the first lmd.size() fields before evec is overwritten
    
-    DenseVector<std::pair<RealD, Field> > emod;
-    typename DenseVector<std::pair<RealD, Field> >::iterator it;
-    
-    for(int i=0;i<lmd.size();++i){
-      emod.push_back(std::pair<RealD,Field>(lmd[i],evec[i]));
-    }
+    DenseVector<std::pair<RealD, Field const*> > emod(lmd.size());    // one (value, field pointer) pair per input entry
+    for(int i=0;i<lmd.size();++i)
+      emod[i] = std::pair<RealD,Field const*>(lmd[i],&cpy[i]); // point into the snapshot, not into evec, because evec is overwritten below

    partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair);

-    it=emod.begin();
+    typename DenseVector<std::pair<RealD, Field const*> >::iterator it = emod.begin(); // walk the sorted pairs from the front
    for(int i=0;i<N;++i){
      lmd[i]=it->first;
-      evec[i]=it->second;
+      evec[i]=*(it->second); // copy the selected field back out of the snapshot
      ++it;
    }
  }
--- a/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
+++ b/lib/algorithms/iterative/ImplicitlyRestartedLanczos.h
@@ -637,21 +637,20 @@ until convergence
 	abort();
 	
      converged:
-	// Sorting
+       // Sorting
+       eval.resize(Nconv);
+       evec.resize(Nconv,grid);
+       for(int i=0; i<Nconv; ++i){
+         eval[i] = eval2[Iconv[i]];
+         evec[i] = B[Iconv[i]];
+       }
+      _sort.push(eval,evec,Nconv);

-	eval.clear();
-	evec.clear();
-	for(int i=0; i<Nconv; ++i){
-	  eval.push_back(eval2[Iconv[i]]);
-	  evec.push_back(B[Iconv[i]]);
-	}
-	_sort.push(eval,evec,Nconv);
-	
-	std::cout << "\n Converged\n Summary :\n";
-	std::cout << " -- Iterations  = "<< Nconv  << "\n";
-	std::cout << " -- beta(k)     = "<< beta_k << "\n";
-	std::cout << " -- Nconv       = "<< Nconv  << "\n";
-      }
+      std::cout << "\n Converged\n Summary :\n";
+      std::cout << " -- Iterations  = "<< Nconv  << "\n";
+      std::cout << " -- beta(k)     = "<< beta_k << "\n";
+      std::cout << " -- Nconv       = "<< Nconv  << "\n";
+     }

    /////////////////////////////////////////////////
    // Adapted from Rudy's lanczos factor routine
--- a/lib/qcd/action/fermion/WilsonKernels.cc
+++ b/lib/qcd/action/fermion/WilsonKernels.cc
@@ -522,7 +522,7 @@ void WilsonKernels<Impl>::DiracOptDhopDir(StencilImpl &st,DoubledGaugeField &U,
 template<class Impl> 
 void WilsonKernels<Impl>::DiracOptAsmDhopSite(StencilImpl &st,DoubledGaugeField &U,
 					      std::vector<SiteHalfSpinor,alignedAllocator<SiteHalfSpinor> >  &buf,
-					      int sF,int sU,const FermionField &in, FermionField &out,bool local, bool nonlocal)
+					      int sF,int sU,const FermionField &in, FermionField &out) // this definition only forwards all of its arguments to DiracOptDhopSite
 {
   DiracOptDhopSite(st,U,buf,sF,sU,in,out); // will template override for Wilson Nc=3
 }
--- a/tests/Test_dwf_hdcr.cc
+++ b/tests/Test_dwf_hdcr.cc
@@ -329,7 +329,7 @@ public:
    CoarseVector Ctmp(_CoarseOperator.Grid());
    CoarseVector Csol(_CoarseOperator.Grid()); Csol=zero;

-    ConjugateGradient<CoarseVector>  CG(1.0e-4,100000);
+    ConjugateGradient<CoarseVector>  CG(3.0e-3,100000);
    //    ConjugateGradient<FineField>    fCG(3.0e-2,1000);

    HermitianLinearOperator<CoarseOperator,CoarseVector>  HermOp(_CoarseOperator);
@@ -560,8 +560,8 @@ int main (int argc, char ** argv)
  assert ( (nbasis & 0x1)==0);
  int nb=nbasis/2;
  std::cout<<GridLogMessage << " nbasis/2 = "<<nb<<std::endl;
-  Aggregates.CreateSubspace(RNG5,HermDefOp,nb);
-    //  Aggregates.CreateSubspaceLanczos(RNG5,HermDefOp,nb);
+  //  Aggregates.CreateSubspace(RNG5,HermDefOp,nb);
+  Aggregates.CreateSubspaceLanczos(RNG5,HermDefOp,nb);
  for(int n=0;n<nb;n++){
    G5R5(Aggregates.subspace[n+nb],Aggregates.subspace[n]);
    std::cout<<GridLogMessage<<n<<" subspace "<<norm2(Aggregates.subspace[n+nb])<<" "<<norm2(Aggregates.subspace[n]) <<std::endl;