mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-25 10:09:34 +01:00 
			
		
		
		
	Compare commits
	
		
			20 Commits
		
	
	
		
			8a098889fc
			...
			feature/CG
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 70068cff51 | ||
|  | 85c055fa30 | ||
|  | 90fedbd2af | ||
|  | ec0c53fa68 | ||
|  | 6ceee102e8 | ||
|  | 6e57bdb6b3 | ||
|  | 4c11e36d3d | ||
|  | 9977c53035 | ||
|  | 3a74fec62f | ||
|  | 8fb0a13f39 | ||
|  | 14a1406f54 | ||
|  | 538e64e5b4 | ||
|  | b2dc17e160 | ||
|  | afbbcd2194 | ||
|  | d4e0b11bb1 | ||
|  | 7144ee7ae8 | ||
|  | f1908c7bc9 | ||
|  | 036ec31c48 | ||
|  | 53f240200e | ||
|  | 9720c9ba3f | 
							
								
								
									
										49
									
								
								lib/Bitwise.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								lib/Bitwise.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,49 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file: ./lib/Bitwise.cc | ||||
|  | ||||
| Copyright (C) 2016 | ||||
|  | ||||
| Author: Guido Cossu <guido.cossu@ed.ac.uk> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution | ||||
| directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #include <iostream> | ||||
| #include <Bitwise.h> | ||||
| #include <bitset> | ||||
| #include <climits> | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
| void show_binaryrep(const unsigned char* a, size_t size) { | ||||
|   const unsigned char* beg = a; | ||||
|   const unsigned char* end = a + size; | ||||
|   unsigned int ctr = 0; | ||||
|   while (beg != end) { | ||||
|     std::cout << std::bitset<CHAR_BIT>(*beg++) << ' '; | ||||
|     ctr++; | ||||
|     if (ctr % GRID_REAL_BYTES == 0) std::cout << '\n'; | ||||
|   } | ||||
|   std::cout << '\n'; | ||||
| } | ||||
|  | ||||
| } // namespace  | ||||
|  | ||||
							
								
								
									
										76
									
								
								lib/Bitwise.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										76
									
								
								lib/Bitwise.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,76 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
| Grid physics library, www.github.com/paboyle/Grid | ||||
|  | ||||
| Source file: ./lib/Bitwise.h | ||||
|  | ||||
| Copyright (C) 2016 | ||||
|  | ||||
| Author: Guido Cossu <guido.cossu@ed.ac.uk> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation; either version 2 of the License, or | ||||
| (at your option) any later version. | ||||
|  | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
|  | ||||
| You should have received a copy of the GNU General Public License along | ||||
| with this program; if not, write to the Free Software Foundation, Inc., | ||||
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
| See the full license in the file "LICENSE" in the top level distribution | ||||
| directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
| #ifndef GRID_BITWISE_H | ||||
| #define GRID_BITWISE_H | ||||
|  | ||||
| #include <cassert> | ||||
| #include <cfloat> | ||||
| #include <bitset> | ||||
| #include <climits> | ||||
| #include <Config.h> | ||||
|  | ||||
| #ifdef GRID_DEFAULT_PRECISION_SINGLE | ||||
| #define GRID_REAL_BYTES 4 | ||||
| #endif | ||||
| #ifdef GRID_DEFAULT_PRECISION_DOUBLE | ||||
| #define GRID_REAL_BYTES 8 | ||||
| #endif | ||||
|  | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
| void show_binaryrep(const unsigned char* a, size_t size); | ||||
|  | ||||
| template <typename T> | ||||
| void show_binaryrep(const T& a) { | ||||
|   const char* beg = reinterpret_cast<const char*>(&a); | ||||
|   const char* end = beg + sizeof(a); | ||||
|   unsigned int ctr = 0; | ||||
|   while (beg != end) { | ||||
|   	std::cout << std::bitset<CHAR_BIT>(*beg++) << ' '; | ||||
|   	ctr++; | ||||
|   	if (ctr % GRID_REAL_BYTES == 0) std::cout << '\n'; | ||||
|   } | ||||
|   std::cout << '\n'; | ||||
| } | ||||
|  | ||||
| template <typename T> | ||||
| void bitwise_xor(T& l, T& r, unsigned char* xors) { | ||||
|   assert(sizeof(l) == sizeof(r)); | ||||
|   unsigned char* org = reinterpret_cast<unsigned char*>(&l); | ||||
|   unsigned char* cur = reinterpret_cast<unsigned char*>(&r); | ||||
|   int words = sizeof(l) / sizeof(*org); | ||||
|   unsigned char result = 0; | ||||
|   for (int w = 0; w < words; w++) xors[w] = (org[w] ^ cur[w]); | ||||
| } | ||||
|  | ||||
| }; // namespace  | ||||
|  | ||||
|  | ||||
| #endif | ||||
| @@ -62,6 +62,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
| #include <Grid/serialisation/Serialisation.h> | ||||
| #include "Config.h" | ||||
| #include <Grid/Timer.h> | ||||
| #include <Grid/Bitwise.h> | ||||
| #include <Grid/PerfCount.h> | ||||
| #include <Grid/Log.h> | ||||
| #include <Grid/AlignedAllocator.h> | ||||
|   | ||||
| @@ -100,7 +100,7 @@ void Grid_quiesce_nodes(void) { | ||||
|   me = shmem_my_pe(); | ||||
| #endif | ||||
|   if (me) { | ||||
|     std::cout.setstate(std::ios::badbit); | ||||
|     std::cout.setstate(std::ios::badbit);// mute all nodes except 0 | ||||
|   } | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -38,19 +38,21 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
| #ifdef GRID_OMP | ||||
| #include <omp.h> | ||||
| #ifdef GRID_NUMA | ||||
| #define PARALLEL_FOR_LOOP        _Pragma("omp parallel for schedule(static)") | ||||
| #define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)") | ||||
| #else | ||||
| #define PARALLEL_FOR_LOOP        _Pragma("omp parallel for schedule(runtime)") | ||||
| #define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(runtime)") | ||||
|   #define PARALLEL_FOR_LOOP        _Pragma("omp parallel for schedule(static)") | ||||
|   #define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)") | ||||
|   #else | ||||
|   #define PARALLEL_FOR_LOOP        _Pragma("omp parallel for schedule(runtime)") | ||||
|   #define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(runtime)") | ||||
| #endif | ||||
| #define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)") | ||||
| #define PARALLEL_REGION       _Pragma("omp parallel") | ||||
| #define PARALLEL_NESTED_LOOP2    _Pragma("omp parallel for collapse(2)") | ||||
| #define PARALLEL_REGION          _Pragma("omp parallel") | ||||
| #define PARALLEL_FOR_LOOP_STATIC _Pragma("omp parallel for schedule(static)") | ||||
| #else | ||||
| #define PARALLEL_FOR_LOOP | ||||
| #define PARALLEL_FOR_LOOP_INTERN | ||||
| #define PARALLEL_NESTED_LOOP2 | ||||
| #define PARALLEL_REGION | ||||
| #define PARALLEL_FOR_LOOP_STATIC | ||||
| #endif | ||||
|  | ||||
| namespace Grid { | ||||
|   | ||||
| @@ -9,6 +9,7 @@ Copyright (C) 2015 | ||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
| Author: Guido Cossu <guido.cossu@ed.ac.uk> | ||||
|  | ||||
| This program is free software; you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| @@ -33,6 +34,21 @@ directory | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
| struct CG_state { | ||||
|   bool do_repro; | ||||
|   std::vector<RealD> residuals; | ||||
|  | ||||
|   CG_state() {reset();} | ||||
|  | ||||
|   void reset(){ | ||||
|     do_repro = false; | ||||
|     residuals.clear(); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| enum CGexec_mode{ Default, ReproducibilityTest }; | ||||
|  | ||||
| ///////////////////////////////////////////////////////////// | ||||
| // Base classes for iterative processes based on operators | ||||
| // single input vec, single output vec. | ||||
| @@ -45,10 +61,30 @@ class ConjugateGradient : public OperatorFunction<Field> { | ||||
|                            // Defaults true. | ||||
|   RealD Tolerance; | ||||
|   Integer MaxIterations; | ||||
|   ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true) | ||||
|       : Tolerance(tol), | ||||
|         MaxIterations(maxit), | ||||
|         ErrorOnNoConverge(err_on_no_conv){}; | ||||
|  | ||||
|   // Reproducibility controls | ||||
|   bool ReproTest; | ||||
|   CG_state CGState; //to check reproducibility by repeating the CG | ||||
|   ReproducibilityState<typename Field::vector_object> ReprTest; // for the inner proucts | ||||
|  | ||||
|   // Constructor | ||||
|   ConjugateGradient(RealD tol, Integer maxit, CGexec_mode Mode = Default) | ||||
|     : Tolerance(tol),MaxIterations(maxit){ | ||||
|     switch(Mode) | ||||
|     { | ||||
|       case Default  :  | ||||
|       ErrorOnNoConverge = true; | ||||
|       ReproTest = false; | ||||
|       case ReproducibilityTest : | ||||
|       ErrorOnNoConverge = false; | ||||
|       ReproTest = true;  | ||||
|     } | ||||
|   }; | ||||
|  | ||||
|   void set_reproducibility_interval(unsigned int interval){ | ||||
|     ReprTest.interval = interval; | ||||
|   } | ||||
|  | ||||
|  | ||||
|   void operator()(LinearOperatorBase<Field> &Linop, const Field &src, | ||||
|                   Field &psi) { | ||||
| @@ -60,34 +96,37 @@ class ConjugateGradient : public OperatorFunction<Field> { | ||||
|     Field p(src); | ||||
|     Field mmp(src); | ||||
|     Field r(src); | ||||
|     Field psi_start(psi);// save for the repro test | ||||
|  | ||||
|     if (CGState.do_repro && ReproTest) | ||||
|         std::cout << GridLogMessage << "Starting reproducibility test, full check every " | ||||
|                   << ReprTest.interval << " calls" << std::endl; | ||||
|  | ||||
|     if(!ReprTest.do_check) | ||||
|         ReprTest.reset(); | ||||
|     ReprTest.enable_reprocheck=ReproTest; | ||||
|  | ||||
|  | ||||
|  | ||||
|     // Initial residual computation & set up | ||||
|     RealD guess = norm2(psi); | ||||
|     RealD guess = norm2(psi, ReprTest); | ||||
|     assert(std::isnan(guess) == 0); | ||||
|  | ||||
|     Linop.HermOpAndNorm(psi, mmp, d, b);// eventually split this for the norm check | ||||
|      | ||||
|     Linop.HermOpAndNorm(psi, mmp, d, b); | ||||
|      | ||||
|  | ||||
|     r = src - mmp; | ||||
|     p = r; | ||||
|  | ||||
|     a = norm2(p); | ||||
|     a = norm2(p, ReprTest); | ||||
|     cp = a; | ||||
|     ssq = norm2(src); | ||||
|     ssq = norm2(src, ReprTest); | ||||
|  | ||||
|     std::cout << GridLogIterative << std::setprecision(4) | ||||
|               << "ConjugateGradient: guess " << guess << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(4) | ||||
|               << "ConjugateGradient:   src " << ssq << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(4) | ||||
|               << "ConjugateGradient:    mp " << d << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(4) | ||||
|               << "ConjugateGradient:   mmp " << b << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(4) | ||||
|               << "ConjugateGradient:  cp,r " << cp << std::endl; | ||||
|     std::cout << GridLogIterative << std::setprecision(4) | ||||
|               << "ConjugateGradient:     p " << a << std::endl; | ||||
|     std::cout << GridLogIterative << "ConjugateGradient: guess " << guess << std::endl; | ||||
|     std::cout << GridLogIterative << "ConjugateGradient:   src " << ssq << std::endl; | ||||
|     std::cout << GridLogIterative << "ConjugateGradient:    mp " << d << std::endl; | ||||
|     std::cout << GridLogIterative << "ConjugateGradient:   mmp " << b << std::endl; | ||||
|     std::cout << GridLogIterative << "ConjugateGradient:  cp,r " << cp << std::endl; | ||||
|     std::cout << GridLogIterative << "ConjugateGradient:     p " << a << std::endl; | ||||
|  | ||||
|     RealD rsq = Tolerance * Tolerance * ssq; | ||||
|  | ||||
| @@ -107,10 +146,10 @@ class ConjugateGradient : public OperatorFunction<Field> { | ||||
|     SolverTimer.Start(); | ||||
|     int k; | ||||
|     for (k = 1; k <= MaxIterations; k++) { | ||||
|       c = cp; | ||||
|       c = cp;// old residual | ||||
|  | ||||
|       MatrixTimer.Start(); | ||||
|       Linop.HermOpAndNorm(p, mmp, d, qq); | ||||
|       Linop.HermOpAndNorm(p, mmp, d, qq);// mmp = Ap, d=pAp | ||||
|       MatrixTimer.Stop(); | ||||
|  | ||||
|       LinalgTimer.Start(); | ||||
| @@ -118,14 +157,31 @@ class ConjugateGradient : public OperatorFunction<Field> { | ||||
|       //  ComplexD dck  = innerProduct(p,mmp); | ||||
|  | ||||
|       a = c / d; | ||||
|       b_pred = a * (a * qq - d) / c; | ||||
|       b_pred = a * (a * qq - d) / c;// a check | ||||
|  | ||||
|       cp = axpy_norm(r, -a, mmp, r); | ||||
|  | ||||
|       axpy(r, -a, mmp, r);// new residual r = r_old - a * Ap | ||||
|       cp = norm2(r, ReprTest); // bookkeeping this norm | ||||
|       if (ReproTest && !CGState.do_repro) { | ||||
|         CGState.residuals.push_back(cp);  // save residuals state | ||||
|                 std::cout << GridLogIterative << "ReproTest: Saving state" << std::endl; | ||||
|         } | ||||
|       if (ReproTest && CGState.do_repro){ | ||||
|         // check that the residual agrees with the previous run | ||||
|         std::cout << GridLogIterative << "ReproTest: Checking state k=" << k << std::endl; | ||||
|         if (cp != CGState.residuals[k-1]){ | ||||
|                 std::cout << GridLogMessage << "Failing reproducibility test"; | ||||
|                 std::cout << GridLogMessage << " at k=" << k << std::endl; | ||||
|                 std::cout << GridLogMessage << "saved residual = " << CGState.residuals[k-1]  | ||||
|                         << " cp = " << cp << std::endl; | ||||
|                 exit(1);  // exit after the first failure | ||||
|         } | ||||
|       } | ||||
|       b = cp / c; | ||||
|  | ||||
|       // Fuse these loops ; should be really easy | ||||
|       psi = a * p + psi; | ||||
|       p = p * b + r; | ||||
|       psi = a * p + psi; // update solution | ||||
|       p = p * b + r;  // update search direction | ||||
|  | ||||
|       LinalgTimer.Stop(); | ||||
|       std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k | ||||
| @@ -156,6 +212,22 @@ class ConjugateGradient : public OperatorFunction<Field> { | ||||
|  | ||||
|         if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0); | ||||
|  | ||||
|         if (! (CGState.do_repro && ReproTest)){ | ||||
|                 CGState.do_repro = true; | ||||
|                 ReprTest.do_check = true; | ||||
|                 ReprTest.reset_counter(); | ||||
|                 this->operator()(Linop, src, psi_start);// run the repro test | ||||
|                 if (ReprTest.success) | ||||
|                 	std::cout << GridLogMessage << "Reproducibility test passed" << std::endl; | ||||
|                 else{ | ||||
|                 	std::cout << GridLogMessage << "Reproducibility test failed" << std::endl; | ||||
|                 	exit(1); | ||||
|                 } | ||||
|         } | ||||
|  | ||||
|         // Clear state | ||||
|         CGState.reset(); | ||||
|         ReprTest.reset(); | ||||
|         return; | ||||
|       } | ||||
|     } | ||||
|   | ||||
| @@ -59,7 +59,7 @@ public: | ||||
|  | ||||
|     int Nstop;   // Number of evecs checked for convergence | ||||
|     int Nk;      // Number of converged sought | ||||
|     int Np;      // Np -- Number of spare vecs in kryloc space | ||||
|     int Np;      // Np -- Number of spare vecs in krylov space | ||||
|     int Nm;      // Nm -- total number of vectors | ||||
|  | ||||
|     RealD eresid; | ||||
|   | ||||
| @@ -65,6 +65,7 @@ const std::vector<int> & CartesianCommunicator::ThisProcessorCoor(void) { return | ||||
| const std::vector<int> & CartesianCommunicator::ProcessorGrid(void)     { return _processors; }; | ||||
| int                      CartesianCommunicator::ProcessorCount(void)    { return _Nprocessors; }; | ||||
|  | ||||
|  | ||||
| //////////////////////////////////////////////////////////////////////////////// | ||||
| // very VERY rarely (Log, serial RNG) we need world without a grid | ||||
| //////////////////////////////////////////////////////////////////////////////// | ||||
| @@ -89,11 +90,11 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N) | ||||
| #if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPI3L) | ||||
|  | ||||
| void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, | ||||
| 						       void *xmit, | ||||
| 						       int xmit_to_rank, | ||||
| 						       void *recv, | ||||
| 						       int recv_from_rank, | ||||
| 						       int bytes) | ||||
|                                                        void *xmit, | ||||
|                                                        int xmit_to_rank, | ||||
|                                                        void *recv, | ||||
|                                                        int recv_from_rank, | ||||
|                                                        int bytes) | ||||
| { | ||||
|   SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes); | ||||
| } | ||||
|   | ||||
| @@ -68,6 +68,8 @@ class CartesianCommunicator { | ||||
|   static MPI_Comm communicator_world; | ||||
|          MPI_Comm communicator; | ||||
|   typedef MPI_Request CommsRequest_t; | ||||
|   static char name[MPI_MAX_PROCESSOR_NAME]; // processing node physical name | ||||
|   static int length;  | ||||
| #else  | ||||
|   typedef int CommsRequest_t; | ||||
| #endif | ||||
| @@ -149,6 +151,7 @@ class CartesianCommunicator { | ||||
|   const std::vector<int> & ProcessorGrid(void)     ; | ||||
|   int                      ProcessorCount(void)    ; | ||||
|  | ||||
|   void        		 PrintRankInfo(void)     ; | ||||
|   //////////////////////////////////////////////////////////////////////////////// | ||||
|   // very VERY rarely (Log, serial RNG) we need world without a grid | ||||
|   //////////////////////////////////////////////////////////////////////////////// | ||||
|   | ||||
| @@ -35,6 +35,8 @@ namespace Grid { | ||||
| // Info that is setup once and indept of cartesian layout | ||||
| /////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| MPI_Comm CartesianCommunicator::communicator_world; | ||||
| char CartesianCommunicator::name[MPI_MAX_PROCESSOR_NAME]; // processing node physical name | ||||
| int CartesianCommunicator::length;  | ||||
|  | ||||
| // Should error check all MPI calls. | ||||
| void CartesianCommunicator::Init(int *argc, char ***argv) { | ||||
| @@ -44,6 +46,8 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { | ||||
|     MPI_Init(argc,argv); | ||||
|   } | ||||
|   MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); | ||||
|  | ||||
|   MPI_Get_processor_name(name, &length); | ||||
|   ShmInitGeneric(); | ||||
| } | ||||
|  | ||||
| @@ -206,5 +210,10 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) | ||||
|   assert(ierr==0); | ||||
| } | ||||
|  | ||||
| void CartesianCommunicator::PrintRankInfo(){ | ||||
|     std::cout << "Grid: Rank "<< _processor << "  -  Physical node name: " << name << std::endl; | ||||
| } | ||||
|  | ||||
|  | ||||
| }// end of namespace | ||||
|  | ||||
|   | ||||
| @@ -576,5 +576,10 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) | ||||
|   assert(ierr==0); | ||||
| } | ||||
|  | ||||
|   void CartesianCommunicator::PrintRankInfo(){ | ||||
|     std::cout << "Grid: Rank "<< _processor << "  -  Physical node name: " << name << std::endl; | ||||
|   } | ||||
|  | ||||
|    | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -869,6 +869,9 @@ void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) { | ||||
|   return NULL; | ||||
| } | ||||
|  | ||||
|   void CartesianCommunicator::PrintRankInfo(){ | ||||
|     std::cout << "Grid: Rank "<< _processor << "  -  Physical node name: " << name << std::endl; | ||||
|   } | ||||
|  | ||||
| }; | ||||
|  | ||||
|   | ||||
| @@ -37,6 +37,11 @@ void CartesianCommunicator::Init(int *argc, char *** arv) | ||||
|   ShmInitGeneric(); | ||||
| } | ||||
|  | ||||
| void CartesianCommunicator::PrintRankInfo(){ | ||||
|     std::cout << GridLogMessage << "No Rank Info available" << std::endl; | ||||
| } | ||||
|  | ||||
|  | ||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) | ||||
| { | ||||
|   _processors = processors; | ||||
| @@ -60,10 +65,10 @@ void CartesianCommunicator::GlobalSum(uint64_t &){} | ||||
| void CartesianCommunicator::GlobalSumVector(double *,int N){} | ||||
|  | ||||
| void CartesianCommunicator::SendRecvPacket(void *xmit, | ||||
| 					   void *recv, | ||||
| 					   int xmit_to_rank, | ||||
| 					   int recv_from_rank, | ||||
| 					   int bytes) | ||||
|                                            void *recv, | ||||
|                                            int xmit_to_rank, | ||||
|                                            int recv_from_rank, | ||||
|                                            int bytes) | ||||
| { | ||||
|   assert(0); | ||||
| } | ||||
| @@ -71,19 +76,19 @@ void CartesianCommunicator::SendRecvPacket(void *xmit, | ||||
|  | ||||
| // Basic Halo comms primitive -- should never call in single node | ||||
| void CartesianCommunicator::SendToRecvFrom(void *xmit, | ||||
| 					   int dest, | ||||
| 					   void *recv, | ||||
| 					   int from, | ||||
| 					   int bytes) | ||||
|                                            int dest, | ||||
|                                            void *recv, | ||||
|                                            int from, | ||||
|                                            int bytes) | ||||
| { | ||||
|   assert(0); | ||||
| } | ||||
| void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list, | ||||
| 						void *xmit, | ||||
| 						int dest, | ||||
| 						void *recv, | ||||
| 						int from, | ||||
| 						int bytes) | ||||
|                                                 void *xmit, | ||||
|                                                 int dest, | ||||
|                                                 void *recv, | ||||
|                                                 int from, | ||||
|                                                 int bytes) | ||||
| { | ||||
|   assert(0); | ||||
| } | ||||
|   | ||||
| @@ -333,5 +333,9 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) | ||||
|   } | ||||
| } | ||||
|  | ||||
|   void CartesianCommunicator::PrintRankInfo(){ | ||||
|     std::cout << GridLogMessage << "SHMEM: Rank Info not implemented yet" << std::endl; | ||||
|   } | ||||
|  | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -31,6 +31,87 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
| #define GRID_LATTICE_REDUCTION_H | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
| template <class T> | ||||
| class ReproducibilityState { | ||||
|  public: | ||||
|   typedef typename T::vector_type vector_type; | ||||
|   typedef std::vector<vector_type, alignedAllocator<vector_type> >  sum_type; | ||||
|   unsigned int  n_call; | ||||
|   bool          do_check; | ||||
|   bool          enable_reprocheck; | ||||
|   bool          success; | ||||
|   unsigned int  interval;  | ||||
|   std::vector<sum_type> th_states; | ||||
|  | ||||
|   void reset_counter() { n_call = 0; } | ||||
|   void reset() { | ||||
|     th_states.clear(); | ||||
|     do_check          = false; | ||||
|     enable_reprocheck = false; | ||||
|     success           = true; | ||||
|     n_call            = 0; | ||||
|   }; | ||||
|  | ||||
|   ReproducibilityState():interval(1) {  | ||||
|     reset();  | ||||
|   } | ||||
|  | ||||
|   void check(GridBase* grid, sum_type &sumarray){ | ||||
|     ///////////////////////  Reproducibility section, not threaded on purpouse | ||||
|     if (enable_reprocheck) { | ||||
|       if (do_check && (n_call % interval) == 0) { | ||||
|         for (int thread = 0; thread < sumarray.size(); thread++) { | ||||
|           int words = sizeof(sumarray[thread])/sizeof(unsigned char); | ||||
|           unsigned char xors[words]; | ||||
|           bitwise_xor(sumarray[thread], th_states[n_call][thread],xors); | ||||
|           // OR all words | ||||
|           unsigned char res = 0; | ||||
|           for (int w = 0; w < words; w++) res = res | xors[w]; | ||||
|  | ||||
|           Grid_unquiesce_nodes(); | ||||
|           int rank = 0; | ||||
|           while (rank < grid->_Nprocessors){ | ||||
|             if (rank == grid->ThisRank() ){ | ||||
|               if ( res ) { | ||||
|                 std::cout << "Reproducibility failure report" << std::endl; | ||||
|                 grid->PrintRankInfo(); | ||||
|                 std::cout << "Call: "<< n_call << " Thread: " << thread << std::endl; | ||||
|                 std::cout << "Size of states: " << th_states.size() << std::endl; | ||||
|                 std::cout << std::setprecision(GRID_REAL_DIGITS+1) << std::scientific; | ||||
|                 std::cout << "Saved partial sum  : " << th_states[n_call][thread] << std::endl; | ||||
|                 std::cout << "Current partial sum: " << sumarray[thread] << std::endl; | ||||
|                 std::cout << "Saved state "  << std::endl; show_binaryrep(th_states[n_call][thread]); | ||||
|                 std::cout << "Current state" << std::endl; show_binaryrep(sumarray[thread]); | ||||
|                 std::cout << "XOR result"    << std::endl; show_binaryrep(xors, words); | ||||
|               		//std::cout << std::defaultfloat;  //not supported by some compilers | ||||
|                 std::cout << std::setprecision(6); | ||||
|                 success = false;  | ||||
|               } | ||||
|             } | ||||
|             rank++; | ||||
|             grid->Barrier(); | ||||
|           } | ||||
|           Grid_quiesce_nodes();                          | ||||
|         } | ||||
|  | ||||
|       } else if (!do_check) | ||||
|       { | ||||
|         std::cout << GridLogDebug << "Saving thread state for inner product. Call n. "  | ||||
|         << n_call << std::endl; | ||||
|         th_states.resize(n_call+1); | ||||
|         th_states[n_call].resize(grid->SumArraySize()); | ||||
|           th_states[n_call] = sumarray;  // save threads state | ||||
|           //n_call++; | ||||
|         } | ||||
|         n_call++; | ||||
|       } | ||||
|     } | ||||
|  | ||||
| }; | ||||
|  | ||||
|  | ||||
|  | ||||
| #ifdef GRID_WARN_SUBOPTIMAL | ||||
| #warning "Optimisation alert all these reduction loops are NOT threaded " | ||||
| #endif      | ||||
| @@ -39,12 +120,25 @@ namespace Grid { | ||||
|     // Deterministic Reduction operations | ||||
|     //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){ | ||||
|     ComplexD nrm = innerProduct(arg,arg); | ||||
|     return std::real(nrm);  | ||||
|   } | ||||
|         ReproducibilityState<vobj> repr; | ||||
|         ComplexD nrm = innerProduct(arg, arg, repr); | ||||
|         return std::real(nrm);  | ||||
|     } | ||||
|  | ||||
|     template <class vobj> | ||||
|     inline RealD norm2(const Lattice<vobj> &arg, ReproducibilityState<vobj>& rep) { | ||||
|       ComplexD nrm = innerProduct(arg, arg, rep); | ||||
|       return std::real(nrm); | ||||
|     } | ||||
|  | ||||
|     template<class vobj> | ||||
|     inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)  | ||||
|     inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right){ | ||||
|       ReproducibilityState<vobj> repr; | ||||
|       return innerProduct(left, right, repr); | ||||
|     }  | ||||
|      | ||||
|     template<class vobj> | ||||
|     inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right, ReproducibilityState<vobj>& repr)  | ||||
|     { | ||||
|       typedef typename vobj::scalar_type scalar_type; | ||||
|       typedef typename vobj::vector_type vector_type; | ||||
| @@ -54,24 +148,28 @@ namespace Grid { | ||||
|  | ||||
|       std::vector<vector_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize()); | ||||
|       for(int i=0;i<grid->SumArraySize();i++){ | ||||
| 	sumarray[i]=zero; | ||||
|         sumarray[i]=zero; | ||||
|       } | ||||
|  | ||||
| PARALLEL_FOR_LOOP | ||||
|       PARALLEL_FOR_LOOP_STATIC //request statically scheduled threads for reproducibility | ||||
|       for(int thr=0;thr<grid->SumArraySize();thr++){ | ||||
| 	int nwork, mywork, myoff; | ||||
| 	GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff); | ||||
| 	 | ||||
| 	decltype(innerProduct(left._odata[0],right._odata[0])) vnrm=zero; // private to thread; sub summation | ||||
|         int nwork, mywork, myoff; | ||||
|         GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff); | ||||
|          | ||||
|         decltype(innerProduct(left._odata[0],right._odata[0])) vnrm = zero; // private to thread; sub summation | ||||
|         for(int ss=myoff;ss<mywork+myoff; ss++){ | ||||
| 	  vnrm = vnrm + innerProduct(left._odata[ss],right._odata[ss]); | ||||
| 	} | ||||
| 	sumarray[thr]=TensorRemove(vnrm) ; | ||||
|           vnrm = vnrm + innerProduct(left._odata[ss],right._odata[ss]); | ||||
|         } | ||||
|         sumarray[thr]=TensorRemove(vnrm) ; | ||||
|       } | ||||
|      | ||||
|        | ||||
|       /////////// Reproducibility | ||||
|       repr.check(grid, sumarray); | ||||
|       /////////////////////////// | ||||
|  | ||||
|       vector_type vvnrm; vvnrm=zero;  // sum across threads | ||||
|       for(int i=0;i<grid->SumArraySize();i++){ | ||||
| 	vvnrm = vvnrm+sumarray[i]; | ||||
|         vvnrm = vvnrm+sumarray[i]; | ||||
|       }  | ||||
|       nrm = Reduce(vvnrm);// sum across simd | ||||
|       right._grid->GlobalSum(nrm); | ||||
| @@ -79,26 +177,26 @@ PARALLEL_FOR_LOOP | ||||
|     } | ||||
|  | ||||
|     template<class Op,class T1> | ||||
|       inline auto sum(const LatticeUnaryExpression<Op,T1> & expr) | ||||
|       ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object | ||||
|     inline auto sum(const LatticeUnaryExpression<Op,T1> & expr) | ||||
|     ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object | ||||
|     { | ||||
|       return sum(closure(expr)); | ||||
|     } | ||||
|  | ||||
|     template<class Op,class T1,class T2> | ||||
|       inline auto sum(const LatticeBinaryExpression<Op,T1,T2> & expr) | ||||
|       ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),eval(0,std::get<1>(expr.second))))::scalar_object | ||||
|     inline auto sum(const LatticeBinaryExpression<Op,T1,T2> & expr) | ||||
|     ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),eval(0,std::get<1>(expr.second))))::scalar_object | ||||
|     { | ||||
|       return sum(closure(expr)); | ||||
|     } | ||||
|  | ||||
|  | ||||
|     template<class Op,class T1,class T2,class T3> | ||||
|       inline auto sum(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr) | ||||
|       ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)), | ||||
| 				 eval(0,std::get<1>(expr.second)), | ||||
| 				 eval(0,std::get<2>(expr.second)) | ||||
| 				 ))::scalar_object | ||||
|     inline auto sum(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr) | ||||
|     ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)), | ||||
|      eval(0,std::get<1>(expr.second)), | ||||
|      eval(0,std::get<2>(expr.second)) | ||||
|      ))::scalar_object | ||||
|     { | ||||
|       return sum(closure(expr)); | ||||
|     } | ||||
| @@ -111,24 +209,24 @@ PARALLEL_FOR_LOOP | ||||
|  | ||||
|       std::vector<vobj,alignedAllocator<vobj> > sumarray(grid->SumArraySize()); | ||||
|       for(int i=0;i<grid->SumArraySize();i++){ | ||||
| 	sumarray[i]=zero; | ||||
|         sumarray[i]=zero; | ||||
|       } | ||||
|  | ||||
| PARALLEL_FOR_LOOP | ||||
|       PARALLEL_FOR_LOOP | ||||
|       for(int thr=0;thr<grid->SumArraySize();thr++){ | ||||
| 	int nwork, mywork, myoff; | ||||
| 	GridThread::GetWork(grid->oSites(),thr,mywork,myoff); | ||||
|         int nwork, mywork, myoff; | ||||
|         GridThread::GetWork(grid->oSites(),thr,mywork,myoff); | ||||
|  | ||||
| 	vobj vvsum=zero; | ||||
|         vobj vvsum=zero; | ||||
|         for(int ss=myoff;ss<mywork+myoff; ss++){ | ||||
| 	  vvsum = vvsum + arg._odata[ss]; | ||||
| 	} | ||||
| 	sumarray[thr]=vvsum; | ||||
|           vvsum = vvsum + arg._odata[ss]; | ||||
|         } | ||||
|         sumarray[thr]=vvsum; | ||||
|       } | ||||
|  | ||||
|       vobj vsum=zero;  // sum across threads | ||||
|       for(int i=0;i<grid->SumArraySize();i++){ | ||||
| 	vsum = vsum+sumarray[i]; | ||||
|         vsum = vsum+sumarray[i]; | ||||
|       }  | ||||
|  | ||||
|       typedef typename vobj::scalar_object sobj; | ||||
| @@ -138,7 +236,7 @@ PARALLEL_FOR_LOOP | ||||
|       extract(vsum,buf); | ||||
|  | ||||
|       for(int i=0;i<Nsimd;i++) ssum = ssum + buf[i]; | ||||
|       arg._grid->GlobalSum(ssum); | ||||
|         arg._grid->GlobalSum(ssum); | ||||
|  | ||||
|       return ssum; | ||||
|     } | ||||
| @@ -146,23 +244,23 @@ PARALLEL_FOR_LOOP | ||||
|  | ||||
|  | ||||
| template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<typename vobj::scalar_object> &result,int orthogdim) | ||||
| { | ||||
|   typedef typename vobj::scalar_object sobj; | ||||
|   GridBase  *grid = Data._grid; | ||||
|   assert(grid!=NULL); | ||||
|     { | ||||
|       typedef typename vobj::scalar_object sobj; | ||||
|       GridBase  *grid = Data._grid; | ||||
|       assert(grid!=NULL); | ||||
|  | ||||
|   // FIXME | ||||
|   // std::cout<<GridLogMessage<<"WARNING ! SliceSum is unthreaded "<<grid->SumArraySize()<<" threads "<<std::endl; | ||||
|  | ||||
|   const int    Nd = grid->_ndimension; | ||||
|   const int Nsimd = grid->Nsimd(); | ||||
|       const int    Nd = grid->_ndimension; | ||||
|       const int Nsimd = grid->Nsimd(); | ||||
|  | ||||
|   assert(orthogdim >= 0); | ||||
|   assert(orthogdim < Nd); | ||||
|       assert(orthogdim >= 0); | ||||
|       assert(orthogdim < Nd); | ||||
|  | ||||
|   int fd=grid->_fdimensions[orthogdim]; | ||||
|   int ld=grid->_ldimensions[orthogdim]; | ||||
|   int rd=grid->_rdimensions[orthogdim]; | ||||
|       int fd=grid->_fdimensions[orthogdim]; | ||||
|       int ld=grid->_ldimensions[orthogdim]; | ||||
|       int rd=grid->_rdimensions[orthogdim]; | ||||
|  | ||||
|   std::vector<vobj,alignedAllocator<vobj> > lvSum(rd); // will locally sum vectors first | ||||
|   std::vector<sobj> lsSum(ld,zero); // sum across these down to scalars | ||||
|   | ||||
| @@ -66,6 +66,9 @@ namespace Optimization { | ||||
|     double f[4]; | ||||
|   }; | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|   struct Vsplat{ | ||||
|     //Complex float | ||||
|     inline __m256 operator()(float a, float b){ | ||||
|   | ||||
| @@ -60,6 +60,13 @@ directory | ||||
| #include "Grid_neon.h" | ||||
| #endif | ||||
|  | ||||
| #ifdef GRID_DEFAULT_PRECISION_SINGLE | ||||
| #define GRID_REAL_DIGITS FLT_DIG | ||||
| #endif | ||||
| #ifdef GRID_DEFAULT_PRECISION_DOUBLE | ||||
| #define GRID_REAL_DIGITS DBL_DIG | ||||
| #endif | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
| ////////////////////////////////////// | ||||
| @@ -146,6 +153,23 @@ class Grid_simd { | ||||
|   Grid_simd(const Grid_simd &rhs) : v(rhs.v){};  // compiles in movaps | ||||
|   Grid_simd(const Grid_simd &&rhs) : v(rhs.v){}; | ||||
|  | ||||
|   ///////////////////////////// | ||||
|   // Comparisons | ||||
|   ///////////////////////////// | ||||
|   inline bool operator==(const Grid_simd& lhs){  | ||||
|     Grid_simd::conv_t conv1; | ||||
|     Grid_simd::conv_t conv2; | ||||
|     conv1.v = v; | ||||
|     conv2.v = lhs.v; | ||||
|     return std::equal(std::begin(conv1.s), std::end(conv1.s), std::begin(conv2.s)); | ||||
|   } | ||||
|  | ||||
|   inline bool operator!=(const Grid_simd& lhs){  | ||||
|     return !(*this == lhs); | ||||
|   } | ||||
|  | ||||
|  | ||||
|  | ||||
|   ///////////////////////////// | ||||
|   // Constructors | ||||
|   ///////////////////////////// | ||||
|   | ||||
							
								
								
									
										215
									
								
								tests/debug/Test_cayley_cg_reproducibility.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										215
									
								
								tests/debug/Test_cayley_cg_reproducibility.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,215 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./tests/Test_cayley_cg_reproducibility.cc | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
|     Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|     Author: Guido Cossu <guido.cossu@ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| using namespace std; | ||||
| using namespace Grid; | ||||
| using namespace Grid::QCD; | ||||
|  | ||||
| #define REPRODUCIBILITY_INTERVAL 1 | ||||
|  | ||||
|  | ||||
| template<class d> | ||||
| struct scal { | ||||
|   d internal; | ||||
| }; | ||||
|  | ||||
|   Gamma::GammaMatrix Gmu [] = { | ||||
|     Gamma::GammaX, | ||||
|     Gamma::GammaY, | ||||
|     Gamma::GammaZ, | ||||
|     Gamma::GammaT | ||||
|   }; | ||||
|  | ||||
| template<class What>  | ||||
| void  TestCGinversions(What & Ddwf,  | ||||
|                        GridCartesian         * FGrid,          GridRedBlackCartesian * FrbGrid, | ||||
|                        GridCartesian         * UGrid,          GridRedBlackCartesian * UrbGrid, | ||||
|                        RealD mass, RealD M5, | ||||
|                        GridParallelRNG *RNG4, | ||||
|                        GridParallelRNG *RNG5); | ||||
| template<class What>  | ||||
| void  TestCGschur(What & Ddwf,  | ||||
|                   GridCartesian         * FGrid,               GridRedBlackCartesian * FrbGrid, | ||||
|                   GridCartesian         * UGrid,               GridRedBlackCartesian * UrbGrid, | ||||
|                   RealD mass, RealD M5, | ||||
|                   GridParallelRNG *RNG4, | ||||
|                   GridParallelRNG *RNG5); | ||||
|  | ||||
| template<class What>  | ||||
| void  TestCGunprec(What & Ddwf,  | ||||
|                    GridCartesian         * FGrid,              GridRedBlackCartesian * FrbGrid, | ||||
|                    GridCartesian         * UGrid,              GridRedBlackCartesian * UrbGrid, | ||||
|                    RealD mass, RealD M5, | ||||
|                    GridParallelRNG *RNG4, | ||||
|                    GridParallelRNG *RNG5); | ||||
|  | ||||
| template<class What>  | ||||
| void  TestCGprec(What & Ddwf,  | ||||
|                  GridCartesian         * FGrid,        GridRedBlackCartesian * FrbGrid, | ||||
|                  GridCartesian         * UGrid,        GridRedBlackCartesian * UrbGrid, | ||||
|                  RealD mass, RealD M5, | ||||
|                  GridParallelRNG *RNG4, | ||||
|                  GridParallelRNG *RNG5); | ||||
|  | ||||
| int main (int argc, char ** argv) | ||||
| { | ||||
|   Grid_init(&argc,&argv); | ||||
|  | ||||
|   int threads = GridThread::GetThreads(); | ||||
|   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; | ||||
|  | ||||
|   std::cout << GridLogMessage << "###########################################" << std::endl; | ||||
|   std::cout << GridLogMessage << "This test disables the convergence alert" << std::endl; | ||||
|   std::cout << GridLogMessage << "and checks whether in consecutive runs of the CG" << std::endl; | ||||
|   std::cout << GridLogMessage << "the internal reductions are bit reproducible." << std::endl; | ||||
|   std::cout << GridLogMessage << "A warning and a report is produced for every failure" << std::endl; | ||||
|   std::cout << GridLogMessage << "###########################################" << std::endl; | ||||
|  | ||||
|  | ||||
|   const int Ls=8; | ||||
|   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); | ||||
|   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||
|   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); | ||||
|   GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); | ||||
|  | ||||
|  | ||||
|   std::vector<int> seeds4({1,2,3,4}); | ||||
|   std::vector<int> seeds5({5,6,7,8}); | ||||
|   GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5); | ||||
|   GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4); | ||||
|  | ||||
|   LatticeGaugeField Umu(UGrid); | ||||
|   SU3::HotConfiguration(RNG4,Umu); | ||||
|   std::vector<LatticeColourMatrix> U(4,UGrid); | ||||
|  | ||||
|   RealD mass=0.1; | ||||
|   RealD M5  =1.8; | ||||
|   std::cout<<GridLogMessage <<"DomainWallFermion test"<<std::endl; | ||||
|   DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); | ||||
|   TestCGinversions<DomainWallFermionR>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); | ||||
|  | ||||
|   RealD b=1.5;// Scale factor b+c=2, b-c=1 | ||||
|   RealD c=0.5; | ||||
|   std::cout<<GridLogMessage <<"MobiusFermion test"<<std::endl; | ||||
|   MobiusFermionR Dmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c); | ||||
|   TestCGinversions<MobiusFermionR>(Dmob,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); | ||||
|  | ||||
|   std::cout<<GridLogMessage <<"MobiusZolotarevFermion test"<<std::endl; | ||||
|   MobiusZolotarevFermionR Dzolo(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,0.1,2.0); | ||||
|   TestCGinversions<MobiusZolotarevFermionR>(Dzolo,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); | ||||
|  | ||||
|   std::cout<<GridLogMessage <<"ScaledShamirFermion test"<<std::endl; | ||||
|   ScaledShamirFermionR Dsham(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,2.0); | ||||
|   TestCGinversions<ScaledShamirFermionR>(Dsham,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); | ||||
|  | ||||
|   std::cout<<GridLogMessage <<"ShamirZolotarevFermion test"<<std::endl; | ||||
|   ShamirZolotarevFermionR Dshamz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,2.0); | ||||
|   TestCGinversions<ShamirZolotarevFermionR>(Dshamz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); | ||||
|  | ||||
|   std::cout<<GridLogMessage <<"OverlapWilsonCayleyTanhFermion test"<<std::endl; | ||||
|   OverlapWilsonCayleyTanhFermionR Dov(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0); | ||||
|   TestCGinversions<OverlapWilsonCayleyTanhFermionR>(Dov,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); | ||||
|  | ||||
|   std::cout<<GridLogMessage <<"OverlapWilsonCayleyZolotarevFermion test"<<std::endl; | ||||
|   OverlapWilsonCayleyZolotarevFermionR Dovz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,2.0); | ||||
|   TestCGinversions<OverlapWilsonCayleyZolotarevFermionR>(Dovz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); | ||||
|  | ||||
|   Grid_finalize(); | ||||
| } | ||||
| template<class What>  | ||||
| void  TestCGinversions(What & Ddwf,  | ||||
|                        GridCartesian         * FGrid,          GridRedBlackCartesian * FrbGrid, | ||||
|                        GridCartesian         * UGrid,          GridRedBlackCartesian * UrbGrid, | ||||
|                        RealD mass, RealD M5, | ||||
|                        GridParallelRNG *RNG4, | ||||
|                        GridParallelRNG *RNG5) | ||||
| { | ||||
|   std::cout<<GridLogMessage << "Testing unpreconditioned inverter"<<std::endl; | ||||
|   TestCGunprec<What>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5); | ||||
|   std::cout<<GridLogMessage << "Testing red black preconditioned inverter"<<std::endl; | ||||
|   TestCGprec<What>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5); | ||||
|   std::cout<<GridLogMessage << "Testing red black Schur inverter"<<std::endl; | ||||
|   TestCGschur<What>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5); | ||||
| } | ||||
|  | ||||
| template<class What>  | ||||
| void  TestCGunprec(What & Ddwf,  | ||||
|                    GridCartesian         * FGrid,              GridRedBlackCartesian * FrbGrid, | ||||
|                    GridCartesian         * UGrid,              GridRedBlackCartesian * UrbGrid, | ||||
|                    RealD mass, RealD M5, | ||||
|                    GridParallelRNG *RNG4, | ||||
|                    GridParallelRNG *RNG5) | ||||
| { | ||||
|   LatticeFermion src   (FGrid); random(*RNG5,src); | ||||
|   LatticeFermion result(FGrid); result=zero; | ||||
|  | ||||
|   MdagMLinearOperator<What,LatticeFermion> HermOp(Ddwf); | ||||
|   ConjugateGradient<LatticeFermion> CG(1.0e-8,10000, ReproducibilityTest); | ||||
|   CG.set_reproducibility_interval(REPRODUCIBILITY_INTERVAL); | ||||
|   CG(HermOp,src,result); | ||||
|  | ||||
| } | ||||
| template<class What>  | ||||
| void  TestCGprec(What & Ddwf,  | ||||
|                  GridCartesian         * FGrid,        GridRedBlackCartesian * FrbGrid, | ||||
|                  GridCartesian         * UGrid,        GridRedBlackCartesian * UrbGrid, | ||||
|                  RealD mass, RealD M5, | ||||
|                  GridParallelRNG *RNG4, | ||||
|                  GridParallelRNG *RNG5) | ||||
| { | ||||
|   LatticeFermion src   (FGrid); random(*RNG5,src); | ||||
|   LatticeFermion    src_o(FrbGrid); | ||||
|   LatticeFermion result_o(FrbGrid); | ||||
|   pickCheckerboard(Odd,src_o,src); | ||||
|   result_o=zero; | ||||
|  | ||||
|   SchurDiagMooeeOperator<What,LatticeFermion> HermOpEO(Ddwf); | ||||
|   ConjugateGradient<LatticeFermion> CG(1.0e-8,10000, ReproducibilityTest); | ||||
|   CG.set_reproducibility_interval(REPRODUCIBILITY_INTERVAL); | ||||
|   CG(HermOpEO,src_o,result_o); | ||||
| } | ||||
|  | ||||
|  | ||||
| template<class What>  | ||||
| void  TestCGschur(What & Ddwf,  | ||||
|                    GridCartesian         * FGrid,              GridRedBlackCartesian * FrbGrid, | ||||
|                    GridCartesian         * UGrid,              GridRedBlackCartesian * UrbGrid, | ||||
|                    RealD mass, RealD M5, | ||||
|                    GridParallelRNG *RNG4, | ||||
|                    GridParallelRNG *RNG5) | ||||
| { | ||||
|   LatticeFermion src   (FGrid); random(*RNG5,src); | ||||
|   LatticeFermion result(FGrid); result=zero; | ||||
|  | ||||
|   ConjugateGradient<LatticeFermion> CG(1.0e-8,10000, ReproducibilityTest); | ||||
|   CG.set_reproducibility_interval(REPRODUCIBILITY_INTERVAL); | ||||
|   SchurRedBlackDiagMooeeSolve<LatticeFermion> SchurSolver(CG); | ||||
|   SchurSolver(Ddwf,src,result); | ||||
| } | ||||
| @@ -63,6 +63,9 @@ class HmcRunner : public NerscHmcRunner { | ||||
|     Real mass = -0.77; | ||||
|     FermionAction FermOp(U, *FGrid, *FrbGrid, mass); | ||||
|  | ||||
|     // To enable the CG reproducibility tests use  | ||||
|     //ConjugateGradient<FermionField> CG(1.0e-8, 10000, ReproducibilityTest); | ||||
|     // This is the plain version | ||||
|     ConjugateGradient<FermionField> CG(1.0e-8, 10000); | ||||
|  | ||||
|     TwoFlavourPseudoFermionAction<ImplPolicy> Nf2(FermOp, CG, CG); | ||||
|   | ||||
| @@ -71,7 +71,7 @@ int main (int argc, char ** argv) | ||||
|   WilsonFermionR Dw(Umu,Grid,RBGrid,mass); | ||||
|  | ||||
|   MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw); | ||||
|   ConjugateGradient<LatticeFermion> CG(1.0e-8,10000); | ||||
|   ConjugateGradient<LatticeFermion> CG(1.0e-8,10000,true, true); | ||||
|   CG(HermOp,src,result); | ||||
|  | ||||
|   Grid_finalize(); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user