mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-31 12:04:33 +00:00 
			
		
		
		
	Compare commits
	
		
			20 Commits
		
	
	
		
			hotfix/unw
			...
			feature/CG
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 70068cff51 | ||
|  | 85c055fa30 | ||
|  | 90fedbd2af | ||
|  | ec0c53fa68 | ||
|  | 6ceee102e8 | ||
|  | 6e57bdb6b3 | ||
|  | 4c11e36d3d | ||
|  | 9977c53035 | ||
|  | 3a74fec62f | ||
|  | 8fb0a13f39 | ||
|  | 14a1406f54 | ||
|  | 538e64e5b4 | ||
|  | b2dc17e160 | ||
|  | afbbcd2194 | ||
|  | d4e0b11bb1 | ||
|  | 7144ee7ae8 | ||
|  | f1908c7bc9 | ||
|  | 036ec31c48 | ||
|  | 53f240200e | ||
|  | 9720c9ba3f | 
							
								
								
									
										49
									
								
								lib/Bitwise.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								lib/Bitwise.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,49 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid | ||||||
|  |  | ||||||
|  | Source file: ./lib/Bitwise.cc | ||||||
|  |  | ||||||
|  | Copyright (C) 2016 | ||||||
|  |  | ||||||
|  | Author: Guido Cossu <guido.cossu@ed.ac.uk> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution | ||||||
|  | directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  | #include <iostream> | ||||||
|  | #include <Bitwise.h> | ||||||
|  | #include <bitset> | ||||||
|  | #include <climits> | ||||||
|  |  | ||||||
|  | namespace Grid { | ||||||
|  |  | ||||||
|  | void show_binaryrep(const unsigned char* a, size_t size) { | ||||||
|  |   const unsigned char* beg = a; | ||||||
|  |   const unsigned char* end = a + size; | ||||||
|  |   unsigned int ctr = 0; | ||||||
|  |   while (beg != end) { | ||||||
|  |     std::cout << std::bitset<CHAR_BIT>(*beg++) << ' '; | ||||||
|  |     ctr++; | ||||||
|  |     if (ctr % GRID_REAL_BYTES == 0) std::cout << '\n'; | ||||||
|  |   } | ||||||
|  |   std::cout << '\n'; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | } // namespace  | ||||||
|  |  | ||||||
							
								
								
									
										76
									
								
								lib/Bitwise.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										76
									
								
								lib/Bitwise.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,76 @@ | |||||||
|  | /************************************************************************************* | ||||||
|  |  | ||||||
|  | Grid physics library, www.github.com/paboyle/Grid | ||||||
|  |  | ||||||
|  | Source file: ./lib/Bitwise.h | ||||||
|  |  | ||||||
|  | Copyright (C) 2016 | ||||||
|  |  | ||||||
|  | Author: Guido Cossu <guido.cossu@ed.ac.uk> | ||||||
|  |  | ||||||
|  | This program is free software; you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU General Public License as published by | ||||||
|  | the Free Software Foundation; either version 2 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  |  | ||||||
|  | This program is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU General Public License for more details. | ||||||
|  |  | ||||||
|  | You should have received a copy of the GNU General Public License along | ||||||
|  | with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  | See the full license in the file "LICENSE" in the top level distribution | ||||||
|  | directory | ||||||
|  | *************************************************************************************/ | ||||||
|  | /*  END LEGAL */ | ||||||
|  | #ifndef GRID_BITWISE_H | ||||||
|  | #define GRID_BITWISE_H | ||||||
|  |  | ||||||
|  | #include <cassert> | ||||||
|  | #include <cfloat> | ||||||
|  | #include <bitset> | ||||||
|  | #include <climits> | ||||||
|  | #include <Config.h> | ||||||
|  |  | ||||||
|  | #ifdef GRID_DEFAULT_PRECISION_SINGLE | ||||||
|  | #define GRID_REAL_BYTES 4 | ||||||
|  | #endif | ||||||
|  | #ifdef GRID_DEFAULT_PRECISION_DOUBLE | ||||||
|  | #define GRID_REAL_BYTES 8 | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  |  | ||||||
|  | namespace Grid { | ||||||
|  |  | ||||||
|  | void show_binaryrep(const unsigned char* a, size_t size); | ||||||
|  |  | ||||||
|  | template <typename T> | ||||||
|  | void show_binaryrep(const T& a) { | ||||||
|  |   const char* beg = reinterpret_cast<const char*>(&a); | ||||||
|  |   const char* end = beg + sizeof(a); | ||||||
|  |   unsigned int ctr = 0; | ||||||
|  |   while (beg != end) { | ||||||
|  |   	std::cout << std::bitset<CHAR_BIT>(*beg++) << ' '; | ||||||
|  |   	ctr++; | ||||||
|  |   	if (ctr % GRID_REAL_BYTES == 0) std::cout << '\n'; | ||||||
|  |   } | ||||||
|  |   std::cout << '\n'; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | template <typename T> | ||||||
|  | void bitwise_xor(T& l, T& r, unsigned char* xors) { | ||||||
|  |   assert(sizeof(l) == sizeof(r)); | ||||||
|  |   unsigned char* org = reinterpret_cast<unsigned char*>(&l); | ||||||
|  |   unsigned char* cur = reinterpret_cast<unsigned char*>(&r); | ||||||
|  |   int words = sizeof(l) / sizeof(*org); | ||||||
|  |   unsigned char result = 0; | ||||||
|  |   for (int w = 0; w < words; w++) xors[w] = (org[w] ^ cur[w]); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | }; // namespace  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | #endif | ||||||
| @@ -62,6 +62,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
| #include <Grid/serialisation/Serialisation.h> | #include <Grid/serialisation/Serialisation.h> | ||||||
| #include "Config.h" | #include "Config.h" | ||||||
| #include <Grid/Timer.h> | #include <Grid/Timer.h> | ||||||
|  | #include <Grid/Bitwise.h> | ||||||
| #include <Grid/PerfCount.h> | #include <Grid/PerfCount.h> | ||||||
| #include <Grid/Log.h> | #include <Grid/Log.h> | ||||||
| #include <Grid/AlignedAllocator.h> | #include <Grid/AlignedAllocator.h> | ||||||
|   | |||||||
| @@ -100,7 +100,7 @@ void Grid_quiesce_nodes(void) { | |||||||
|   me = shmem_my_pe(); |   me = shmem_my_pe(); | ||||||
| #endif | #endif | ||||||
|   if (me) { |   if (me) { | ||||||
|     std::cout.setstate(std::ios::badbit); |     std::cout.setstate(std::ios::badbit);// mute all nodes except 0 | ||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -38,19 +38,21 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
| #ifdef GRID_OMP | #ifdef GRID_OMP | ||||||
| #include <omp.h> | #include <omp.h> | ||||||
| #ifdef GRID_NUMA | #ifdef GRID_NUMA | ||||||
| #define PARALLEL_FOR_LOOP        _Pragma("omp parallel for schedule(static)") |   #define PARALLEL_FOR_LOOP        _Pragma("omp parallel for schedule(static)") | ||||||
| #define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)") |   #define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)") | ||||||
| #else |   #else | ||||||
| #define PARALLEL_FOR_LOOP        _Pragma("omp parallel for schedule(runtime)") |   #define PARALLEL_FOR_LOOP        _Pragma("omp parallel for schedule(runtime)") | ||||||
| #define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(runtime)") |   #define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(runtime)") | ||||||
| #endif | #endif | ||||||
| #define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)") | #define PARALLEL_NESTED_LOOP2    _Pragma("omp parallel for collapse(2)") | ||||||
| #define PARALLEL_REGION       _Pragma("omp parallel") | #define PARALLEL_REGION          _Pragma("omp parallel") | ||||||
|  | #define PARALLEL_FOR_LOOP_STATIC _Pragma("omp parallel for schedule(static)") | ||||||
| #else | #else | ||||||
| #define PARALLEL_FOR_LOOP | #define PARALLEL_FOR_LOOP | ||||||
| #define PARALLEL_FOR_LOOP_INTERN | #define PARALLEL_FOR_LOOP_INTERN | ||||||
| #define PARALLEL_NESTED_LOOP2 | #define PARALLEL_NESTED_LOOP2 | ||||||
| #define PARALLEL_REGION | #define PARALLEL_REGION | ||||||
|  | #define PARALLEL_FOR_LOOP_STATIC | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
| namespace Grid { | namespace Grid { | ||||||
|   | |||||||
| @@ -9,6 +9,7 @@ Copyright (C) 2015 | |||||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | Author: paboyle <paboyle@ph.ed.ac.uk> | ||||||
|  | Author: Guido Cossu <guido.cossu@ed.ac.uk> | ||||||
|  |  | ||||||
| This program is free software; you can redistribute it and/or modify | This program is free software; you can redistribute it and/or modify | ||||||
| it under the terms of the GNU General Public License as published by | it under the terms of the GNU General Public License as published by | ||||||
| @@ -33,6 +34,21 @@ directory | |||||||
|  |  | ||||||
| namespace Grid { | namespace Grid { | ||||||
|  |  | ||||||
|  | struct CG_state { | ||||||
|  |   bool do_repro; | ||||||
|  |   std::vector<RealD> residuals; | ||||||
|  |  | ||||||
|  |   CG_state() {reset();} | ||||||
|  |  | ||||||
|  |   void reset(){ | ||||||
|  |     do_repro = false; | ||||||
|  |     residuals.clear(); | ||||||
|  |   } | ||||||
|  | }; | ||||||
|  |  | ||||||
|  |  | ||||||
|  | enum CGexec_mode{ Default, ReproducibilityTest }; | ||||||
|  |  | ||||||
| ///////////////////////////////////////////////////////////// | ///////////////////////////////////////////////////////////// | ||||||
| // Base classes for iterative processes based on operators | // Base classes for iterative processes based on operators | ||||||
| // single input vec, single output vec. | // single input vec, single output vec. | ||||||
| @@ -45,10 +61,30 @@ class ConjugateGradient : public OperatorFunction<Field> { | |||||||
|                            // Defaults true. |                            // Defaults true. | ||||||
|   RealD Tolerance; |   RealD Tolerance; | ||||||
|   Integer MaxIterations; |   Integer MaxIterations; | ||||||
|   ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true) |  | ||||||
|       : Tolerance(tol), |   // Reproducibility controls | ||||||
|         MaxIterations(maxit), |   bool ReproTest; | ||||||
|         ErrorOnNoConverge(err_on_no_conv){}; |   CG_state CGState; //to check reproducibility by repeating the CG | ||||||
|  |   ReproducibilityState<typename Field::vector_object> ReprTest; // for the inner proucts | ||||||
|  |  | ||||||
|  |   // Constructor | ||||||
|  |   ConjugateGradient(RealD tol, Integer maxit, CGexec_mode Mode = Default) | ||||||
|  |     : Tolerance(tol),MaxIterations(maxit){ | ||||||
|  |     switch(Mode) | ||||||
|  |     { | ||||||
|  |       case Default  :  | ||||||
|  |       ErrorOnNoConverge = true; | ||||||
|  |       ReproTest = false; | ||||||
|  |       case ReproducibilityTest : | ||||||
|  |       ErrorOnNoConverge = false; | ||||||
|  |       ReproTest = true;  | ||||||
|  |     } | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  |   void set_reproducibility_interval(unsigned int interval){ | ||||||
|  |     ReprTest.interval = interval; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |  | ||||||
|   void operator()(LinearOperatorBase<Field> &Linop, const Field &src, |   void operator()(LinearOperatorBase<Field> &Linop, const Field &src, | ||||||
|                   Field &psi) { |                   Field &psi) { | ||||||
| @@ -60,34 +96,37 @@ class ConjugateGradient : public OperatorFunction<Field> { | |||||||
|     Field p(src); |     Field p(src); | ||||||
|     Field mmp(src); |     Field mmp(src); | ||||||
|     Field r(src); |     Field r(src); | ||||||
|  |     Field psi_start(psi);// save for the repro test | ||||||
|  |  | ||||||
|  |     if (CGState.do_repro && ReproTest) | ||||||
|  |         std::cout << GridLogMessage << "Starting reproducibility test, full check every " | ||||||
|  |                   << ReprTest.interval << " calls" << std::endl; | ||||||
|  |  | ||||||
|  |     if(!ReprTest.do_check) | ||||||
|  |         ReprTest.reset(); | ||||||
|  |     ReprTest.enable_reprocheck=ReproTest; | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|     // Initial residual computation & set up |     // Initial residual computation & set up | ||||||
|     RealD guess = norm2(psi); |     RealD guess = norm2(psi, ReprTest); | ||||||
|     assert(std::isnan(guess) == 0); |     assert(std::isnan(guess) == 0); | ||||||
|  |  | ||||||
|  |     Linop.HermOpAndNorm(psi, mmp, d, b);// eventually split this for the norm check | ||||||
|      |      | ||||||
|     Linop.HermOpAndNorm(psi, mmp, d, b); |  | ||||||
|      |  | ||||||
|  |  | ||||||
|     r = src - mmp; |     r = src - mmp; | ||||||
|     p = r; |     p = r; | ||||||
|  |  | ||||||
|     a = norm2(p); |     a = norm2(p, ReprTest); | ||||||
|     cp = a; |     cp = a; | ||||||
|     ssq = norm2(src); |     ssq = norm2(src, ReprTest); | ||||||
|  |  | ||||||
|     std::cout << GridLogIterative << std::setprecision(4) |     std::cout << GridLogIterative << "ConjugateGradient: guess " << guess << std::endl; | ||||||
|               << "ConjugateGradient: guess " << guess << std::endl; |     std::cout << GridLogIterative << "ConjugateGradient:   src " << ssq << std::endl; | ||||||
|     std::cout << GridLogIterative << std::setprecision(4) |     std::cout << GridLogIterative << "ConjugateGradient:    mp " << d << std::endl; | ||||||
|               << "ConjugateGradient:   src " << ssq << std::endl; |     std::cout << GridLogIterative << "ConjugateGradient:   mmp " << b << std::endl; | ||||||
|     std::cout << GridLogIterative << std::setprecision(4) |     std::cout << GridLogIterative << "ConjugateGradient:  cp,r " << cp << std::endl; | ||||||
|               << "ConjugateGradient:    mp " << d << std::endl; |     std::cout << GridLogIterative << "ConjugateGradient:     p " << a << std::endl; | ||||||
|     std::cout << GridLogIterative << std::setprecision(4) |  | ||||||
|               << "ConjugateGradient:   mmp " << b << std::endl; |  | ||||||
|     std::cout << GridLogIterative << std::setprecision(4) |  | ||||||
|               << "ConjugateGradient:  cp,r " << cp << std::endl; |  | ||||||
|     std::cout << GridLogIterative << std::setprecision(4) |  | ||||||
|               << "ConjugateGradient:     p " << a << std::endl; |  | ||||||
|  |  | ||||||
|     RealD rsq = Tolerance * Tolerance * ssq; |     RealD rsq = Tolerance * Tolerance * ssq; | ||||||
|  |  | ||||||
| @@ -107,10 +146,10 @@ class ConjugateGradient : public OperatorFunction<Field> { | |||||||
|     SolverTimer.Start(); |     SolverTimer.Start(); | ||||||
|     int k; |     int k; | ||||||
|     for (k = 1; k <= MaxIterations; k++) { |     for (k = 1; k <= MaxIterations; k++) { | ||||||
|       c = cp; |       c = cp;// old residual | ||||||
|  |  | ||||||
|       MatrixTimer.Start(); |       MatrixTimer.Start(); | ||||||
|       Linop.HermOpAndNorm(p, mmp, d, qq); |       Linop.HermOpAndNorm(p, mmp, d, qq);// mmp = Ap, d=pAp | ||||||
|       MatrixTimer.Stop(); |       MatrixTimer.Stop(); | ||||||
|  |  | ||||||
|       LinalgTimer.Start(); |       LinalgTimer.Start(); | ||||||
| @@ -118,14 +157,31 @@ class ConjugateGradient : public OperatorFunction<Field> { | |||||||
|       //  ComplexD dck  = innerProduct(p,mmp); |       //  ComplexD dck  = innerProduct(p,mmp); | ||||||
|  |  | ||||||
|       a = c / d; |       a = c / d; | ||||||
|       b_pred = a * (a * qq - d) / c; |       b_pred = a * (a * qq - d) / c;// a check | ||||||
|  |  | ||||||
|       cp = axpy_norm(r, -a, mmp, r); |  | ||||||
|  |       axpy(r, -a, mmp, r);// new residual r = r_old - a * Ap | ||||||
|  |       cp = norm2(r, ReprTest); // bookkeeping this norm | ||||||
|  |       if (ReproTest && !CGState.do_repro) { | ||||||
|  |         CGState.residuals.push_back(cp);  // save residuals state | ||||||
|  |                 std::cout << GridLogIterative << "ReproTest: Saving state" << std::endl; | ||||||
|  |         } | ||||||
|  |       if (ReproTest && CGState.do_repro){ | ||||||
|  |         // check that the residual agrees with the previous run | ||||||
|  |         std::cout << GridLogIterative << "ReproTest: Checking state k=" << k << std::endl; | ||||||
|  |         if (cp != CGState.residuals[k-1]){ | ||||||
|  |                 std::cout << GridLogMessage << "Failing reproducibility test"; | ||||||
|  |                 std::cout << GridLogMessage << " at k=" << k << std::endl; | ||||||
|  |                 std::cout << GridLogMessage << "saved residual = " << CGState.residuals[k-1]  | ||||||
|  |                         << " cp = " << cp << std::endl; | ||||||
|  |                 exit(1);  // exit after the first failure | ||||||
|  |         } | ||||||
|  |       } | ||||||
|       b = cp / c; |       b = cp / c; | ||||||
|  |  | ||||||
|       // Fuse these loops ; should be really easy |       // Fuse these loops ; should be really easy | ||||||
|       psi = a * p + psi; |       psi = a * p + psi; // update solution | ||||||
|       p = p * b + r; |       p = p * b + r;  // update search direction | ||||||
|  |  | ||||||
|       LinalgTimer.Stop(); |       LinalgTimer.Stop(); | ||||||
|       std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k |       std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k | ||||||
| @@ -156,6 +212,22 @@ class ConjugateGradient : public OperatorFunction<Field> { | |||||||
|  |  | ||||||
|         if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0); |         if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0); | ||||||
|  |  | ||||||
|  |         if (! (CGState.do_repro && ReproTest)){ | ||||||
|  |                 CGState.do_repro = true; | ||||||
|  |                 ReprTest.do_check = true; | ||||||
|  |                 ReprTest.reset_counter(); | ||||||
|  |                 this->operator()(Linop, src, psi_start);// run the repro test | ||||||
|  |                 if (ReprTest.success) | ||||||
|  |                 	std::cout << GridLogMessage << "Reproducibility test passed" << std::endl; | ||||||
|  |                 else{ | ||||||
|  |                 	std::cout << GridLogMessage << "Reproducibility test failed" << std::endl; | ||||||
|  |                 	exit(1); | ||||||
|  |                 } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // Clear state | ||||||
|  |         CGState.reset(); | ||||||
|  |         ReprTest.reset(); | ||||||
|         return; |         return; | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -59,7 +59,7 @@ public: | |||||||
|  |  | ||||||
|     int Nstop;   // Number of evecs checked for convergence |     int Nstop;   // Number of evecs checked for convergence | ||||||
|     int Nk;      // Number of converged sought |     int Nk;      // Number of converged sought | ||||||
|     int Np;      // Np -- Number of spare vecs in kryloc space |     int Np;      // Np -- Number of spare vecs in krylov space | ||||||
|     int Nm;      // Nm -- total number of vectors |     int Nm;      // Nm -- total number of vectors | ||||||
|  |  | ||||||
|     RealD eresid; |     RealD eresid; | ||||||
|   | |||||||
| @@ -65,6 +65,7 @@ const std::vector<int> & CartesianCommunicator::ThisProcessorCoor(void) { return | |||||||
| const std::vector<int> & CartesianCommunicator::ProcessorGrid(void)     { return _processors; }; | const std::vector<int> & CartesianCommunicator::ProcessorGrid(void)     { return _processors; }; | ||||||
| int                      CartesianCommunicator::ProcessorCount(void)    { return _Nprocessors; }; | int                      CartesianCommunicator::ProcessorCount(void)    { return _Nprocessors; }; | ||||||
|  |  | ||||||
|  |  | ||||||
| //////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// | ||||||
| // very VERY rarely (Log, serial RNG) we need world without a grid | // very VERY rarely (Log, serial RNG) we need world without a grid | ||||||
| //////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// | ||||||
| @@ -89,11 +90,11 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N) | |||||||
| #if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPI3L) | #if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPI3L) | ||||||
|  |  | ||||||
| void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, | void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, | ||||||
| 						       void *xmit, |                                                        void *xmit, | ||||||
| 						       int xmit_to_rank, |                                                        int xmit_to_rank, | ||||||
| 						       void *recv, |                                                        void *recv, | ||||||
| 						       int recv_from_rank, |                                                        int recv_from_rank, | ||||||
| 						       int bytes) |                                                        int bytes) | ||||||
| { | { | ||||||
|   SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes); |   SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -68,6 +68,8 @@ class CartesianCommunicator { | |||||||
|   static MPI_Comm communicator_world; |   static MPI_Comm communicator_world; | ||||||
|          MPI_Comm communicator; |          MPI_Comm communicator; | ||||||
|   typedef MPI_Request CommsRequest_t; |   typedef MPI_Request CommsRequest_t; | ||||||
|  |   static char name[MPI_MAX_PROCESSOR_NAME]; // processing node physical name | ||||||
|  |   static int length;  | ||||||
| #else  | #else  | ||||||
|   typedef int CommsRequest_t; |   typedef int CommsRequest_t; | ||||||
| #endif | #endif | ||||||
| @@ -149,6 +151,7 @@ class CartesianCommunicator { | |||||||
|   const std::vector<int> & ProcessorGrid(void)     ; |   const std::vector<int> & ProcessorGrid(void)     ; | ||||||
|   int                      ProcessorCount(void)    ; |   int                      ProcessorCount(void)    ; | ||||||
|  |  | ||||||
|  |   void        		 PrintRankInfo(void)     ; | ||||||
|   //////////////////////////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////////////////////////// | ||||||
|   // very VERY rarely (Log, serial RNG) we need world without a grid |   // very VERY rarely (Log, serial RNG) we need world without a grid | ||||||
|   //////////////////////////////////////////////////////////////////////////////// |   //////////////////////////////////////////////////////////////////////////////// | ||||||
|   | |||||||
| @@ -35,6 +35,8 @@ namespace Grid { | |||||||
| // Info that is setup once and indept of cartesian layout | // Info that is setup once and indept of cartesian layout | ||||||
| /////////////////////////////////////////////////////////////////////////////////////////////////// | /////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| MPI_Comm CartesianCommunicator::communicator_world; | MPI_Comm CartesianCommunicator::communicator_world; | ||||||
|  | char CartesianCommunicator::name[MPI_MAX_PROCESSOR_NAME]; // processing node physical name | ||||||
|  | int CartesianCommunicator::length;  | ||||||
|  |  | ||||||
| // Should error check all MPI calls. | // Should error check all MPI calls. | ||||||
| void CartesianCommunicator::Init(int *argc, char ***argv) { | void CartesianCommunicator::Init(int *argc, char ***argv) { | ||||||
| @@ -44,6 +46,8 @@ void CartesianCommunicator::Init(int *argc, char ***argv) { | |||||||
|     MPI_Init(argc,argv); |     MPI_Init(argc,argv); | ||||||
|   } |   } | ||||||
|   MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); |   MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world); | ||||||
|  |  | ||||||
|  |   MPI_Get_processor_name(name, &length); | ||||||
|   ShmInitGeneric(); |   ShmInitGeneric(); | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -206,5 +210,10 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) | |||||||
|   assert(ierr==0); |   assert(ierr==0); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | void CartesianCommunicator::PrintRankInfo(){ | ||||||
|  |     std::cout << "Grid: Rank "<< _processor << "  -  Physical node name: " << name << std::endl; | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | }// end of namespace | ||||||
|  |  | ||||||
|   | |||||||
| @@ -576,5 +576,10 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) | |||||||
|   assert(ierr==0); |   assert(ierr==0); | ||||||
| } | } | ||||||
|  |  | ||||||
|  |   void CartesianCommunicator::PrintRankInfo(){ | ||||||
|  |     std::cout << "Grid: Rank "<< _processor << "  -  Physical node name: " << name << std::endl; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |    | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -869,6 +869,9 @@ void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) { | |||||||
|   return NULL; |   return NULL; | ||||||
| } | } | ||||||
|  |  | ||||||
|  |   void CartesianCommunicator::PrintRankInfo(){ | ||||||
|  |     std::cout << "Grid: Rank "<< _processor << "  -  Physical node name: " << name << std::endl; | ||||||
|  |   } | ||||||
|  |  | ||||||
| }; | }; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -37,6 +37,11 @@ void CartesianCommunicator::Init(int *argc, char *** arv) | |||||||
|   ShmInitGeneric(); |   ShmInitGeneric(); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | void CartesianCommunicator::PrintRankInfo(){ | ||||||
|  |     std::cout << GridLogMessage << "No Rank Info available" << std::endl; | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
| CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) | CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors) | ||||||
| { | { | ||||||
|   _processors = processors; |   _processors = processors; | ||||||
| @@ -60,10 +65,10 @@ void CartesianCommunicator::GlobalSum(uint64_t &){} | |||||||
| void CartesianCommunicator::GlobalSumVector(double *,int N){} | void CartesianCommunicator::GlobalSumVector(double *,int N){} | ||||||
|  |  | ||||||
| void CartesianCommunicator::SendRecvPacket(void *xmit, | void CartesianCommunicator::SendRecvPacket(void *xmit, | ||||||
| 					   void *recv, |                                            void *recv, | ||||||
| 					   int xmit_to_rank, |                                            int xmit_to_rank, | ||||||
| 					   int recv_from_rank, |                                            int recv_from_rank, | ||||||
| 					   int bytes) |                                            int bytes) | ||||||
| { | { | ||||||
|   assert(0); |   assert(0); | ||||||
| } | } | ||||||
| @@ -71,19 +76,19 @@ void CartesianCommunicator::SendRecvPacket(void *xmit, | |||||||
|  |  | ||||||
| // Basic Halo comms primitive -- should never call in single node | // Basic Halo comms primitive -- should never call in single node | ||||||
| void CartesianCommunicator::SendToRecvFrom(void *xmit, | void CartesianCommunicator::SendToRecvFrom(void *xmit, | ||||||
| 					   int dest, |                                            int dest, | ||||||
| 					   void *recv, |                                            void *recv, | ||||||
| 					   int from, |                                            int from, | ||||||
| 					   int bytes) |                                            int bytes) | ||||||
| { | { | ||||||
|   assert(0); |   assert(0); | ||||||
| } | } | ||||||
| void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list, | void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list, | ||||||
| 						void *xmit, |                                                 void *xmit, | ||||||
| 						int dest, |                                                 int dest, | ||||||
| 						void *recv, |                                                 void *recv, | ||||||
| 						int from, |                                                 int from, | ||||||
| 						int bytes) |                                                 int bytes) | ||||||
| { | { | ||||||
|   assert(0); |   assert(0); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -333,5 +333,9 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) | |||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
|  |   void CartesianCommunicator::PrintRankInfo(){ | ||||||
|  |     std::cout << GridLogMessage << "SHMEM: Rank Info not implemented yet" << std::endl; | ||||||
|  |   } | ||||||
|  |  | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -31,6 +31,87 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | |||||||
| #define GRID_LATTICE_REDUCTION_H | #define GRID_LATTICE_REDUCTION_H | ||||||
|  |  | ||||||
| namespace Grid { | namespace Grid { | ||||||
|  |  | ||||||
|  | template <class T> | ||||||
|  | class ReproducibilityState { | ||||||
|  |  public: | ||||||
|  |   typedef typename T::vector_type vector_type; | ||||||
|  |   typedef std::vector<vector_type, alignedAllocator<vector_type> >  sum_type; | ||||||
|  |   unsigned int  n_call; | ||||||
|  |   bool          do_check; | ||||||
|  |   bool          enable_reprocheck; | ||||||
|  |   bool          success; | ||||||
|  |   unsigned int  interval;  | ||||||
|  |   std::vector<sum_type> th_states; | ||||||
|  |  | ||||||
|  |   void reset_counter() { n_call = 0; } | ||||||
|  |   void reset() { | ||||||
|  |     th_states.clear(); | ||||||
|  |     do_check          = false; | ||||||
|  |     enable_reprocheck = false; | ||||||
|  |     success           = true; | ||||||
|  |     n_call            = 0; | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  |   ReproducibilityState():interval(1) {  | ||||||
|  |     reset();  | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   void check(GridBase* grid, sum_type &sumarray){ | ||||||
|  |     ///////////////////////  Reproducibility section, not threaded on purpouse | ||||||
|  |     if (enable_reprocheck) { | ||||||
|  |       if (do_check && (n_call % interval) == 0) { | ||||||
|  |         for (int thread = 0; thread < sumarray.size(); thread++) { | ||||||
|  |           int words = sizeof(sumarray[thread])/sizeof(unsigned char); | ||||||
|  |           unsigned char xors[words]; | ||||||
|  |           bitwise_xor(sumarray[thread], th_states[n_call][thread],xors); | ||||||
|  |           // OR all words | ||||||
|  |           unsigned char res = 0; | ||||||
|  |           for (int w = 0; w < words; w++) res = res | xors[w]; | ||||||
|  |  | ||||||
|  |           Grid_unquiesce_nodes(); | ||||||
|  |           int rank = 0; | ||||||
|  |           while (rank < grid->_Nprocessors){ | ||||||
|  |             if (rank == grid->ThisRank() ){ | ||||||
|  |               if ( res ) { | ||||||
|  |                 std::cout << "Reproducibility failure report" << std::endl; | ||||||
|  |                 grid->PrintRankInfo(); | ||||||
|  |                 std::cout << "Call: "<< n_call << " Thread: " << thread << std::endl; | ||||||
|  |                 std::cout << "Size of states: " << th_states.size() << std::endl; | ||||||
|  |                 std::cout << std::setprecision(GRID_REAL_DIGITS+1) << std::scientific; | ||||||
|  |                 std::cout << "Saved partial sum  : " << th_states[n_call][thread] << std::endl; | ||||||
|  |                 std::cout << "Current partial sum: " << sumarray[thread] << std::endl; | ||||||
|  |                 std::cout << "Saved state "  << std::endl; show_binaryrep(th_states[n_call][thread]); | ||||||
|  |                 std::cout << "Current state" << std::endl; show_binaryrep(sumarray[thread]); | ||||||
|  |                 std::cout << "XOR result"    << std::endl; show_binaryrep(xors, words); | ||||||
|  |               		//std::cout << std::defaultfloat;  //not supported by some compilers | ||||||
|  |                 std::cout << std::setprecision(6); | ||||||
|  |                 success = false;  | ||||||
|  |               } | ||||||
|  |             } | ||||||
|  |             rank++; | ||||||
|  |             grid->Barrier(); | ||||||
|  |           } | ||||||
|  |           Grid_quiesce_nodes();                          | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |       } else if (!do_check) | ||||||
|  |       { | ||||||
|  |         std::cout << GridLogDebug << "Saving thread state for inner product. Call n. "  | ||||||
|  |         << n_call << std::endl; | ||||||
|  |         th_states.resize(n_call+1); | ||||||
|  |         th_states[n_call].resize(grid->SumArraySize()); | ||||||
|  |           th_states[n_call] = sumarray;  // save threads state | ||||||
|  |           //n_call++; | ||||||
|  |         } | ||||||
|  |         n_call++; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  | }; | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
| #ifdef GRID_WARN_SUBOPTIMAL | #ifdef GRID_WARN_SUBOPTIMAL | ||||||
| #warning "Optimisation alert all these reduction loops are NOT threaded " | #warning "Optimisation alert all these reduction loops are NOT threaded " | ||||||
| #endif      | #endif      | ||||||
| @@ -39,12 +120,25 @@ namespace Grid { | |||||||
|     // Deterministic Reduction operations |     // Deterministic Reduction operations | ||||||
|     //////////////////////////////////////////////////////////////////////////////////////////////////// |     //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
|   template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){ |   template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){ | ||||||
|     ComplexD nrm = innerProduct(arg,arg); |         ReproducibilityState<vobj> repr; | ||||||
|     return std::real(nrm);  |         ComplexD nrm = innerProduct(arg, arg, repr); | ||||||
|   } |         return std::real(nrm);  | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     template <class vobj> | ||||||
|  |     inline RealD norm2(const Lattice<vobj> &arg, ReproducibilityState<vobj>& rep) { | ||||||
|  |       ComplexD nrm = innerProduct(arg, arg, rep); | ||||||
|  |       return std::real(nrm); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     template<class vobj> |     template<class vobj> | ||||||
|     inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)  |     inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right){ | ||||||
|  |       ReproducibilityState<vobj> repr; | ||||||
|  |       return innerProduct(left, right, repr); | ||||||
|  |     }  | ||||||
|  |      | ||||||
|  |     template<class vobj> | ||||||
|  |     inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right, ReproducibilityState<vobj>& repr)  | ||||||
|     { |     { | ||||||
|       typedef typename vobj::scalar_type scalar_type; |       typedef typename vobj::scalar_type scalar_type; | ||||||
|       typedef typename vobj::vector_type vector_type; |       typedef typename vobj::vector_type vector_type; | ||||||
| @@ -54,24 +148,28 @@ namespace Grid { | |||||||
|  |  | ||||||
|       std::vector<vector_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize()); |       std::vector<vector_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize()); | ||||||
|       for(int i=0;i<grid->SumArraySize();i++){ |       for(int i=0;i<grid->SumArraySize();i++){ | ||||||
| 	sumarray[i]=zero; |         sumarray[i]=zero; | ||||||
|       } |       } | ||||||
|  |  | ||||||
| PARALLEL_FOR_LOOP |       PARALLEL_FOR_LOOP_STATIC //request statically scheduled threads for reproducibility | ||||||
|       for(int thr=0;thr<grid->SumArraySize();thr++){ |       for(int thr=0;thr<grid->SumArraySize();thr++){ | ||||||
| 	int nwork, mywork, myoff; |         int nwork, mywork, myoff; | ||||||
| 	GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff); |         GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff); | ||||||
| 	 |          | ||||||
| 	decltype(innerProduct(left._odata[0],right._odata[0])) vnrm=zero; // private to thread; sub summation |         decltype(innerProduct(left._odata[0],right._odata[0])) vnrm = zero; // private to thread; sub summation | ||||||
|         for(int ss=myoff;ss<mywork+myoff; ss++){ |         for(int ss=myoff;ss<mywork+myoff; ss++){ | ||||||
| 	  vnrm = vnrm + innerProduct(left._odata[ss],right._odata[ss]); |           vnrm = vnrm + innerProduct(left._odata[ss],right._odata[ss]); | ||||||
| 	} |         } | ||||||
| 	sumarray[thr]=TensorRemove(vnrm) ; |         sumarray[thr]=TensorRemove(vnrm) ; | ||||||
|       } |       } | ||||||
|      |        | ||||||
|  |       /////////// Reproducibility | ||||||
|  |       repr.check(grid, sumarray); | ||||||
|  |       /////////////////////////// | ||||||
|  |  | ||||||
|       vector_type vvnrm; vvnrm=zero;  // sum across threads |       vector_type vvnrm; vvnrm=zero;  // sum across threads | ||||||
|       for(int i=0;i<grid->SumArraySize();i++){ |       for(int i=0;i<grid->SumArraySize();i++){ | ||||||
| 	vvnrm = vvnrm+sumarray[i]; |         vvnrm = vvnrm+sumarray[i]; | ||||||
|       }  |       }  | ||||||
|       nrm = Reduce(vvnrm);// sum across simd |       nrm = Reduce(vvnrm);// sum across simd | ||||||
|       right._grid->GlobalSum(nrm); |       right._grid->GlobalSum(nrm); | ||||||
| @@ -79,26 +177,26 @@ PARALLEL_FOR_LOOP | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     template<class Op,class T1> |     template<class Op,class T1> | ||||||
|       inline auto sum(const LatticeUnaryExpression<Op,T1> & expr) |     inline auto sum(const LatticeUnaryExpression<Op,T1> & expr) | ||||||
|       ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object |     ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object | ||||||
|     { |     { | ||||||
|       return sum(closure(expr)); |       return sum(closure(expr)); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     template<class Op,class T1,class T2> |     template<class Op,class T1,class T2> | ||||||
|       inline auto sum(const LatticeBinaryExpression<Op,T1,T2> & expr) |     inline auto sum(const LatticeBinaryExpression<Op,T1,T2> & expr) | ||||||
|       ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),eval(0,std::get<1>(expr.second))))::scalar_object |     ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),eval(0,std::get<1>(expr.second))))::scalar_object | ||||||
|     { |     { | ||||||
|       return sum(closure(expr)); |       return sum(closure(expr)); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |  | ||||||
|     template<class Op,class T1,class T2,class T3> |     template<class Op,class T1,class T2,class T3> | ||||||
|       inline auto sum(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr) |     inline auto sum(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr) | ||||||
|       ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)), |     ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)), | ||||||
| 				 eval(0,std::get<1>(expr.second)), |      eval(0,std::get<1>(expr.second)), | ||||||
| 				 eval(0,std::get<2>(expr.second)) |      eval(0,std::get<2>(expr.second)) | ||||||
| 				 ))::scalar_object |      ))::scalar_object | ||||||
|     { |     { | ||||||
|       return sum(closure(expr)); |       return sum(closure(expr)); | ||||||
|     } |     } | ||||||
| @@ -111,24 +209,24 @@ PARALLEL_FOR_LOOP | |||||||
|  |  | ||||||
|       std::vector<vobj,alignedAllocator<vobj> > sumarray(grid->SumArraySize()); |       std::vector<vobj,alignedAllocator<vobj> > sumarray(grid->SumArraySize()); | ||||||
|       for(int i=0;i<grid->SumArraySize();i++){ |       for(int i=0;i<grid->SumArraySize();i++){ | ||||||
| 	sumarray[i]=zero; |         sumarray[i]=zero; | ||||||
|       } |       } | ||||||
|  |  | ||||||
| PARALLEL_FOR_LOOP |       PARALLEL_FOR_LOOP | ||||||
|       for(int thr=0;thr<grid->SumArraySize();thr++){ |       for(int thr=0;thr<grid->SumArraySize();thr++){ | ||||||
| 	int nwork, mywork, myoff; |         int nwork, mywork, myoff; | ||||||
| 	GridThread::GetWork(grid->oSites(),thr,mywork,myoff); |         GridThread::GetWork(grid->oSites(),thr,mywork,myoff); | ||||||
|  |  | ||||||
| 	vobj vvsum=zero; |         vobj vvsum=zero; | ||||||
|         for(int ss=myoff;ss<mywork+myoff; ss++){ |         for(int ss=myoff;ss<mywork+myoff; ss++){ | ||||||
| 	  vvsum = vvsum + arg._odata[ss]; |           vvsum = vvsum + arg._odata[ss]; | ||||||
| 	} |         } | ||||||
| 	sumarray[thr]=vvsum; |         sumarray[thr]=vvsum; | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       vobj vsum=zero;  // sum across threads |       vobj vsum=zero;  // sum across threads | ||||||
|       for(int i=0;i<grid->SumArraySize();i++){ |       for(int i=0;i<grid->SumArraySize();i++){ | ||||||
| 	vsum = vsum+sumarray[i]; |         vsum = vsum+sumarray[i]; | ||||||
|       }  |       }  | ||||||
|  |  | ||||||
|       typedef typename vobj::scalar_object sobj; |       typedef typename vobj::scalar_object sobj; | ||||||
| @@ -138,7 +236,7 @@ PARALLEL_FOR_LOOP | |||||||
|       extract(vsum,buf); |       extract(vsum,buf); | ||||||
|  |  | ||||||
|       for(int i=0;i<Nsimd;i++) ssum = ssum + buf[i]; |       for(int i=0;i<Nsimd;i++) ssum = ssum + buf[i]; | ||||||
|       arg._grid->GlobalSum(ssum); |         arg._grid->GlobalSum(ssum); | ||||||
|  |  | ||||||
|       return ssum; |       return ssum; | ||||||
|     } |     } | ||||||
| @@ -146,23 +244,23 @@ PARALLEL_FOR_LOOP | |||||||
|  |  | ||||||
|  |  | ||||||
| template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<typename vobj::scalar_object> &result,int orthogdim) | template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<typename vobj::scalar_object> &result,int orthogdim) | ||||||
| { |     { | ||||||
|   typedef typename vobj::scalar_object sobj; |       typedef typename vobj::scalar_object sobj; | ||||||
|   GridBase  *grid = Data._grid; |       GridBase  *grid = Data._grid; | ||||||
|   assert(grid!=NULL); |       assert(grid!=NULL); | ||||||
|  |  | ||||||
|   // FIXME |   // FIXME | ||||||
|   // std::cout<<GridLogMessage<<"WARNING ! SliceSum is unthreaded "<<grid->SumArraySize()<<" threads "<<std::endl; |   // std::cout<<GridLogMessage<<"WARNING ! SliceSum is unthreaded "<<grid->SumArraySize()<<" threads "<<std::endl; | ||||||
|  |  | ||||||
|   const int    Nd = grid->_ndimension; |       const int    Nd = grid->_ndimension; | ||||||
|   const int Nsimd = grid->Nsimd(); |       const int Nsimd = grid->Nsimd(); | ||||||
|  |  | ||||||
|   assert(orthogdim >= 0); |       assert(orthogdim >= 0); | ||||||
|   assert(orthogdim < Nd); |       assert(orthogdim < Nd); | ||||||
|  |  | ||||||
|   int fd=grid->_fdimensions[orthogdim]; |       int fd=grid->_fdimensions[orthogdim]; | ||||||
|   int ld=grid->_ldimensions[orthogdim]; |       int ld=grid->_ldimensions[orthogdim]; | ||||||
|   int rd=grid->_rdimensions[orthogdim]; |       int rd=grid->_rdimensions[orthogdim]; | ||||||
|  |  | ||||||
|   std::vector<vobj,alignedAllocator<vobj> > lvSum(rd); // will locally sum vectors first |   std::vector<vobj,alignedAllocator<vobj> > lvSum(rd); // will locally sum vectors first | ||||||
|   std::vector<sobj> lsSum(ld,zero); // sum across these down to scalars |   std::vector<sobj> lsSum(ld,zero); // sum across these down to scalars | ||||||
|   | |||||||
| @@ -66,6 +66,9 @@ namespace Optimization { | |||||||
|     double f[4]; |     double f[4]; | ||||||
|   }; |   }; | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   struct Vsplat{ |   struct Vsplat{ | ||||||
|     //Complex float |     //Complex float | ||||||
|     inline __m256 operator()(float a, float b){ |     inline __m256 operator()(float a, float b){ | ||||||
|   | |||||||
| @@ -60,6 +60,13 @@ directory | |||||||
| #include "Grid_neon.h" | #include "Grid_neon.h" | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|  | #ifdef GRID_DEFAULT_PRECISION_SINGLE | ||||||
|  | #define GRID_REAL_DIGITS FLT_DIG | ||||||
|  | #endif | ||||||
|  | #ifdef GRID_DEFAULT_PRECISION_DOUBLE | ||||||
|  | #define GRID_REAL_DIGITS DBL_DIG | ||||||
|  | #endif | ||||||
|  |  | ||||||
| namespace Grid { | namespace Grid { | ||||||
|  |  | ||||||
| ////////////////////////////////////// | ////////////////////////////////////// | ||||||
| @@ -146,6 +153,23 @@ class Grid_simd { | |||||||
|   Grid_simd(const Grid_simd &rhs) : v(rhs.v){};  // compiles in movaps |   Grid_simd(const Grid_simd &rhs) : v(rhs.v){};  // compiles in movaps | ||||||
|   Grid_simd(const Grid_simd &&rhs) : v(rhs.v){}; |   Grid_simd(const Grid_simd &&rhs) : v(rhs.v){}; | ||||||
|  |  | ||||||
|  |   ///////////////////////////// | ||||||
|  |   // Comparisons | ||||||
|  |   ///////////////////////////// | ||||||
|  |   inline bool operator==(const Grid_simd& lhs){  | ||||||
|  |     Grid_simd::conv_t conv1; | ||||||
|  |     Grid_simd::conv_t conv2; | ||||||
|  |     conv1.v = v; | ||||||
|  |     conv2.v = lhs.v; | ||||||
|  |     return std::equal(std::begin(conv1.s), std::end(conv1.s), std::begin(conv2.s)); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   inline bool operator!=(const Grid_simd& lhs){  | ||||||
|  |     return !(*this == lhs); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|   ///////////////////////////// |   ///////////////////////////// | ||||||
|   // Constructors |   // Constructors | ||||||
|   ///////////////////////////// |   ///////////////////////////// | ||||||
|   | |||||||
							
								
								
									
										215
									
								
								tests/debug/Test_cayley_cg_reproducibility.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										215
									
								
								tests/debug/Test_cayley_cg_reproducibility.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,215 @@ | |||||||
|  |     /************************************************************************************* | ||||||
|  |  | ||||||
|  |     Grid physics library, www.github.com/paboyle/Grid  | ||||||
|  |  | ||||||
|  |     Source file: ./tests/Test_cayley_cg_reproducibility.cc | ||||||
|  |  | ||||||
|  |     Copyright (C) 2015 | ||||||
|  |  | ||||||
|  |     Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||||
|  |     Author: Guido Cossu <guido.cossu@ed.ac.uk> | ||||||
|  |  | ||||||
|  |     This program is free software; you can redistribute it and/or modify | ||||||
|  |     it under the terms of the GNU General Public License as published by | ||||||
|  |     the Free Software Foundation; either version 2 of the License, or | ||||||
|  |     (at your option) any later version. | ||||||
|  |  | ||||||
|  |     This program is distributed in the hope that it will be useful, | ||||||
|  |     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |     GNU General Public License for more details. | ||||||
|  |  | ||||||
|  |     You should have received a copy of the GNU General Public License along | ||||||
|  |     with this program; if not, write to the Free Software Foundation, Inc., | ||||||
|  |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||||
|  |  | ||||||
|  |     See the full license in the file "LICENSE" in the top level distribution directory | ||||||
|  |     *************************************************************************************/ | ||||||
|  |     /*  END LEGAL */ | ||||||
|  | #include <Grid/Grid.h> | ||||||
|  |  | ||||||
|  | using namespace std; | ||||||
|  | using namespace Grid; | ||||||
|  | using namespace Grid::QCD; | ||||||
|  |  | ||||||
|  | #define REPRODUCIBILITY_INTERVAL 1 | ||||||
|  |  | ||||||
|  |  | ||||||
|  | template<class d> | ||||||
|  | struct scal { | ||||||
|  |   d internal; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  |   Gamma::GammaMatrix Gmu [] = { | ||||||
|  |     Gamma::GammaX, | ||||||
|  |     Gamma::GammaY, | ||||||
|  |     Gamma::GammaZ, | ||||||
|  |     Gamma::GammaT | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  | template<class What>  | ||||||
|  | void  TestCGinversions(What & Ddwf,  | ||||||
|  |                        GridCartesian         * FGrid,          GridRedBlackCartesian * FrbGrid, | ||||||
|  |                        GridCartesian         * UGrid,          GridRedBlackCartesian * UrbGrid, | ||||||
|  |                        RealD mass, RealD M5, | ||||||
|  |                        GridParallelRNG *RNG4, | ||||||
|  |                        GridParallelRNG *RNG5); | ||||||
|  | template<class What>  | ||||||
|  | void  TestCGschur(What & Ddwf,  | ||||||
|  |                   GridCartesian         * FGrid,               GridRedBlackCartesian * FrbGrid, | ||||||
|  |                   GridCartesian         * UGrid,               GridRedBlackCartesian * UrbGrid, | ||||||
|  |                   RealD mass, RealD M5, | ||||||
|  |                   GridParallelRNG *RNG4, | ||||||
|  |                   GridParallelRNG *RNG5); | ||||||
|  |  | ||||||
|  | template<class What>  | ||||||
|  | void  TestCGunprec(What & Ddwf,  | ||||||
|  |                    GridCartesian         * FGrid,              GridRedBlackCartesian * FrbGrid, | ||||||
|  |                    GridCartesian         * UGrid,              GridRedBlackCartesian * UrbGrid, | ||||||
|  |                    RealD mass, RealD M5, | ||||||
|  |                    GridParallelRNG *RNG4, | ||||||
|  |                    GridParallelRNG *RNG5); | ||||||
|  |  | ||||||
|  | template<class What>  | ||||||
|  | void  TestCGprec(What & Ddwf,  | ||||||
|  |                  GridCartesian         * FGrid,        GridRedBlackCartesian * FrbGrid, | ||||||
|  |                  GridCartesian         * UGrid,        GridRedBlackCartesian * UrbGrid, | ||||||
|  |                  RealD mass, RealD M5, | ||||||
|  |                  GridParallelRNG *RNG4, | ||||||
|  |                  GridParallelRNG *RNG5); | ||||||
|  |  | ||||||
|  | int main (int argc, char ** argv) | ||||||
|  | { | ||||||
|  |   Grid_init(&argc,&argv); | ||||||
|  |  | ||||||
|  |   int threads = GridThread::GetThreads(); | ||||||
|  |   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; | ||||||
|  |  | ||||||
|  |   std::cout << GridLogMessage << "###########################################" << std::endl; | ||||||
|  |   std::cout << GridLogMessage << "This test disables the convergence alert" << std::endl; | ||||||
|  |   std::cout << GridLogMessage << "and checks whether in consecutive runs of the CG" << std::endl; | ||||||
|  |   std::cout << GridLogMessage << "the internal reductions are bit reproducible." << std::endl; | ||||||
|  |   std::cout << GridLogMessage << "A warning and a report is produced for every failure" << std::endl; | ||||||
|  |   std::cout << GridLogMessage << "###########################################" << std::endl; | ||||||
|  |  | ||||||
|  |  | ||||||
|  |   const int Ls=8; | ||||||
|  |   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi()); | ||||||
|  |   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||||
|  |   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); | ||||||
|  |   GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); | ||||||
|  |  | ||||||
|  |  | ||||||
|  |   std::vector<int> seeds4({1,2,3,4}); | ||||||
|  |   std::vector<int> seeds5({5,6,7,8}); | ||||||
|  |   GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5); | ||||||
|  |   GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4); | ||||||
|  |  | ||||||
|  |   LatticeGaugeField Umu(UGrid); | ||||||
|  |   SU3::HotConfiguration(RNG4,Umu); | ||||||
|  |   std::vector<LatticeColourMatrix> U(4,UGrid); | ||||||
|  |  | ||||||
|  |   RealD mass=0.1; | ||||||
|  |   RealD M5  =1.8; | ||||||
|  |   std::cout<<GridLogMessage <<"DomainWallFermion test"<<std::endl; | ||||||
|  |   DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); | ||||||
|  |   TestCGinversions<DomainWallFermionR>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); | ||||||
|  |  | ||||||
|  |   RealD b=1.5;// Scale factor b+c=2, b-c=1 | ||||||
|  |   RealD c=0.5; | ||||||
|  |   std::cout<<GridLogMessage <<"MobiusFermion test"<<std::endl; | ||||||
|  |   MobiusFermionR Dmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c); | ||||||
|  |   TestCGinversions<MobiusFermionR>(Dmob,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); | ||||||
|  |  | ||||||
|  |   std::cout<<GridLogMessage <<"MobiusZolotarevFermion test"<<std::endl; | ||||||
|  |   MobiusZolotarevFermionR Dzolo(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,0.1,2.0); | ||||||
|  |   TestCGinversions<MobiusZolotarevFermionR>(Dzolo,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); | ||||||
|  |  | ||||||
|  |   std::cout<<GridLogMessage <<"ScaledShamirFermion test"<<std::endl; | ||||||
|  |   ScaledShamirFermionR Dsham(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,2.0); | ||||||
|  |   TestCGinversions<ScaledShamirFermionR>(Dsham,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); | ||||||
|  |  | ||||||
|  |   std::cout<<GridLogMessage <<"ShamirZolotarevFermion test"<<std::endl; | ||||||
|  |   ShamirZolotarevFermionR Dshamz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,2.0); | ||||||
|  |   TestCGinversions<ShamirZolotarevFermionR>(Dshamz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); | ||||||
|  |  | ||||||
|  |   std::cout<<GridLogMessage <<"OverlapWilsonCayleyTanhFermion test"<<std::endl; | ||||||
|  |   OverlapWilsonCayleyTanhFermionR Dov(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0); | ||||||
|  |   TestCGinversions<OverlapWilsonCayleyTanhFermionR>(Dov,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); | ||||||
|  |  | ||||||
|  |   std::cout<<GridLogMessage <<"OverlapWilsonCayleyZolotarevFermion test"<<std::endl; | ||||||
|  |   OverlapWilsonCayleyZolotarevFermionR Dovz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,2.0); | ||||||
|  |   TestCGinversions<OverlapWilsonCayleyZolotarevFermionR>(Dovz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5); | ||||||
|  |  | ||||||
|  |   Grid_finalize(); | ||||||
|  | } | ||||||
|  | template<class What>  | ||||||
|  | void  TestCGinversions(What & Ddwf,  | ||||||
|  |                        GridCartesian         * FGrid,          GridRedBlackCartesian * FrbGrid, | ||||||
|  |                        GridCartesian         * UGrid,          GridRedBlackCartesian * UrbGrid, | ||||||
|  |                        RealD mass, RealD M5, | ||||||
|  |                        GridParallelRNG *RNG4, | ||||||
|  |                        GridParallelRNG *RNG5) | ||||||
|  | { | ||||||
|  |   std::cout<<GridLogMessage << "Testing unpreconditioned inverter"<<std::endl; | ||||||
|  |   TestCGunprec<What>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5); | ||||||
|  |   std::cout<<GridLogMessage << "Testing red black preconditioned inverter"<<std::endl; | ||||||
|  |   TestCGprec<What>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5); | ||||||
|  |   std::cout<<GridLogMessage << "Testing red black Schur inverter"<<std::endl; | ||||||
|  |   TestCGschur<What>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | template<class What>  | ||||||
|  | void  TestCGunprec(What & Ddwf,  | ||||||
|  |                    GridCartesian         * FGrid,              GridRedBlackCartesian * FrbGrid, | ||||||
|  |                    GridCartesian         * UGrid,              GridRedBlackCartesian * UrbGrid, | ||||||
|  |                    RealD mass, RealD M5, | ||||||
|  |                    GridParallelRNG *RNG4, | ||||||
|  |                    GridParallelRNG *RNG5) | ||||||
|  | { | ||||||
|  |   LatticeFermion src   (FGrid); random(*RNG5,src); | ||||||
|  |   LatticeFermion result(FGrid); result=zero; | ||||||
|  |  | ||||||
|  |   MdagMLinearOperator<What,LatticeFermion> HermOp(Ddwf); | ||||||
|  |   ConjugateGradient<LatticeFermion> CG(1.0e-8,10000, ReproducibilityTest); | ||||||
|  |   CG.set_reproducibility_interval(REPRODUCIBILITY_INTERVAL); | ||||||
|  |   CG(HermOp,src,result); | ||||||
|  |  | ||||||
|  | } | ||||||
|  | template<class What>  | ||||||
|  | void  TestCGprec(What & Ddwf,  | ||||||
|  |                  GridCartesian         * FGrid,        GridRedBlackCartesian * FrbGrid, | ||||||
|  |                  GridCartesian         * UGrid,        GridRedBlackCartesian * UrbGrid, | ||||||
|  |                  RealD mass, RealD M5, | ||||||
|  |                  GridParallelRNG *RNG4, | ||||||
|  |                  GridParallelRNG *RNG5) | ||||||
|  | { | ||||||
|  |   LatticeFermion src   (FGrid); random(*RNG5,src); | ||||||
|  |   LatticeFermion    src_o(FrbGrid); | ||||||
|  |   LatticeFermion result_o(FrbGrid); | ||||||
|  |   pickCheckerboard(Odd,src_o,src); | ||||||
|  |   result_o=zero; | ||||||
|  |  | ||||||
|  |   SchurDiagMooeeOperator<What,LatticeFermion> HermOpEO(Ddwf); | ||||||
|  |   ConjugateGradient<LatticeFermion> CG(1.0e-8,10000, ReproducibilityTest); | ||||||
|  |   CG.set_reproducibility_interval(REPRODUCIBILITY_INTERVAL); | ||||||
|  |   CG(HermOpEO,src_o,result_o); | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | template<class What>  | ||||||
|  | void  TestCGschur(What & Ddwf,  | ||||||
|  |                    GridCartesian         * FGrid,              GridRedBlackCartesian * FrbGrid, | ||||||
|  |                    GridCartesian         * UGrid,              GridRedBlackCartesian * UrbGrid, | ||||||
|  |                    RealD mass, RealD M5, | ||||||
|  |                    GridParallelRNG *RNG4, | ||||||
|  |                    GridParallelRNG *RNG5) | ||||||
|  | { | ||||||
|  |   LatticeFermion src   (FGrid); random(*RNG5,src); | ||||||
|  |   LatticeFermion result(FGrid); result=zero; | ||||||
|  |  | ||||||
|  |   ConjugateGradient<LatticeFermion> CG(1.0e-8,10000, ReproducibilityTest); | ||||||
|  |   CG.set_reproducibility_interval(REPRODUCIBILITY_INTERVAL); | ||||||
|  |   SchurRedBlackDiagMooeeSolve<LatticeFermion> SchurSolver(CG); | ||||||
|  |   SchurSolver(Ddwf,src,result); | ||||||
|  | } | ||||||
| @@ -63,6 +63,9 @@ class HmcRunner : public NerscHmcRunner { | |||||||
|     Real mass = -0.77; |     Real mass = -0.77; | ||||||
|     FermionAction FermOp(U, *FGrid, *FrbGrid, mass); |     FermionAction FermOp(U, *FGrid, *FrbGrid, mass); | ||||||
|  |  | ||||||
|  |     // To enable the CG reproducibility tests use  | ||||||
|  |     //ConjugateGradient<FermionField> CG(1.0e-8, 10000, ReproducibilityTest); | ||||||
|  |     // This is the plain version | ||||||
|     ConjugateGradient<FermionField> CG(1.0e-8, 10000); |     ConjugateGradient<FermionField> CG(1.0e-8, 10000); | ||||||
|  |  | ||||||
|     TwoFlavourPseudoFermionAction<ImplPolicy> Nf2(FermOp, CG, CG); |     TwoFlavourPseudoFermionAction<ImplPolicy> Nf2(FermOp, CG, CG); | ||||||
|   | |||||||
| @@ -71,7 +71,7 @@ int main (int argc, char ** argv) | |||||||
|   WilsonFermionR Dw(Umu,Grid,RBGrid,mass); |   WilsonFermionR Dw(Umu,Grid,RBGrid,mass); | ||||||
|  |  | ||||||
|   MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw); |   MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw); | ||||||
|   ConjugateGradient<LatticeFermion> CG(1.0e-8,10000); |   ConjugateGradient<LatticeFermion> CG(1.0e-8,10000,true, true); | ||||||
|   CG(HermOp,src,result); |   CG(HermOp,src,result); | ||||||
|  |  | ||||||
|   Grid_finalize(); |   Grid_finalize(); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user