mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-03 21:44:33 +00:00 
			
		
		
		
	Compare commits
	
		
			20 Commits
		
	
	
		
			e5bc679fef
			...
			feature/CG
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					70068cff51 | ||
| 
						 | 
					85c055fa30 | ||
| 
						 | 
					90fedbd2af | ||
| 
						 | 
					ec0c53fa68 | ||
| 
						 | 
					6ceee102e8 | ||
| 
						 | 
					6e57bdb6b3 | ||
| 
						 | 
					4c11e36d3d | ||
| 
						 | 
					9977c53035 | ||
| 
						 | 
					3a74fec62f | ||
| 
						 | 
					8fb0a13f39 | ||
| 
						 | 
					14a1406f54 | ||
| 
						 | 
					538e64e5b4 | ||
| 
						 | 
					b2dc17e160 | ||
| 
						 | 
					afbbcd2194 | ||
| 
						 | 
					d4e0b11bb1 | ||
| 
						 | 
					7144ee7ae8 | ||
| 
						 | 
					f1908c7bc9 | ||
| 
						 | 
					036ec31c48 | ||
| 
						 | 
					53f240200e | ||
| 
						 | 
					9720c9ba3f | 
							
								
								
									
										49
									
								
								lib/Bitwise.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								lib/Bitwise.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,49 @@
 | 
				
			|||||||
 | 
					/*************************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Grid physics library, www.github.com/paboyle/Grid
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Source file: ./lib/Bitwise.cc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Copyright (C) 2016
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					the Free Software Foundation; either version 2 of the License, or
 | 
				
			||||||
 | 
					(at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					You should have received a copy of the GNU General Public License along
 | 
				
			||||||
 | 
					with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
 | 
					51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					See the full license in the file "LICENSE" in the top level distribution
 | 
				
			||||||
 | 
					directory
 | 
				
			||||||
 | 
					*************************************************************************************/
 | 
				
			||||||
 | 
					/*  END LEGAL */
 | 
				
			||||||
 | 
					#include <iostream>
 | 
				
			||||||
 | 
					#include <Bitwise.h>
 | 
				
			||||||
 | 
					#include <bitset>
 | 
				
			||||||
 | 
					#include <climits>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace Grid {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void show_binaryrep(const unsigned char* a, size_t size) {
 | 
				
			||||||
 | 
					  const unsigned char* beg = a;
 | 
				
			||||||
 | 
					  const unsigned char* end = a + size;
 | 
				
			||||||
 | 
					  unsigned int ctr = 0;
 | 
				
			||||||
 | 
					  while (beg != end) {
 | 
				
			||||||
 | 
					    std::cout << std::bitset<CHAR_BIT>(*beg++) << ' ';
 | 
				
			||||||
 | 
					    ctr++;
 | 
				
			||||||
 | 
					    if (ctr % GRID_REAL_BYTES == 0) std::cout << '\n';
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  std::cout << '\n';
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					} // namespace 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										76
									
								
								lib/Bitwise.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										76
									
								
								lib/Bitwise.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,76 @@
 | 
				
			|||||||
 | 
					/*************************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Grid physics library, www.github.com/paboyle/Grid
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Source file: ./lib/Bitwise.h
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Copyright (C) 2016
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					the Free Software Foundation; either version 2 of the License, or
 | 
				
			||||||
 | 
					(at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					You should have received a copy of the GNU General Public License along
 | 
				
			||||||
 | 
					with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
 | 
					51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					See the full license in the file "LICENSE" in the top level distribution
 | 
				
			||||||
 | 
					directory
 | 
				
			||||||
 | 
					*************************************************************************************/
 | 
				
			||||||
 | 
					/*  END LEGAL */
 | 
				
			||||||
 | 
					#ifndef GRID_BITWISE_H
 | 
				
			||||||
 | 
					#define GRID_BITWISE_H
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <cassert>
 | 
				
			||||||
 | 
					#include <cfloat>
 | 
				
			||||||
 | 
					#include <bitset>
 | 
				
			||||||
 | 
					#include <climits>
 | 
				
			||||||
 | 
					#include <Config.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef GRID_DEFAULT_PRECISION_SINGLE
 | 
				
			||||||
 | 
					#define GRID_REAL_BYTES 4
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#ifdef GRID_DEFAULT_PRECISION_DOUBLE
 | 
				
			||||||
 | 
					#define GRID_REAL_BYTES 8
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace Grid {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void show_binaryrep(const unsigned char* a, size_t size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename T>
 | 
				
			||||||
 | 
					void show_binaryrep(const T& a) {
 | 
				
			||||||
 | 
					  const char* beg = reinterpret_cast<const char*>(&a);
 | 
				
			||||||
 | 
					  const char* end = beg + sizeof(a);
 | 
				
			||||||
 | 
					  unsigned int ctr = 0;
 | 
				
			||||||
 | 
					  while (beg != end) {
 | 
				
			||||||
 | 
					  	std::cout << std::bitset<CHAR_BIT>(*beg++) << ' ';
 | 
				
			||||||
 | 
					  	ctr++;
 | 
				
			||||||
 | 
					  	if (ctr % GRID_REAL_BYTES == 0) std::cout << '\n';
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  std::cout << '\n';
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename T>
 | 
				
			||||||
 | 
					void bitwise_xor(T& l, T& r, unsigned char* xors) {
 | 
				
			||||||
 | 
					  assert(sizeof(l) == sizeof(r));
 | 
				
			||||||
 | 
					  unsigned char* org = reinterpret_cast<unsigned char*>(&l);
 | 
				
			||||||
 | 
					  unsigned char* cur = reinterpret_cast<unsigned char*>(&r);
 | 
				
			||||||
 | 
					  int words = sizeof(l) / sizeof(*org);
 | 
				
			||||||
 | 
					  unsigned char result = 0;
 | 
				
			||||||
 | 
					  for (int w = 0; w < words; w++) xors[w] = (org[w] ^ cur[w]);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}; // namespace 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
@@ -62,6 +62,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
				
			|||||||
#include <Grid/serialisation/Serialisation.h>
 | 
					#include <Grid/serialisation/Serialisation.h>
 | 
				
			||||||
#include "Config.h"
 | 
					#include "Config.h"
 | 
				
			||||||
#include <Grid/Timer.h>
 | 
					#include <Grid/Timer.h>
 | 
				
			||||||
 | 
					#include <Grid/Bitwise.h>
 | 
				
			||||||
#include <Grid/PerfCount.h>
 | 
					#include <Grid/PerfCount.h>
 | 
				
			||||||
#include <Grid/Log.h>
 | 
					#include <Grid/Log.h>
 | 
				
			||||||
#include <Grid/AlignedAllocator.h>
 | 
					#include <Grid/AlignedAllocator.h>
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -100,7 +100,7 @@ void Grid_quiesce_nodes(void) {
 | 
				
			|||||||
  me = shmem_my_pe();
 | 
					  me = shmem_my_pe();
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
  if (me) {
 | 
					  if (me) {
 | 
				
			||||||
    std::cout.setstate(std::ios::badbit);
 | 
					    std::cout.setstate(std::ios::badbit);// mute all nodes except 0
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -38,19 +38,21 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
				
			|||||||
#ifdef GRID_OMP
 | 
					#ifdef GRID_OMP
 | 
				
			||||||
#include <omp.h>
 | 
					#include <omp.h>
 | 
				
			||||||
#ifdef GRID_NUMA
 | 
					#ifdef GRID_NUMA
 | 
				
			||||||
#define PARALLEL_FOR_LOOP        _Pragma("omp parallel for schedule(static)")
 | 
					  #define PARALLEL_FOR_LOOP        _Pragma("omp parallel for schedule(static)")
 | 
				
			||||||
#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)")
 | 
					  #define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)")
 | 
				
			||||||
#else
 | 
					  #else
 | 
				
			||||||
#define PARALLEL_FOR_LOOP        _Pragma("omp parallel for schedule(runtime)")
 | 
					  #define PARALLEL_FOR_LOOP        _Pragma("omp parallel for schedule(runtime)")
 | 
				
			||||||
#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(runtime)")
 | 
					  #define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(runtime)")
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)")
 | 
					#define PARALLEL_NESTED_LOOP2    _Pragma("omp parallel for collapse(2)")
 | 
				
			||||||
#define PARALLEL_REGION       _Pragma("omp parallel")
 | 
					#define PARALLEL_REGION          _Pragma("omp parallel")
 | 
				
			||||||
 | 
					#define PARALLEL_FOR_LOOP_STATIC _Pragma("omp parallel for schedule(static)")
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
#define PARALLEL_FOR_LOOP
 | 
					#define PARALLEL_FOR_LOOP
 | 
				
			||||||
#define PARALLEL_FOR_LOOP_INTERN
 | 
					#define PARALLEL_FOR_LOOP_INTERN
 | 
				
			||||||
#define PARALLEL_NESTED_LOOP2
 | 
					#define PARALLEL_NESTED_LOOP2
 | 
				
			||||||
#define PARALLEL_REGION
 | 
					#define PARALLEL_REGION
 | 
				
			||||||
 | 
					#define PARALLEL_FOR_LOOP_STATIC
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
namespace Grid {
 | 
					namespace Grid {
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -9,6 +9,7 @@ Copyright (C) 2015
 | 
				
			|||||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
					Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 | 
				
			||||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
					Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
					Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
This program is free software; you can redistribute it and/or modify
 | 
					This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
it under the terms of the GNU General Public License as published by
 | 
					it under the terms of the GNU General Public License as published by
 | 
				
			||||||
@@ -33,6 +34,21 @@ directory
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
namespace Grid {
 | 
					namespace Grid {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct CG_state {
 | 
				
			||||||
 | 
					  bool do_repro;
 | 
				
			||||||
 | 
					  std::vector<RealD> residuals;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  CG_state() {reset();}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  void reset(){
 | 
				
			||||||
 | 
					    do_repro = false;
 | 
				
			||||||
 | 
					    residuals.clear();
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					enum CGexec_mode{ Default, ReproducibilityTest };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/////////////////////////////////////////////////////////////
 | 
					/////////////////////////////////////////////////////////////
 | 
				
			||||||
// Base classes for iterative processes based on operators
 | 
					// Base classes for iterative processes based on operators
 | 
				
			||||||
// single input vec, single output vec.
 | 
					// single input vec, single output vec.
 | 
				
			||||||
@@ -45,10 +61,30 @@ class ConjugateGradient : public OperatorFunction<Field> {
 | 
				
			|||||||
                           // Defaults true.
 | 
					                           // Defaults true.
 | 
				
			||||||
  RealD Tolerance;
 | 
					  RealD Tolerance;
 | 
				
			||||||
  Integer MaxIterations;
 | 
					  Integer MaxIterations;
 | 
				
			||||||
  ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
 | 
					
 | 
				
			||||||
      : Tolerance(tol),
 | 
					  // Reproducibility controls
 | 
				
			||||||
        MaxIterations(maxit),
 | 
					  bool ReproTest;
 | 
				
			||||||
        ErrorOnNoConverge(err_on_no_conv){};
 | 
					  CG_state CGState; //to check reproducibility by repeating the CG
 | 
				
			||||||
 | 
					  ReproducibilityState<typename Field::vector_object> ReprTest; // for the inner proucts
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Constructor
 | 
				
			||||||
 | 
					  ConjugateGradient(RealD tol, Integer maxit, CGexec_mode Mode = Default)
 | 
				
			||||||
 | 
					    : Tolerance(tol),MaxIterations(maxit){
 | 
				
			||||||
 | 
					    switch(Mode)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					      case Default  : 
 | 
				
			||||||
 | 
					      ErrorOnNoConverge = true;
 | 
				
			||||||
 | 
					      ReproTest = false;
 | 
				
			||||||
 | 
					      case ReproducibilityTest :
 | 
				
			||||||
 | 
					      ErrorOnNoConverge = false;
 | 
				
			||||||
 | 
					      ReproTest = true; 
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  void set_reproducibility_interval(unsigned int interval){
 | 
				
			||||||
 | 
					    ReprTest.interval = interval;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  void operator()(LinearOperatorBase<Field> &Linop, const Field &src,
 | 
					  void operator()(LinearOperatorBase<Field> &Linop, const Field &src,
 | 
				
			||||||
                  Field &psi) {
 | 
					                  Field &psi) {
 | 
				
			||||||
@@ -60,34 +96,37 @@ class ConjugateGradient : public OperatorFunction<Field> {
 | 
				
			|||||||
    Field p(src);
 | 
					    Field p(src);
 | 
				
			||||||
    Field mmp(src);
 | 
					    Field mmp(src);
 | 
				
			||||||
    Field r(src);
 | 
					    Field r(src);
 | 
				
			||||||
 | 
					    Field psi_start(psi);// save for the repro test
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (CGState.do_repro && ReproTest)
 | 
				
			||||||
 | 
					        std::cout << GridLogMessage << "Starting reproducibility test, full check every "
 | 
				
			||||||
 | 
					                  << ReprTest.interval << " calls" << std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if(!ReprTest.do_check)
 | 
				
			||||||
 | 
					        ReprTest.reset();
 | 
				
			||||||
 | 
					    ReprTest.enable_reprocheck=ReproTest;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Initial residual computation & set up
 | 
					    // Initial residual computation & set up
 | 
				
			||||||
    RealD guess = norm2(psi);
 | 
					    RealD guess = norm2(psi, ReprTest);
 | 
				
			||||||
    assert(std::isnan(guess) == 0);
 | 
					    assert(std::isnan(guess) == 0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    
 | 
					    Linop.HermOpAndNorm(psi, mmp, d, b);// eventually split this for the norm check
 | 
				
			||||||
    Linop.HermOpAndNorm(psi, mmp, d, b);
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    r = src - mmp;
 | 
					    r = src - mmp;
 | 
				
			||||||
    p = r;
 | 
					    p = r;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    a = norm2(p);
 | 
					    a = norm2(p, ReprTest);
 | 
				
			||||||
    cp = a;
 | 
					    cp = a;
 | 
				
			||||||
    ssq = norm2(src);
 | 
					    ssq = norm2(src, ReprTest);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    std::cout << GridLogIterative << std::setprecision(4)
 | 
					    std::cout << GridLogIterative << "ConjugateGradient: guess " << guess << std::endl;
 | 
				
			||||||
              << "ConjugateGradient: guess " << guess << std::endl;
 | 
					    std::cout << GridLogIterative << "ConjugateGradient:   src " << ssq << std::endl;
 | 
				
			||||||
    std::cout << GridLogIterative << std::setprecision(4)
 | 
					    std::cout << GridLogIterative << "ConjugateGradient:    mp " << d << std::endl;
 | 
				
			||||||
              << "ConjugateGradient:   src " << ssq << std::endl;
 | 
					    std::cout << GridLogIterative << "ConjugateGradient:   mmp " << b << std::endl;
 | 
				
			||||||
    std::cout << GridLogIterative << std::setprecision(4)
 | 
					    std::cout << GridLogIterative << "ConjugateGradient:  cp,r " << cp << std::endl;
 | 
				
			||||||
              << "ConjugateGradient:    mp " << d << std::endl;
 | 
					    std::cout << GridLogIterative << "ConjugateGradient:     p " << a << std::endl;
 | 
				
			||||||
    std::cout << GridLogIterative << std::setprecision(4)
 | 
					 | 
				
			||||||
              << "ConjugateGradient:   mmp " << b << std::endl;
 | 
					 | 
				
			||||||
    std::cout << GridLogIterative << std::setprecision(4)
 | 
					 | 
				
			||||||
              << "ConjugateGradient:  cp,r " << cp << std::endl;
 | 
					 | 
				
			||||||
    std::cout << GridLogIterative << std::setprecision(4)
 | 
					 | 
				
			||||||
              << "ConjugateGradient:     p " << a << std::endl;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    RealD rsq = Tolerance * Tolerance * ssq;
 | 
					    RealD rsq = Tolerance * Tolerance * ssq;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -107,10 +146,10 @@ class ConjugateGradient : public OperatorFunction<Field> {
 | 
				
			|||||||
    SolverTimer.Start();
 | 
					    SolverTimer.Start();
 | 
				
			||||||
    int k;
 | 
					    int k;
 | 
				
			||||||
    for (k = 1; k <= MaxIterations; k++) {
 | 
					    for (k = 1; k <= MaxIterations; k++) {
 | 
				
			||||||
      c = cp;
 | 
					      c = cp;// old residual
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      MatrixTimer.Start();
 | 
					      MatrixTimer.Start();
 | 
				
			||||||
      Linop.HermOpAndNorm(p, mmp, d, qq);
 | 
					      Linop.HermOpAndNorm(p, mmp, d, qq);// mmp = Ap, d=pAp
 | 
				
			||||||
      MatrixTimer.Stop();
 | 
					      MatrixTimer.Stop();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      LinalgTimer.Start();
 | 
					      LinalgTimer.Start();
 | 
				
			||||||
@@ -118,14 +157,31 @@ class ConjugateGradient : public OperatorFunction<Field> {
 | 
				
			|||||||
      //  ComplexD dck  = innerProduct(p,mmp);
 | 
					      //  ComplexD dck  = innerProduct(p,mmp);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      a = c / d;
 | 
					      a = c / d;
 | 
				
			||||||
      b_pred = a * (a * qq - d) / c;
 | 
					      b_pred = a * (a * qq - d) / c;// a check
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      cp = axpy_norm(r, -a, mmp, r);
 | 
					
 | 
				
			||||||
 | 
					      axpy(r, -a, mmp, r);// new residual r = r_old - a * Ap
 | 
				
			||||||
 | 
					      cp = norm2(r, ReprTest); // bookkeeping this norm
 | 
				
			||||||
 | 
					      if (ReproTest && !CGState.do_repro) {
 | 
				
			||||||
 | 
					        CGState.residuals.push_back(cp);  // save residuals state
 | 
				
			||||||
 | 
					                std::cout << GridLogIterative << "ReproTest: Saving state" << std::endl;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      if (ReproTest && CGState.do_repro){
 | 
				
			||||||
 | 
					        // check that the residual agrees with the previous run
 | 
				
			||||||
 | 
					        std::cout << GridLogIterative << "ReproTest: Checking state k=" << k << std::endl;
 | 
				
			||||||
 | 
					        if (cp != CGState.residuals[k-1]){
 | 
				
			||||||
 | 
					                std::cout << GridLogMessage << "Failing reproducibility test";
 | 
				
			||||||
 | 
					                std::cout << GridLogMessage << " at k=" << k << std::endl;
 | 
				
			||||||
 | 
					                std::cout << GridLogMessage << "saved residual = " << CGState.residuals[k-1] 
 | 
				
			||||||
 | 
					                        << " cp = " << cp << std::endl;
 | 
				
			||||||
 | 
					                exit(1);  // exit after the first failure
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
      b = cp / c;
 | 
					      b = cp / c;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      // Fuse these loops ; should be really easy
 | 
					      // Fuse these loops ; should be really easy
 | 
				
			||||||
      psi = a * p + psi;
 | 
					      psi = a * p + psi; // update solution
 | 
				
			||||||
      p = p * b + r;
 | 
					      p = p * b + r;  // update search direction
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      LinalgTimer.Stop();
 | 
					      LinalgTimer.Stop();
 | 
				
			||||||
      std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
 | 
					      std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
 | 
				
			||||||
@@ -156,6 +212,22 @@ class ConjugateGradient : public OperatorFunction<Field> {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
 | 
					        if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if (! (CGState.do_repro && ReproTest)){
 | 
				
			||||||
 | 
					                CGState.do_repro = true;
 | 
				
			||||||
 | 
					                ReprTest.do_check = true;
 | 
				
			||||||
 | 
					                ReprTest.reset_counter();
 | 
				
			||||||
 | 
					                this->operator()(Linop, src, psi_start);// run the repro test
 | 
				
			||||||
 | 
					                if (ReprTest.success)
 | 
				
			||||||
 | 
					                	std::cout << GridLogMessage << "Reproducibility test passed" << std::endl;
 | 
				
			||||||
 | 
					                else{
 | 
				
			||||||
 | 
					                	std::cout << GridLogMessage << "Reproducibility test failed" << std::endl;
 | 
				
			||||||
 | 
					                	exit(1);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // Clear state
 | 
				
			||||||
 | 
					        CGState.reset();
 | 
				
			||||||
 | 
					        ReprTest.reset();
 | 
				
			||||||
        return;
 | 
					        return;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -59,7 +59,7 @@ public:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    int Nstop;   // Number of evecs checked for convergence
 | 
					    int Nstop;   // Number of evecs checked for convergence
 | 
				
			||||||
    int Nk;      // Number of converged sought
 | 
					    int Nk;      // Number of converged sought
 | 
				
			||||||
    int Np;      // Np -- Number of spare vecs in kryloc space
 | 
					    int Np;      // Np -- Number of spare vecs in krylov space
 | 
				
			||||||
    int Nm;      // Nm -- total number of vectors
 | 
					    int Nm;      // Nm -- total number of vectors
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    RealD eresid;
 | 
					    RealD eresid;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -65,6 +65,7 @@ const std::vector<int> & CartesianCommunicator::ThisProcessorCoor(void) { return
 | 
				
			|||||||
const std::vector<int> & CartesianCommunicator::ProcessorGrid(void)     { return _processors; };
 | 
					const std::vector<int> & CartesianCommunicator::ProcessorGrid(void)     { return _processors; };
 | 
				
			||||||
int                      CartesianCommunicator::ProcessorCount(void)    { return _Nprocessors; };
 | 
					int                      CartesianCommunicator::ProcessorCount(void)    { return _Nprocessors; };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
////////////////////////////////////////////////////////////////////////////////
 | 
					////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
// very VERY rarely (Log, serial RNG) we need world without a grid
 | 
					// very VERY rarely (Log, serial RNG) we need world without a grid
 | 
				
			||||||
////////////////////////////////////////////////////////////////////////////////
 | 
					////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
@@ -89,11 +90,11 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N)
 | 
				
			|||||||
#if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPI3L)
 | 
					#if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPI3L)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 | 
					void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 | 
				
			||||||
						       void *xmit,
 | 
					                                                       void *xmit,
 | 
				
			||||||
						       int xmit_to_rank,
 | 
					                                                       int xmit_to_rank,
 | 
				
			||||||
						       void *recv,
 | 
					                                                       void *recv,
 | 
				
			||||||
						       int recv_from_rank,
 | 
					                                                       int recv_from_rank,
 | 
				
			||||||
						       int bytes)
 | 
					                                                       int bytes)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
 | 
					  SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -68,6 +68,8 @@ class CartesianCommunicator {
 | 
				
			|||||||
  static MPI_Comm communicator_world;
 | 
					  static MPI_Comm communicator_world;
 | 
				
			||||||
         MPI_Comm communicator;
 | 
					         MPI_Comm communicator;
 | 
				
			||||||
  typedef MPI_Request CommsRequest_t;
 | 
					  typedef MPI_Request CommsRequest_t;
 | 
				
			||||||
 | 
					  static char name[MPI_MAX_PROCESSOR_NAME]; // processing node physical name
 | 
				
			||||||
 | 
					  static int length; 
 | 
				
			||||||
#else 
 | 
					#else 
 | 
				
			||||||
  typedef int CommsRequest_t;
 | 
					  typedef int CommsRequest_t;
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
@@ -149,6 +151,7 @@ class CartesianCommunicator {
 | 
				
			|||||||
  const std::vector<int> & ProcessorGrid(void)     ;
 | 
					  const std::vector<int> & ProcessorGrid(void)     ;
 | 
				
			||||||
  int                      ProcessorCount(void)    ;
 | 
					  int                      ProcessorCount(void)    ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  void        		 PrintRankInfo(void)     ;
 | 
				
			||||||
  ////////////////////////////////////////////////////////////////////////////////
 | 
					  ////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
  // very VERY rarely (Log, serial RNG) we need world without a grid
 | 
					  // very VERY rarely (Log, serial RNG) we need world without a grid
 | 
				
			||||||
  ////////////////////////////////////////////////////////////////////////////////
 | 
					  ////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -35,6 +35,8 @@ namespace Grid {
 | 
				
			|||||||
// Info that is setup once and indept of cartesian layout
 | 
					// Info that is setup once and indept of cartesian layout
 | 
				
			||||||
///////////////////////////////////////////////////////////////////////////////////////////////////
 | 
					///////////////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
MPI_Comm CartesianCommunicator::communicator_world;
 | 
					MPI_Comm CartesianCommunicator::communicator_world;
 | 
				
			||||||
 | 
					char CartesianCommunicator::name[MPI_MAX_PROCESSOR_NAME]; // processing node physical name
 | 
				
			||||||
 | 
					int CartesianCommunicator::length; 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Should error check all MPI calls.
 | 
					// Should error check all MPI calls.
 | 
				
			||||||
void CartesianCommunicator::Init(int *argc, char ***argv) {
 | 
					void CartesianCommunicator::Init(int *argc, char ***argv) {
 | 
				
			||||||
@@ -44,6 +46,8 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
 | 
				
			|||||||
    MPI_Init(argc,argv);
 | 
					    MPI_Init(argc,argv);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
 | 
					  MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  MPI_Get_processor_name(name, &length);
 | 
				
			||||||
  ShmInitGeneric();
 | 
					  ShmInitGeneric();
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -206,5 +210,10 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
 | 
				
			|||||||
  assert(ierr==0);
 | 
					  assert(ierr==0);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void CartesianCommunicator::PrintRankInfo(){
 | 
				
			||||||
 | 
					    std::cout << "Grid: Rank "<< _processor << "  -  Physical node name: " << name << std::endl;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}// end of namespace
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -576,5 +576,10 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
 | 
				
			|||||||
  assert(ierr==0);
 | 
					  assert(ierr==0);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  void CartesianCommunicator::PrintRankInfo(){
 | 
				
			||||||
 | 
					    std::cout << "Grid: Rank "<< _processor << "  -  Physical node name: " << name << std::endl;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -869,6 +869,9 @@ void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) {
 | 
				
			|||||||
  return NULL;
 | 
					  return NULL;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  void CartesianCommunicator::PrintRankInfo(){
 | 
				
			||||||
 | 
					    std::cout << "Grid: Rank "<< _processor << "  -  Physical node name: " << name << std::endl;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -37,6 +37,11 @@ void CartesianCommunicator::Init(int *argc, char *** arv)
 | 
				
			|||||||
  ShmInitGeneric();
 | 
					  ShmInitGeneric();
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void CartesianCommunicator::PrintRankInfo(){
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << "No Rank Info available" << std::endl;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
 | 
					CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  _processors = processors;
 | 
					  _processors = processors;
 | 
				
			||||||
@@ -60,10 +65,10 @@ void CartesianCommunicator::GlobalSum(uint64_t &){}
 | 
				
			|||||||
void CartesianCommunicator::GlobalSumVector(double *,int N){}
 | 
					void CartesianCommunicator::GlobalSumVector(double *,int N){}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void CartesianCommunicator::SendRecvPacket(void *xmit,
 | 
					void CartesianCommunicator::SendRecvPacket(void *xmit,
 | 
				
			||||||
					   void *recv,
 | 
					                                           void *recv,
 | 
				
			||||||
					   int xmit_to_rank,
 | 
					                                           int xmit_to_rank,
 | 
				
			||||||
					   int recv_from_rank,
 | 
					                                           int recv_from_rank,
 | 
				
			||||||
					   int bytes)
 | 
					                                           int bytes)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  assert(0);
 | 
					  assert(0);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -71,19 +76,19 @@ void CartesianCommunicator::SendRecvPacket(void *xmit,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
// Basic Halo comms primitive -- should never call in single node
 | 
					// Basic Halo comms primitive -- should never call in single node
 | 
				
			||||||
void CartesianCommunicator::SendToRecvFrom(void *xmit,
 | 
					void CartesianCommunicator::SendToRecvFrom(void *xmit,
 | 
				
			||||||
					   int dest,
 | 
					                                           int dest,
 | 
				
			||||||
					   void *recv,
 | 
					                                           void *recv,
 | 
				
			||||||
					   int from,
 | 
					                                           int from,
 | 
				
			||||||
					   int bytes)
 | 
					                                           int bytes)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  assert(0);
 | 
					  assert(0);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 | 
					void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
 | 
				
			||||||
						void *xmit,
 | 
					                                                void *xmit,
 | 
				
			||||||
						int dest,
 | 
					                                                int dest,
 | 
				
			||||||
						void *recv,
 | 
					                                                void *recv,
 | 
				
			||||||
						int from,
 | 
					                                                int from,
 | 
				
			||||||
						int bytes)
 | 
					                                                int bytes)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  assert(0);
 | 
					  assert(0);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -333,5 +333,9 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
 | 
				
			|||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  void CartesianCommunicator::PrintRankInfo(){
 | 
				
			||||||
 | 
					    std::cout << GridLogMessage << "SHMEM: Rank Info not implemented yet" << std::endl;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -31,6 +31,87 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 | 
				
			|||||||
#define GRID_LATTICE_REDUCTION_H
 | 
					#define GRID_LATTICE_REDUCTION_H
 | 
				
			||||||
 | 
					
 | 
				
			||||||
namespace Grid {
 | 
					namespace Grid {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <class T>
 | 
				
			||||||
 | 
					class ReproducibilityState {
 | 
				
			||||||
 | 
					 public:
 | 
				
			||||||
 | 
					  typedef typename T::vector_type vector_type;
 | 
				
			||||||
 | 
					  typedef std::vector<vector_type, alignedAllocator<vector_type> >  sum_type;
 | 
				
			||||||
 | 
					  unsigned int  n_call;
 | 
				
			||||||
 | 
					  bool          do_check;
 | 
				
			||||||
 | 
					  bool          enable_reprocheck;
 | 
				
			||||||
 | 
					  bool          success;
 | 
				
			||||||
 | 
					  unsigned int  interval; 
 | 
				
			||||||
 | 
					  std::vector<sum_type> th_states;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  void reset_counter() { n_call = 0; }
 | 
				
			||||||
 | 
					  void reset() {
 | 
				
			||||||
 | 
					    th_states.clear();
 | 
				
			||||||
 | 
					    do_check          = false;
 | 
				
			||||||
 | 
					    enable_reprocheck = false;
 | 
				
			||||||
 | 
					    success           = true;
 | 
				
			||||||
 | 
					    n_call            = 0;
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ReproducibilityState():interval(1) { 
 | 
				
			||||||
 | 
					    reset(); 
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  void check(GridBase* grid, sum_type &sumarray){
 | 
				
			||||||
 | 
					    ///////////////////////  Reproducibility section, not threaded on purpouse
 | 
				
			||||||
 | 
					    if (enable_reprocheck) {
 | 
				
			||||||
 | 
					      if (do_check && (n_call % interval) == 0) {
 | 
				
			||||||
 | 
					        for (int thread = 0; thread < sumarray.size(); thread++) {
 | 
				
			||||||
 | 
					          int words = sizeof(sumarray[thread])/sizeof(unsigned char);
 | 
				
			||||||
 | 
					          unsigned char xors[words];
 | 
				
			||||||
 | 
					          bitwise_xor(sumarray[thread], th_states[n_call][thread],xors);
 | 
				
			||||||
 | 
					          // OR all words
 | 
				
			||||||
 | 
					          unsigned char res = 0;
 | 
				
			||||||
 | 
					          for (int w = 0; w < words; w++) res = res | xors[w];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					          Grid_unquiesce_nodes();
 | 
				
			||||||
 | 
					          int rank = 0;
 | 
				
			||||||
 | 
					          while (rank < grid->_Nprocessors){
 | 
				
			||||||
 | 
					            if (rank == grid->ThisRank() ){
 | 
				
			||||||
 | 
					              if ( res ) {
 | 
				
			||||||
 | 
					                std::cout << "Reproducibility failure report" << std::endl;
 | 
				
			||||||
 | 
					                grid->PrintRankInfo();
 | 
				
			||||||
 | 
					                std::cout << "Call: "<< n_call << " Thread: " << thread << std::endl;
 | 
				
			||||||
 | 
					                std::cout << "Size of states: " << th_states.size() << std::endl;
 | 
				
			||||||
 | 
					                std::cout << std::setprecision(GRID_REAL_DIGITS+1) << std::scientific;
 | 
				
			||||||
 | 
					                std::cout << "Saved partial sum  : " << th_states[n_call][thread] << std::endl;
 | 
				
			||||||
 | 
					                std::cout << "Current partial sum: " << sumarray[thread] << std::endl;
 | 
				
			||||||
 | 
					                std::cout << "Saved state "  << std::endl; show_binaryrep(th_states[n_call][thread]);
 | 
				
			||||||
 | 
					                std::cout << "Current state" << std::endl; show_binaryrep(sumarray[thread]);
 | 
				
			||||||
 | 
					                std::cout << "XOR result"    << std::endl; show_binaryrep(xors, words);
 | 
				
			||||||
 | 
					              		//std::cout << std::defaultfloat;  //not supported by some compilers
 | 
				
			||||||
 | 
					                std::cout << std::setprecision(6);
 | 
				
			||||||
 | 
					                success = false; 
 | 
				
			||||||
 | 
					              }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            rank++;
 | 
				
			||||||
 | 
					            grid->Barrier();
 | 
				
			||||||
 | 
					          }
 | 
				
			||||||
 | 
					          Grid_quiesce_nodes();                         
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      } else if (!do_check)
 | 
				
			||||||
 | 
					      {
 | 
				
			||||||
 | 
					        std::cout << GridLogDebug << "Saving thread state for inner product. Call n. " 
 | 
				
			||||||
 | 
					        << n_call << std::endl;
 | 
				
			||||||
 | 
					        th_states.resize(n_call+1);
 | 
				
			||||||
 | 
					        th_states[n_call].resize(grid->SumArraySize());
 | 
				
			||||||
 | 
					          th_states[n_call] = sumarray;  // save threads state
 | 
				
			||||||
 | 
					          //n_call++;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        n_call++;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef GRID_WARN_SUBOPTIMAL
 | 
					#ifdef GRID_WARN_SUBOPTIMAL
 | 
				
			||||||
#warning "Optimisation alert all these reduction loops are NOT threaded "
 | 
					#warning "Optimisation alert all these reduction loops are NOT threaded "
 | 
				
			||||||
#endif     
 | 
					#endif     
 | 
				
			||||||
@@ -39,12 +120,25 @@ namespace Grid {
 | 
				
			|||||||
    // Deterministic Reduction operations
 | 
					    // Deterministic Reduction operations
 | 
				
			||||||
    ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
					    ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
  template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
 | 
					  template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
 | 
				
			||||||
    ComplexD nrm = innerProduct(arg,arg);
 | 
					        ReproducibilityState<vobj> repr;
 | 
				
			||||||
    return std::real(nrm); 
 | 
					        ComplexD nrm = innerProduct(arg, arg, repr);
 | 
				
			||||||
  }
 | 
					        return std::real(nrm); 
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    template <class vobj>
 | 
				
			||||||
 | 
					    inline RealD norm2(const Lattice<vobj> &arg, ReproducibilityState<vobj>& rep) {
 | 
				
			||||||
 | 
					      ComplexD nrm = innerProduct(arg, arg, rep);
 | 
				
			||||||
 | 
					      return std::real(nrm);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    template<class vobj>
 | 
					    template<class vobj>
 | 
				
			||||||
    inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right) 
 | 
					    inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right){
 | 
				
			||||||
 | 
					      ReproducibilityState<vobj> repr;
 | 
				
			||||||
 | 
					      return innerProduct(left, right, repr);
 | 
				
			||||||
 | 
					    } 
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    template<class vobj>
 | 
				
			||||||
 | 
					    inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right, ReproducibilityState<vobj>& repr) 
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
      typedef typename vobj::scalar_type scalar_type;
 | 
					      typedef typename vobj::scalar_type scalar_type;
 | 
				
			||||||
      typedef typename vobj::vector_type vector_type;
 | 
					      typedef typename vobj::vector_type vector_type;
 | 
				
			||||||
@@ -54,24 +148,28 @@ namespace Grid {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
      std::vector<vector_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize());
 | 
					      std::vector<vector_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize());
 | 
				
			||||||
      for(int i=0;i<grid->SumArraySize();i++){
 | 
					      for(int i=0;i<grid->SumArraySize();i++){
 | 
				
			||||||
	sumarray[i]=zero;
 | 
					        sumarray[i]=zero;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
PARALLEL_FOR_LOOP
 | 
					      PARALLEL_FOR_LOOP_STATIC //request statically scheduled threads for reproducibility
 | 
				
			||||||
      for(int thr=0;thr<grid->SumArraySize();thr++){
 | 
					      for(int thr=0;thr<grid->SumArraySize();thr++){
 | 
				
			||||||
	int nwork, mywork, myoff;
 | 
					        int nwork, mywork, myoff;
 | 
				
			||||||
	GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);
 | 
					        GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
	decltype(innerProduct(left._odata[0],right._odata[0])) vnrm=zero; // private to thread; sub summation
 | 
					        decltype(innerProduct(left._odata[0],right._odata[0])) vnrm = zero; // private to thread; sub summation
 | 
				
			||||||
        for(int ss=myoff;ss<mywork+myoff; ss++){
 | 
					        for(int ss=myoff;ss<mywork+myoff; ss++){
 | 
				
			||||||
	  vnrm = vnrm + innerProduct(left._odata[ss],right._odata[ss]);
 | 
					          vnrm = vnrm + innerProduct(left._odata[ss],right._odata[ss]);
 | 
				
			||||||
	}
 | 
					        }
 | 
				
			||||||
	sumarray[thr]=TensorRemove(vnrm) ;
 | 
					        sumarray[thr]=TensorRemove(vnrm) ;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
 | 
					      /////////// Reproducibility
 | 
				
			||||||
 | 
					      repr.check(grid, sumarray);
 | 
				
			||||||
 | 
					      ///////////////////////////
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      vector_type vvnrm; vvnrm=zero;  // sum across threads
 | 
					      vector_type vvnrm; vvnrm=zero;  // sum across threads
 | 
				
			||||||
      for(int i=0;i<grid->SumArraySize();i++){
 | 
					      for(int i=0;i<grid->SumArraySize();i++){
 | 
				
			||||||
	vvnrm = vvnrm+sumarray[i];
 | 
					        vvnrm = vvnrm+sumarray[i];
 | 
				
			||||||
      } 
 | 
					      } 
 | 
				
			||||||
      nrm = Reduce(vvnrm);// sum across simd
 | 
					      nrm = Reduce(vvnrm);// sum across simd
 | 
				
			||||||
      right._grid->GlobalSum(nrm);
 | 
					      right._grid->GlobalSum(nrm);
 | 
				
			||||||
@@ -79,26 +177,26 @@ PARALLEL_FOR_LOOP
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    template<class Op,class T1>
 | 
					    template<class Op,class T1>
 | 
				
			||||||
      inline auto sum(const LatticeUnaryExpression<Op,T1> & expr)
 | 
					    inline auto sum(const LatticeUnaryExpression<Op,T1> & expr)
 | 
				
			||||||
      ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object
 | 
					    ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
      return sum(closure(expr));
 | 
					      return sum(closure(expr));
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    template<class Op,class T1,class T2>
 | 
					    template<class Op,class T1,class T2>
 | 
				
			||||||
      inline auto sum(const LatticeBinaryExpression<Op,T1,T2> & expr)
 | 
					    inline auto sum(const LatticeBinaryExpression<Op,T1,T2> & expr)
 | 
				
			||||||
      ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),eval(0,std::get<1>(expr.second))))::scalar_object
 | 
					    ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),eval(0,std::get<1>(expr.second))))::scalar_object
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
      return sum(closure(expr));
 | 
					      return sum(closure(expr));
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    template<class Op,class T1,class T2,class T3>
 | 
					    template<class Op,class T1,class T2,class T3>
 | 
				
			||||||
      inline auto sum(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr)
 | 
					    inline auto sum(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr)
 | 
				
			||||||
      ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
 | 
					    ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
 | 
				
			||||||
				 eval(0,std::get<1>(expr.second)),
 | 
					     eval(0,std::get<1>(expr.second)),
 | 
				
			||||||
				 eval(0,std::get<2>(expr.second))
 | 
					     eval(0,std::get<2>(expr.second))
 | 
				
			||||||
				 ))::scalar_object
 | 
					     ))::scalar_object
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
      return sum(closure(expr));
 | 
					      return sum(closure(expr));
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@@ -111,24 +209,24 @@ PARALLEL_FOR_LOOP
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
      std::vector<vobj,alignedAllocator<vobj> > sumarray(grid->SumArraySize());
 | 
					      std::vector<vobj,alignedAllocator<vobj> > sumarray(grid->SumArraySize());
 | 
				
			||||||
      for(int i=0;i<grid->SumArraySize();i++){
 | 
					      for(int i=0;i<grid->SumArraySize();i++){
 | 
				
			||||||
	sumarray[i]=zero;
 | 
					        sumarray[i]=zero;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
PARALLEL_FOR_LOOP
 | 
					      PARALLEL_FOR_LOOP
 | 
				
			||||||
      for(int thr=0;thr<grid->SumArraySize();thr++){
 | 
					      for(int thr=0;thr<grid->SumArraySize();thr++){
 | 
				
			||||||
	int nwork, mywork, myoff;
 | 
					        int nwork, mywork, myoff;
 | 
				
			||||||
	GridThread::GetWork(grid->oSites(),thr,mywork,myoff);
 | 
					        GridThread::GetWork(grid->oSites(),thr,mywork,myoff);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	vobj vvsum=zero;
 | 
					        vobj vvsum=zero;
 | 
				
			||||||
        for(int ss=myoff;ss<mywork+myoff; ss++){
 | 
					        for(int ss=myoff;ss<mywork+myoff; ss++){
 | 
				
			||||||
	  vvsum = vvsum + arg._odata[ss];
 | 
					          vvsum = vvsum + arg._odata[ss];
 | 
				
			||||||
	}
 | 
					        }
 | 
				
			||||||
	sumarray[thr]=vvsum;
 | 
					        sumarray[thr]=vvsum;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      vobj vsum=zero;  // sum across threads
 | 
					      vobj vsum=zero;  // sum across threads
 | 
				
			||||||
      for(int i=0;i<grid->SumArraySize();i++){
 | 
					      for(int i=0;i<grid->SumArraySize();i++){
 | 
				
			||||||
	vsum = vsum+sumarray[i];
 | 
					        vsum = vsum+sumarray[i];
 | 
				
			||||||
      } 
 | 
					      } 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      typedef typename vobj::scalar_object sobj;
 | 
					      typedef typename vobj::scalar_object sobj;
 | 
				
			||||||
@@ -138,7 +236,7 @@ PARALLEL_FOR_LOOP
 | 
				
			|||||||
      extract(vsum,buf);
 | 
					      extract(vsum,buf);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      for(int i=0;i<Nsimd;i++) ssum = ssum + buf[i];
 | 
					      for(int i=0;i<Nsimd;i++) ssum = ssum + buf[i];
 | 
				
			||||||
      arg._grid->GlobalSum(ssum);
 | 
					        arg._grid->GlobalSum(ssum);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      return ssum;
 | 
					      return ssum;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@@ -146,23 +244,23 @@ PARALLEL_FOR_LOOP
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<typename vobj::scalar_object> &result,int orthogdim)
 | 
					template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<typename vobj::scalar_object> &result,int orthogdim)
 | 
				
			||||||
{
 | 
					    {
 | 
				
			||||||
  typedef typename vobj::scalar_object sobj;
 | 
					      typedef typename vobj::scalar_object sobj;
 | 
				
			||||||
  GridBase  *grid = Data._grid;
 | 
					      GridBase  *grid = Data._grid;
 | 
				
			||||||
  assert(grid!=NULL);
 | 
					      assert(grid!=NULL);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // FIXME
 | 
					  // FIXME
 | 
				
			||||||
  // std::cout<<GridLogMessage<<"WARNING ! SliceSum is unthreaded "<<grid->SumArraySize()<<" threads "<<std::endl;
 | 
					  // std::cout<<GridLogMessage<<"WARNING ! SliceSum is unthreaded "<<grid->SumArraySize()<<" threads "<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  const int    Nd = grid->_ndimension;
 | 
					      const int    Nd = grid->_ndimension;
 | 
				
			||||||
  const int Nsimd = grid->Nsimd();
 | 
					      const int Nsimd = grid->Nsimd();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  assert(orthogdim >= 0);
 | 
					      assert(orthogdim >= 0);
 | 
				
			||||||
  assert(orthogdim < Nd);
 | 
					      assert(orthogdim < Nd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  int fd=grid->_fdimensions[orthogdim];
 | 
					      int fd=grid->_fdimensions[orthogdim];
 | 
				
			||||||
  int ld=grid->_ldimensions[orthogdim];
 | 
					      int ld=grid->_ldimensions[orthogdim];
 | 
				
			||||||
  int rd=grid->_rdimensions[orthogdim];
 | 
					      int rd=grid->_rdimensions[orthogdim];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  std::vector<vobj,alignedAllocator<vobj> > lvSum(rd); // will locally sum vectors first
 | 
					  std::vector<vobj,alignedAllocator<vobj> > lvSum(rd); // will locally sum vectors first
 | 
				
			||||||
  std::vector<sobj> lsSum(ld,zero); // sum across these down to scalars
 | 
					  std::vector<sobj> lsSum(ld,zero); // sum across these down to scalars
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -66,6 +66,9 @@ namespace Optimization {
 | 
				
			|||||||
    double f[4];
 | 
					    double f[4];
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  struct Vsplat{
 | 
					  struct Vsplat{
 | 
				
			||||||
    //Complex float
 | 
					    //Complex float
 | 
				
			||||||
    inline __m256 operator()(float a, float b){
 | 
					    inline __m256 operator()(float a, float b){
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -60,6 +60,13 @@ directory
 | 
				
			|||||||
#include "Grid_neon.h"
 | 
					#include "Grid_neon.h"
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef GRID_DEFAULT_PRECISION_SINGLE
 | 
				
			||||||
 | 
					#define GRID_REAL_DIGITS FLT_DIG
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					#ifdef GRID_DEFAULT_PRECISION_DOUBLE
 | 
				
			||||||
 | 
					#define GRID_REAL_DIGITS DBL_DIG
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
namespace Grid {
 | 
					namespace Grid {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
//////////////////////////////////////
 | 
					//////////////////////////////////////
 | 
				
			||||||
@@ -146,6 +153,23 @@ class Grid_simd {
 | 
				
			|||||||
  Grid_simd(const Grid_simd &rhs) : v(rhs.v){};  // compiles in movaps
 | 
					  Grid_simd(const Grid_simd &rhs) : v(rhs.v){};  // compiles in movaps
 | 
				
			||||||
  Grid_simd(const Grid_simd &&rhs) : v(rhs.v){};
 | 
					  Grid_simd(const Grid_simd &&rhs) : v(rhs.v){};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /////////////////////////////
 | 
				
			||||||
 | 
					  // Comparisons
 | 
				
			||||||
 | 
					  /////////////////////////////
 | 
				
			||||||
 | 
					  inline bool operator==(const Grid_simd& lhs){ 
 | 
				
			||||||
 | 
					    Grid_simd::conv_t conv1;
 | 
				
			||||||
 | 
					    Grid_simd::conv_t conv2;
 | 
				
			||||||
 | 
					    conv1.v = v;
 | 
				
			||||||
 | 
					    conv2.v = lhs.v;
 | 
				
			||||||
 | 
					    return std::equal(std::begin(conv1.s), std::end(conv1.s), std::begin(conv2.s));
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  inline bool operator!=(const Grid_simd& lhs){ 
 | 
				
			||||||
 | 
					    return !(*this == lhs);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  /////////////////////////////
 | 
					  /////////////////////////////
 | 
				
			||||||
  // Constructors
 | 
					  // Constructors
 | 
				
			||||||
  /////////////////////////////
 | 
					  /////////////////////////////
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										215
									
								
								tests/debug/Test_cayley_cg_reproducibility.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										215
									
								
								tests/debug/Test_cayley_cg_reproducibility.cc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,215 @@
 | 
				
			|||||||
 | 
					    /*************************************************************************************
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Grid physics library, www.github.com/paboyle/Grid 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Source file: ./tests/Test_cayley_cg_reproducibility.cc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Copyright (C) 2015
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 | 
				
			||||||
 | 
					    Author: Guido Cossu <guido.cossu@ed.ac.uk>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is free software; you can redistribute it and/or modify
 | 
				
			||||||
 | 
					    it under the terms of the GNU General Public License as published by
 | 
				
			||||||
 | 
					    the Free Software Foundation; either version 2 of the License, or
 | 
				
			||||||
 | 
					    (at your option) any later version.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					    GNU General Public License for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    You should have received a copy of the GNU General Public License along
 | 
				
			||||||
 | 
					    with this program; if not, write to the Free Software Foundation, Inc.,
 | 
				
			||||||
 | 
					    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    See the full license in the file "LICENSE" in the top level distribution directory
 | 
				
			||||||
 | 
					    *************************************************************************************/
 | 
				
			||||||
 | 
					    /*  END LEGAL */
 | 
				
			||||||
 | 
					#include <Grid/Grid.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					using namespace std;
 | 
				
			||||||
 | 
					using namespace Grid;
 | 
				
			||||||
 | 
					using namespace Grid::QCD;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define REPRODUCIBILITY_INTERVAL 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class d>
 | 
				
			||||||
 | 
					struct scal {
 | 
				
			||||||
 | 
					  d internal;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Gamma::GammaMatrix Gmu [] = {
 | 
				
			||||||
 | 
					    Gamma::GammaX,
 | 
				
			||||||
 | 
					    Gamma::GammaY,
 | 
				
			||||||
 | 
					    Gamma::GammaZ,
 | 
				
			||||||
 | 
					    Gamma::GammaT
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class What> 
 | 
				
			||||||
 | 
					void  TestCGinversions(What & Ddwf, 
 | 
				
			||||||
 | 
					                       GridCartesian         * FGrid,          GridRedBlackCartesian * FrbGrid,
 | 
				
			||||||
 | 
					                       GridCartesian         * UGrid,          GridRedBlackCartesian * UrbGrid,
 | 
				
			||||||
 | 
					                       RealD mass, RealD M5,
 | 
				
			||||||
 | 
					                       GridParallelRNG *RNG4,
 | 
				
			||||||
 | 
					                       GridParallelRNG *RNG5);
 | 
				
			||||||
 | 
					template<class What> 
 | 
				
			||||||
 | 
					void  TestCGschur(What & Ddwf, 
 | 
				
			||||||
 | 
					                  GridCartesian         * FGrid,               GridRedBlackCartesian * FrbGrid,
 | 
				
			||||||
 | 
					                  GridCartesian         * UGrid,               GridRedBlackCartesian * UrbGrid,
 | 
				
			||||||
 | 
					                  RealD mass, RealD M5,
 | 
				
			||||||
 | 
					                  GridParallelRNG *RNG4,
 | 
				
			||||||
 | 
					                  GridParallelRNG *RNG5);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class What> 
 | 
				
			||||||
 | 
					void  TestCGunprec(What & Ddwf, 
 | 
				
			||||||
 | 
					                   GridCartesian         * FGrid,              GridRedBlackCartesian * FrbGrid,
 | 
				
			||||||
 | 
					                   GridCartesian         * UGrid,              GridRedBlackCartesian * UrbGrid,
 | 
				
			||||||
 | 
					                   RealD mass, RealD M5,
 | 
				
			||||||
 | 
					                   GridParallelRNG *RNG4,
 | 
				
			||||||
 | 
					                   GridParallelRNG *RNG5);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class What> 
 | 
				
			||||||
 | 
					void  TestCGprec(What & Ddwf, 
 | 
				
			||||||
 | 
					                 GridCartesian         * FGrid,        GridRedBlackCartesian * FrbGrid,
 | 
				
			||||||
 | 
					                 GridCartesian         * UGrid,        GridRedBlackCartesian * UrbGrid,
 | 
				
			||||||
 | 
					                 RealD mass, RealD M5,
 | 
				
			||||||
 | 
					                 GridParallelRNG *RNG4,
 | 
				
			||||||
 | 
					                 GridParallelRNG *RNG5);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int main (int argc, char ** argv)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  Grid_init(&argc,&argv);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  int threads = GridThread::GetThreads();
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << "###########################################" << std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << "This test disables the convergence alert" << std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << "and checks whether in consecutive runs of the CG" << std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << "the internal reductions are bit reproducible." << std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << "A warning and a report is produced for every failure" << std::endl;
 | 
				
			||||||
 | 
					  std::cout << GridLogMessage << "###########################################" << std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  const int Ls=8;
 | 
				
			||||||
 | 
					  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
 | 
				
			||||||
 | 
					  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
 | 
				
			||||||
 | 
					  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
 | 
				
			||||||
 | 
					  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::vector<int> seeds4({1,2,3,4});
 | 
				
			||||||
 | 
					  std::vector<int> seeds5({5,6,7,8});
 | 
				
			||||||
 | 
					  GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5);
 | 
				
			||||||
 | 
					  GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  LatticeGaugeField Umu(UGrid);
 | 
				
			||||||
 | 
					  SU3::HotConfiguration(RNG4,Umu);
 | 
				
			||||||
 | 
					  std::vector<LatticeColourMatrix> U(4,UGrid);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  RealD mass=0.1;
 | 
				
			||||||
 | 
					  RealD M5  =1.8;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage <<"DomainWallFermion test"<<std::endl;
 | 
				
			||||||
 | 
					  DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
 | 
				
			||||||
 | 
					  TestCGinversions<DomainWallFermionR>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  RealD b=1.5;// Scale factor b+c=2, b-c=1
 | 
				
			||||||
 | 
					  RealD c=0.5;
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage <<"MobiusFermion test"<<std::endl;
 | 
				
			||||||
 | 
					  MobiusFermionR Dmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c);
 | 
				
			||||||
 | 
					  TestCGinversions<MobiusFermionR>(Dmob,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage <<"MobiusZolotarevFermion test"<<std::endl;
 | 
				
			||||||
 | 
					  MobiusZolotarevFermionR Dzolo(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,0.1,2.0);
 | 
				
			||||||
 | 
					  TestCGinversions<MobiusZolotarevFermionR>(Dzolo,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage <<"ScaledShamirFermion test"<<std::endl;
 | 
				
			||||||
 | 
					  ScaledShamirFermionR Dsham(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,2.0);
 | 
				
			||||||
 | 
					  TestCGinversions<ScaledShamirFermionR>(Dsham,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage <<"ShamirZolotarevFermion test"<<std::endl;
 | 
				
			||||||
 | 
					  ShamirZolotarevFermionR Dshamz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,2.0);
 | 
				
			||||||
 | 
					  TestCGinversions<ShamirZolotarevFermionR>(Dshamz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage <<"OverlapWilsonCayleyTanhFermion test"<<std::endl;
 | 
				
			||||||
 | 
					  OverlapWilsonCayleyTanhFermionR Dov(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
 | 
				
			||||||
 | 
					  TestCGinversions<OverlapWilsonCayleyTanhFermionR>(Dov,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage <<"OverlapWilsonCayleyZolotarevFermion test"<<std::endl;
 | 
				
			||||||
 | 
					  OverlapWilsonCayleyZolotarevFermionR Dovz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,2.0);
 | 
				
			||||||
 | 
					  TestCGinversions<OverlapWilsonCayleyZolotarevFermionR>(Dovz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Grid_finalize();
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					template<class What> 
 | 
				
			||||||
 | 
					void  TestCGinversions(What & Ddwf, 
 | 
				
			||||||
 | 
					                       GridCartesian         * FGrid,          GridRedBlackCartesian * FrbGrid,
 | 
				
			||||||
 | 
					                       GridCartesian         * UGrid,          GridRedBlackCartesian * UrbGrid,
 | 
				
			||||||
 | 
					                       RealD mass, RealD M5,
 | 
				
			||||||
 | 
					                       GridParallelRNG *RNG4,
 | 
				
			||||||
 | 
					                       GridParallelRNG *RNG5)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "Testing unpreconditioned inverter"<<std::endl;
 | 
				
			||||||
 | 
					  TestCGunprec<What>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5);
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "Testing red black preconditioned inverter"<<std::endl;
 | 
				
			||||||
 | 
					  TestCGprec<What>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5);
 | 
				
			||||||
 | 
					  std::cout<<GridLogMessage << "Testing red black Schur inverter"<<std::endl;
 | 
				
			||||||
 | 
					  TestCGschur<What>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class What> 
 | 
				
			||||||
 | 
					void  TestCGunprec(What & Ddwf, 
 | 
				
			||||||
 | 
					                   GridCartesian         * FGrid,              GridRedBlackCartesian * FrbGrid,
 | 
				
			||||||
 | 
					                   GridCartesian         * UGrid,              GridRedBlackCartesian * UrbGrid,
 | 
				
			||||||
 | 
					                   RealD mass, RealD M5,
 | 
				
			||||||
 | 
					                   GridParallelRNG *RNG4,
 | 
				
			||||||
 | 
					                   GridParallelRNG *RNG5)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  LatticeFermion src   (FGrid); random(*RNG5,src);
 | 
				
			||||||
 | 
					  LatticeFermion result(FGrid); result=zero;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  MdagMLinearOperator<What,LatticeFermion> HermOp(Ddwf);
 | 
				
			||||||
 | 
					  ConjugateGradient<LatticeFermion> CG(1.0e-8,10000, ReproducibilityTest);
 | 
				
			||||||
 | 
					  CG.set_reproducibility_interval(REPRODUCIBILITY_INTERVAL);
 | 
				
			||||||
 | 
					  CG(HermOp,src,result);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					template<class What> 
 | 
				
			||||||
 | 
					void  TestCGprec(What & Ddwf, 
 | 
				
			||||||
 | 
					                 GridCartesian         * FGrid,        GridRedBlackCartesian * FrbGrid,
 | 
				
			||||||
 | 
					                 GridCartesian         * UGrid,        GridRedBlackCartesian * UrbGrid,
 | 
				
			||||||
 | 
					                 RealD mass, RealD M5,
 | 
				
			||||||
 | 
					                 GridParallelRNG *RNG4,
 | 
				
			||||||
 | 
					                 GridParallelRNG *RNG5)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  LatticeFermion src   (FGrid); random(*RNG5,src);
 | 
				
			||||||
 | 
					  LatticeFermion    src_o(FrbGrid);
 | 
				
			||||||
 | 
					  LatticeFermion result_o(FrbGrid);
 | 
				
			||||||
 | 
					  pickCheckerboard(Odd,src_o,src);
 | 
				
			||||||
 | 
					  result_o=zero;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  SchurDiagMooeeOperator<What,LatticeFermion> HermOpEO(Ddwf);
 | 
				
			||||||
 | 
					  ConjugateGradient<LatticeFermion> CG(1.0e-8,10000, ReproducibilityTest);
 | 
				
			||||||
 | 
					  CG.set_reproducibility_interval(REPRODUCIBILITY_INTERVAL);
 | 
				
			||||||
 | 
					  CG(HermOpEO,src_o,result_o);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template<class What> 
 | 
				
			||||||
 | 
					void  TestCGschur(What & Ddwf, 
 | 
				
			||||||
 | 
					                   GridCartesian         * FGrid,              GridRedBlackCartesian * FrbGrid,
 | 
				
			||||||
 | 
					                   GridCartesian         * UGrid,              GridRedBlackCartesian * UrbGrid,
 | 
				
			||||||
 | 
					                   RealD mass, RealD M5,
 | 
				
			||||||
 | 
					                   GridParallelRNG *RNG4,
 | 
				
			||||||
 | 
					                   GridParallelRNG *RNG5)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  LatticeFermion src   (FGrid); random(*RNG5,src);
 | 
				
			||||||
 | 
					  LatticeFermion result(FGrid); result=zero;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ConjugateGradient<LatticeFermion> CG(1.0e-8,10000, ReproducibilityTest);
 | 
				
			||||||
 | 
					  CG.set_reproducibility_interval(REPRODUCIBILITY_INTERVAL);
 | 
				
			||||||
 | 
					  SchurRedBlackDiagMooeeSolve<LatticeFermion> SchurSolver(CG);
 | 
				
			||||||
 | 
					  SchurSolver(Ddwf,src,result);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
@@ -63,6 +63,9 @@ class HmcRunner : public NerscHmcRunner {
 | 
				
			|||||||
    Real mass = -0.77;
 | 
					    Real mass = -0.77;
 | 
				
			||||||
    FermionAction FermOp(U, *FGrid, *FrbGrid, mass);
 | 
					    FermionAction FermOp(U, *FGrid, *FrbGrid, mass);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // To enable the CG reproducibility tests use 
 | 
				
			||||||
 | 
					    //ConjugateGradient<FermionField> CG(1.0e-8, 10000, ReproducibilityTest);
 | 
				
			||||||
 | 
					    // This is the plain version
 | 
				
			||||||
    ConjugateGradient<FermionField> CG(1.0e-8, 10000);
 | 
					    ConjugateGradient<FermionField> CG(1.0e-8, 10000);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    TwoFlavourPseudoFermionAction<ImplPolicy> Nf2(FermOp, CG, CG);
 | 
					    TwoFlavourPseudoFermionAction<ImplPolicy> Nf2(FermOp, CG, CG);
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -71,7 +71,7 @@ int main (int argc, char ** argv)
 | 
				
			|||||||
  WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
 | 
					  WilsonFermionR Dw(Umu,Grid,RBGrid,mass);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
 | 
					  MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
 | 
				
			||||||
  ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
 | 
					  ConjugateGradient<LatticeFermion> CG(1.0e-8,10000,true, true);
 | 
				
			||||||
  CG(HermOp,src,result);
 | 
					  CG(HermOp,src,result);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  Grid_finalize();
 | 
					  Grid_finalize();
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user