mirror of https://github.com/paboyle/Grid.git (synced 2025-06-13 04:37:05 +01:00)
Compare commits
20 Commits
ad83c3bc60...feature/CG
Commits (SHA1):
70068cff51
85c055fa30
90fedbd2af
ec0c53fa68
6ceee102e8
6e57bdb6b3
4c11e36d3d
9977c53035
3a74fec62f
8fb0a13f39
14a1406f54
538e64e5b4
b2dc17e160
afbbcd2194
d4e0b11bb1
7144ee7ae8
f1908c7bc9
036ec31c48
53f240200e
9720c9ba3f
lib/Bitwise.cc (new file, 49 lines)
@@ -0,0 +1,49 @@
/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: ./lib/Bitwise.cc

    Copyright (C) 2016

    Author: Guido Cossu <guido.cossu@ed.ac.uk>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution
    directory
*************************************************************************************/
/* END LEGAL */
#include <iostream>
#include <Bitwise.h>
#include <bitset>
#include <climits>

namespace Grid {

// Print the binary representation of a buffer, byte by byte,
// breaking the line after every GRID_REAL_BYTES bytes.
void show_binaryrep(const unsigned char* a, size_t size) {
  const unsigned char* beg = a;
  const unsigned char* end = a + size;
  unsigned int ctr = 0;
  while (beg != end) {
    std::cout << std::bitset<CHAR_BIT>(*beg++) << ' ';
    ctr++;
    if (ctr % GRID_REAL_BYTES == 0) std::cout << '\n';
  }
  std::cout << '\n';
}

}  // namespace Grid
lib/Bitwise.h (new file, 76 lines)
@@ -0,0 +1,76 @@
/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: ./lib/Bitwise.h

    Copyright (C) 2016

    Author: Guido Cossu <guido.cossu@ed.ac.uk>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution
    directory
*************************************************************************************/
/* END LEGAL */
#ifndef GRID_BITWISE_H
#define GRID_BITWISE_H

#include <cassert>
#include <cfloat>
#include <bitset>
#include <climits>
#include <iostream>
#include <Config.h>

#ifdef GRID_DEFAULT_PRECISION_SINGLE
#define GRID_REAL_BYTES 4
#endif
#ifdef GRID_DEFAULT_PRECISION_DOUBLE
#define GRID_REAL_BYTES 8
#endif

namespace Grid {

void show_binaryrep(const unsigned char* a, size_t size);

// Print the binary representation of an arbitrary object.
template <typename T>
void show_binaryrep(const T& a) {
  const char* beg = reinterpret_cast<const char*>(&a);
  const char* end = beg + sizeof(a);
  unsigned int ctr = 0;
  while (beg != end) {
    std::cout << std::bitset<CHAR_BIT>(*beg++) << ' ';
    ctr++;
    if (ctr % GRID_REAL_BYTES == 0) std::cout << '\n';
  }
  std::cout << '\n';
}

// Byte-wise XOR of two equally sized objects; xors must point to at
// least sizeof(T) bytes. Non-zero output bytes flag differing bits.
template <typename T>
void bitwise_xor(T& l, T& r, unsigned char* xors) {
  assert(sizeof(l) == sizeof(r));
  unsigned char* org = reinterpret_cast<unsigned char*>(&l);
  unsigned char* cur = reinterpret_cast<unsigned char*>(&r);
  int words = sizeof(l) / sizeof(*org);
  for (int w = 0; w < words; w++) xors[w] = (org[w] ^ cur[w]);
}

}  // namespace Grid

#endif
@@ -62,6 +62,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 #include <Grid/serialisation/Serialisation.h>
 #include "Config.h"
 #include <Grid/Timer.h>
+#include <Grid/Bitwise.h>
 #include <Grid/PerfCount.h>
 #include <Grid/Log.h>
 #include <Grid/AlignedAllocator.h>
@@ -100,7 +100,7 @@ void Grid_quiesce_nodes(void) {
   me = shmem_my_pe();
 #endif
   if (me) {
-    std::cout.setstate(std::ios::badbit);
+    std::cout.setstate(std::ios::badbit);  // mute all nodes except 0
   }
 }
@@ -38,19 +38,21 @@ Author: paboyle <paboyle@ph.ed.ac.uk> (plus whitespace-only changes)
 #ifdef GRID_OMP
 #include <omp.h>
 #ifdef GRID_NUMA
 #define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)")
 #define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)")
 #else
 #define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(runtime)")
 #define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(runtime)")
 #endif
 #define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)")
 #define PARALLEL_REGION _Pragma("omp parallel")
+#define PARALLEL_FOR_LOOP_STATIC _Pragma("omp parallel for schedule(static)")
 #else
 #define PARALLEL_FOR_LOOP
 #define PARALLEL_FOR_LOOP_INTERN
 #define PARALLEL_NESTED_LOOP2
 #define PARALLEL_REGION
+#define PARALLEL_FOR_LOOP_STATIC
 #endif

 namespace Grid {
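The new PARALLEL_FOR_LOOP_STATIC exists because only a static schedule pins each loop index range to the same thread on every run, which is what makes per-thread partial sums comparable between passes. A minimal standalone sketch of the idea (not part of the commit):

// --- why schedule(static) matters; illustrative, not part of the commit ---
#include <omp.h>
#include <vector>

// schedule(static) hands each thread the same index range on every run, so the
// per-thread partial sums sums[t] are bitwise reproducible; schedule(runtime)
// or dynamic schedules may redistribute the work and change them.
std::vector<double> partial_sums(const std::vector<double>& data, int nthreads) {
  std::vector<double> sums(nthreads, 0.0);
#pragma omp parallel for schedule(static) num_threads(nthreads)
  for (long i = 0; i < (long)data.size(); i++) {
    sums[omp_get_thread_num()] += data[i];
  }
  return sums;
}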
@@ -9,6 +9,7 @@ Copyright (C) 2015

 Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
 Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 Author: paboyle <paboyle@ph.ed.ac.uk>
+Author: Guido Cossu <guido.cossu@ed.ac.uk>

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -33,6 +34,21 @@ directory

 namespace Grid {

+struct CG_state {
+  bool do_repro;
+  std::vector<RealD> residuals;
+
+  CG_state() { reset(); }
+
+  void reset() {
+    do_repro = false;
+    residuals.clear();
+  }
+};
+
+enum CGexec_mode { Default, ReproducibilityTest };
+
 /////////////////////////////////////////////////////////////
 // Base classes for iterative processes based on operators
 // single input vec, single output vec.
@@ -45,10 +61,30 @@ class ConjugateGradient : public OperatorFunction<Field> {
   // Defaults true.
   RealD Tolerance;
   Integer MaxIterations;
-  ConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true)
-      : Tolerance(tol),
-        MaxIterations(maxit),
-        ErrorOnNoConverge(err_on_no_conv){};
+
+  // Reproducibility controls
+  bool ReproTest;
+  CG_state CGState;  // to check reproducibility by repeating the CG
+  ReproducibilityState<typename Field::vector_object> ReprTest;  // for the inner products
+
+  // Constructor
+  ConjugateGradient(RealD tol, Integer maxit, CGexec_mode Mode = Default)
+      : Tolerance(tol), MaxIterations(maxit) {
+    switch (Mode) {
+      case Default:
+        ErrorOnNoConverge = true;
+        ReproTest = false;
+        break;  // the break is needed; otherwise Default falls through to the test mode
+      case ReproducibilityTest:
+        ErrorOnNoConverge = false;
+        ReproTest = true;
+        break;
+    }
+  };
+
+  void set_reproducibility_interval(unsigned int interval) {
+    ReprTest.interval = interval;
+  }

   void operator()(LinearOperatorBase<Field> &Linop, const Field &src,
                   Field &psi) {
@@ -60,34 +96,37 @@ class ConjugateGradient : public OperatorFunction<Field> {
     Field p(src);
     Field mmp(src);
     Field r(src);
+    Field psi_start(psi);  // save for the repro test
+
+    if (CGState.do_repro && ReproTest)
+      std::cout << GridLogMessage << "Starting reproducibility test, full check every "
+                << ReprTest.interval << " calls" << std::endl;
+
+    if (!ReprTest.do_check)
+      ReprTest.reset();
+    ReprTest.enable_reprocheck = ReproTest;

     // Initial residual computation & set up
-    RealD guess = norm2(psi);
+    RealD guess = norm2(psi, ReprTest);
     assert(std::isnan(guess) == 0);

-    Linop.HermOpAndNorm(psi, mmp, d, b);  // eventually split this for the norm check
+    Linop.HermOpAndNorm(psi, mmp, d, b);

     r = src - mmp;
     p = r;

-    a = norm2(p);
+    a = norm2(p, ReprTest);
     cp = a;
-    ssq = norm2(src);
+    ssq = norm2(src, ReprTest);

-    std::cout << GridLogIterative << std::setprecision(4)
-              << "ConjugateGradient: guess " << guess << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4)
-              << "ConjugateGradient: src " << ssq << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4)
-              << "ConjugateGradient: mp " << d << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4)
-              << "ConjugateGradient: mmp " << b << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4)
-              << "ConjugateGradient: cp,r " << cp << std::endl;
-    std::cout << GridLogIterative << std::setprecision(4)
-              << "ConjugateGradient: p " << a << std::endl;
+    std::cout << GridLogIterative << "ConjugateGradient: guess " << guess << std::endl;
+    std::cout << GridLogIterative << "ConjugateGradient: src " << ssq << std::endl;
+    std::cout << GridLogIterative << "ConjugateGradient: mp " << d << std::endl;
+    std::cout << GridLogIterative << "ConjugateGradient: mmp " << b << std::endl;
+    std::cout << GridLogIterative << "ConjugateGradient: cp,r " << cp << std::endl;
+    std::cout << GridLogIterative << "ConjugateGradient: p " << a << std::endl;

     RealD rsq = Tolerance * Tolerance * ssq;
@@ -107,10 +146,10 @@ class ConjugateGradient : public OperatorFunction<Field> {
     SolverTimer.Start();
     int k;
     for (k = 1; k <= MaxIterations; k++) {
-      c = cp;
+      c = cp;  // old residual

       MatrixTimer.Start();
-      Linop.HermOpAndNorm(p, mmp, d, qq);
+      Linop.HermOpAndNorm(p, mmp, d, qq);  // mmp = Ap, d = pAp
       MatrixTimer.Stop();

       LinalgTimer.Start();
@@ -118,14 +157,31 @@ class ConjugateGradient : public OperatorFunction<Field> {
       // ComplexD dck = innerProduct(p,mmp);

       a = c / d;
-      b_pred = a * (a * qq - d) / c;
+      b_pred = a * (a * qq - d) / c;  // a check

-      cp = axpy_norm(r, -a, mmp, r);
+      axpy(r, -a, mmp, r);      // new residual r = r_old - a * Ap
+      cp = norm2(r, ReprTest);  // bookkeeping this norm
+      if (ReproTest && !CGState.do_repro) {
+        CGState.residuals.push_back(cp);  // save residuals state
+        std::cout << GridLogIterative << "ReproTest: Saving state" << std::endl;
+      }
+      if (ReproTest && CGState.do_repro) {
+        // check that the residual agrees with the previous run
+        std::cout << GridLogIterative << "ReproTest: Checking state k=" << k << std::endl;
+        if (cp != CGState.residuals[k-1]) {
+          std::cout << GridLogMessage << "Failing reproducibility test";
+          std::cout << GridLogMessage << " at k=" << k << std::endl;
+          std::cout << GridLogMessage << "saved residual = " << CGState.residuals[k-1]
+                    << " cp = " << cp << std::endl;
+          exit(1);  // exit after the first failure
+        }
+      }
       b = cp / c;

       // Fuse these loops ; should be really easy
-      psi = a * p + psi;
-      p = p * b + r;
+      psi = a * p + psi;  // update solution
+      p = p * b + r;      // update search direction

       LinalgTimer.Stop();
       std::cout << GridLogIterative << "ConjugateGradient: Iteration " << k
@@ -156,6 +212,22 @@ class ConjugateGradient : public OperatorFunction<Field> {

         if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);

+        if (!(CGState.do_repro && ReproTest)) {
+          CGState.do_repro = true;
+          ReprTest.do_check = true;
+          ReprTest.reset_counter();
+          this->operator()(Linop, src, psi_start);  // run the repro test
+          if (ReprTest.success)
+            std::cout << GridLogMessage << "Reproducibility test passed" << std::endl;
+          else {
+            std::cout << GridLogMessage << "Reproducibility test failed" << std::endl;
+            exit(1);
+          }
+        }
+
+        // Clear state
+        CGState.reset();
+        ReprTest.reset();
         return;
       }
     }
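Taken together, the additions give operator() a two-pass structure. A schematic of the control flow (illustrative only, not part of the commit):

// pass 1 (normal solve): each iteration appends cp = norm2(r, ReprTest) to
//         CGState.residuals while ReprTest records every thread's partial sums;
// pass 2: on completion, operator() re-runs itself on the saved start vector
//         psi_start with CGState.do_repro = true; each new residual must match
//         the stored one bit for bit, and ReprTest.check() XOR-compares the
//         per-thread partial sums of every reduction against the recorded state.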
@@ -59,7 +59,7 @@ public:

   int Nstop;  // Number of evecs checked for convergence
   int Nk;     // Number of converged sought
-  int Np;     // Np -- Number of spare vecs in kryloc space
+  int Np;     // Np -- Number of spare vecs in krylov space
   int Nm;     // Nm -- total number of vectors

   RealD eresid;
@@ -65,6 +65,7 @@ const std::vector<int> & CartesianCommunicator::ThisProcessorCoor(void) { return
 const std::vector<int> & CartesianCommunicator::ProcessorGrid(void) { return _processors; };
 int CartesianCommunicator::ProcessorCount(void) { return _Nprocessors; };

+
 ////////////////////////////////////////////////////////////////////////////////
 // very VERY rarely (Log, serial RNG) we need world without a grid
 ////////////////////////////////////////////////////////////////////////////////

@@ -89,11 +90,11 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N) (indentation-only changes)
 #if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPI3L)

 void CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
                                                        void *xmit,
                                                        int xmit_to_rank,
                                                        void *recv,
                                                        int recv_from_rank,
                                                        int bytes)
 {
   SendToRecvFromBegin(list,xmit,xmit_to_rank,recv,recv_from_rank,bytes);
 }
@@ -68,6 +68,8 @@ class CartesianCommunicator {
   static MPI_Comm communicator_world;
   MPI_Comm communicator;
   typedef MPI_Request CommsRequest_t;
+  static char name[MPI_MAX_PROCESSOR_NAME];  // processing node physical name
+  static int length;
 #else
   typedef int CommsRequest_t;
 #endif

@@ -149,6 +151,7 @@ class CartesianCommunicator {
   const std::vector<int> & ProcessorGrid(void) ;
   int ProcessorCount(void) ;

+  void PrintRankInfo(void) ;
   ////////////////////////////////////////////////////////////////////////////////
   // very VERY rarely (Log, serial RNG) we need world without a grid
   ////////////////////////////////////////////////////////////////////////////////
@@ -35,6 +35,8 @@ namespace Grid {
 // Info that is setup once and indept of cartesian layout
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 MPI_Comm CartesianCommunicator::communicator_world;
+char CartesianCommunicator::name[MPI_MAX_PROCESSOR_NAME];  // processing node physical name
+int CartesianCommunicator::length;

 // Should error check all MPI calls.
 void CartesianCommunicator::Init(int *argc, char ***argv) {

@@ -44,6 +46,8 @@ void CartesianCommunicator::Init(int *argc, char ***argv) {
     MPI_Init(argc,argv);
   }
   MPI_Comm_dup (MPI_COMM_WORLD,&communicator_world);
+
+  MPI_Get_processor_name(name, &length);
   ShmInitGeneric();
 }

@@ -206,5 +210,10 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
   assert(ierr==0);
 }

+void CartesianCommunicator::PrintRankInfo(){
+  std::cout << "Grid: Rank "<< _processor << " - Physical node name: " << name << std::endl;
+}
+
 }// end of namespace
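Each communications backend now reports its identity through PrintRankInfo(). Under MPI the result is one line per rank built from MPI_Get_processor_name, of the form below (the hostnames here are made up for illustration):

// --- illustrative output of PrintRankInfo() under MPI, hostnames hypothetical ---
// Grid: Rank 0 - Physical node name: node-001
// Grid: Rank 1 - Physical node name: node-002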
@@ -576,5 +576,10 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
   assert(ierr==0);
 }

+void CartesianCommunicator::PrintRankInfo(){
+  std::cout << "Grid: Rank "<< _processor << " - Physical node name: " << name << std::endl;
+}
+
 }
@@ -869,6 +869,9 @@ void *CartesianCommunicator::ShmBufferTranslate(int rank,void * local_p) {
   return NULL;
 }

+void CartesianCommunicator::PrintRankInfo(){
+  std::cout << "Grid: Rank "<< _processor << " - Physical node name: " << name << std::endl;
+}

 };
@@ -37,6 +37,11 @@ void CartesianCommunicator::Init(int *argc, char *** arv)
   ShmInitGeneric();
 }

+void CartesianCommunicator::PrintRankInfo(){
+  std::cout << GridLogMessage << "No Rank Info available" << std::endl;
+}
+
 CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors)
 {
   _processors = processors;

@@ -60,10 +65,10 @@ void CartesianCommunicator::GlobalSum(uint64_t &){} (indentation-only changes)
 void CartesianCommunicator::GlobalSumVector(double *,int N){}

 void CartesianCommunicator::SendRecvPacket(void *xmit,
                                            void *recv,
                                            int xmit_to_rank,
                                            int recv_from_rank,
                                            int bytes)
 {
   assert(0);
 }

@@ -71,19 +76,19 @@ void CartesianCommunicator::SendRecvPacket(void *xmit, (indentation-only changes)

 // Basic Halo comms primitive -- should never call in single node
 void CartesianCommunicator::SendToRecvFrom(void *xmit,
                                            int dest,
                                            void *recv,
                                            int from,
                                            int bytes)
 {
   assert(0);
 }
 void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
                                                 void *xmit,
                                                 int dest,
                                                 void *recv,
                                                 int from,
                                                 int bytes)
 {
   assert(0);
 }
@@ -333,5 +333,9 @@ void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
   }
 }

+void CartesianCommunicator::PrintRankInfo(){
+  std::cout << GridLogMessage << "SHMEM: Rank Info not implemented yet" << std::endl;
+}
+
 }
@@ -31,6 +31,87 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 #define GRID_LATTICE_REDUCTION_H

 namespace Grid {

+  template <class T>
+  class ReproducibilityState {
+   public:
+    typedef typename T::vector_type vector_type;
+    typedef std::vector<vector_type, alignedAllocator<vector_type> > sum_type;
+    unsigned int n_call;              // reduction calls seen so far
+    bool do_check;                    // compare against saved states instead of recording
+    bool enable_reprocheck;
+    bool success;
+    unsigned int interval;            // full check every 'interval' calls
+    std::vector<sum_type> th_states;  // per-call snapshot of the thread partial sums
+
+    void reset_counter() { n_call = 0; }
+    void reset() {
+      th_states.clear();
+      do_check = false;
+      enable_reprocheck = false;
+      success = true;
+      n_call = 0;
+    };
+
+    ReproducibilityState() : interval(1) { reset(); }
+
+    void check(GridBase* grid, sum_type& sumarray) {
+      /////////////////////// Reproducibility section, not threaded on purpose
+      if (enable_reprocheck) {
+        if (do_check && (n_call % interval) == 0) {
+          for (int thread = 0; thread < sumarray.size(); thread++) {
+            int words = sizeof(sumarray[thread]) / sizeof(unsigned char);
+            unsigned char xors[words];  // note: variable-length array, a compiler extension
+            bitwise_xor(sumarray[thread], th_states[n_call][thread], xors);
+            // OR all words
+            unsigned char res = 0;
+            for (int w = 0; w < words; w++) res = res | xors[w];
+
+            Grid_unquiesce_nodes();
+            int rank = 0;
+            while (rank < grid->_Nprocessors) {
+              if (rank == grid->ThisRank()) {
+                if (res) {
+                  std::cout << "Reproducibility failure report" << std::endl;
+                  grid->PrintRankInfo();
+                  std::cout << "Call: " << n_call << " Thread: " << thread << std::endl;
+                  std::cout << "Size of states: " << th_states.size() << std::endl;
+                  std::cout << std::setprecision(GRID_REAL_DIGITS + 1) << std::scientific;
+                  std::cout << "Saved partial sum  : " << th_states[n_call][thread] << std::endl;
+                  std::cout << "Current partial sum: " << sumarray[thread] << std::endl;
+                  std::cout << "Saved state " << std::endl; show_binaryrep(th_states[n_call][thread]);
+                  std::cout << "Current state" << std::endl; show_binaryrep(sumarray[thread]);
+                  std::cout << "XOR result" << std::endl; show_binaryrep(xors, words);
+                  // std::cout << std::defaultfloat; // not supported by some compilers
+                  std::cout << std::setprecision(6);
+                  success = false;
+                }
+              }
+              rank++;
+              grid->Barrier();
+            }
+            Grid_quiesce_nodes();
+          }
+        } else if (!do_check) {
+          std::cout << GridLogDebug << "Saving thread state for inner product. Call n. "
+                    << n_call << std::endl;
+          th_states.resize(n_call + 1);
+          th_states[n_call].resize(grid->SumArraySize());
+          th_states[n_call] = sumarray;  // save threads state
+        }
+        n_call++;
+      }
+    }
+  };
+
 #ifdef GRID_WARN_SUBOPTIMAL
 #warning "Optimisation alert all these reduction loops are NOT threaded "
 #endif
@@ -39,12 +120,25 @@ namespace Grid {
 // Deterministic Reduction operations
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 template<class vobj> inline RealD norm2(const Lattice<vobj> &arg){
-  ComplexD nrm = innerProduct(arg,arg);
-  return std::real(nrm);
-}
+  ReproducibilityState<vobj> repr;
+  ComplexD nrm = innerProduct(arg, arg, repr);
+  return std::real(nrm);
+}
+
+template <class vobj>
+inline RealD norm2(const Lattice<vobj> &arg, ReproducibilityState<vobj>& rep) {
+  ComplexD nrm = innerProduct(arg, arg, rep);
+  return std::real(nrm);
+}

 template<class vobj>
-inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right)
+inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right){
+  ReproducibilityState<vobj> repr;
+  return innerProduct(left, right, repr);
+}
+
+template<class vobj>
+inline ComplexD innerProduct(const Lattice<vobj> &left,const Lattice<vobj> &right, ReproducibilityState<vobj>& repr)
 {
   typedef typename vobj::scalar_type scalar_type;
   typedef typename vobj::vector_type vector_type;
@@ -54,24 +148,28 @@ namespace Grid { (plus indentation-only changes)

   std::vector<vector_type,alignedAllocator<vector_type> > sumarray(grid->SumArraySize());
   for(int i=0;i<grid->SumArraySize();i++){
     sumarray[i]=zero;
   }

-PARALLEL_FOR_LOOP
+PARALLEL_FOR_LOOP_STATIC  // request statically scheduled threads for reproducibility
   for(int thr=0;thr<grid->SumArraySize();thr++){
     int nwork, mywork, myoff;
     GridThread::GetWork(left._grid->oSites(),thr,mywork,myoff);

     decltype(innerProduct(left._odata[0],right._odata[0])) vnrm = zero; // private to thread; sub summation
     for(int ss=myoff;ss<mywork+myoff; ss++){
       vnrm = vnrm + innerProduct(left._odata[ss],right._odata[ss]);
     }
     sumarray[thr]=TensorRemove(vnrm) ;
   }

+  /////////// Reproducibility
+  repr.check(grid, sumarray);
+  ///////////////////////////
+
   vector_type vvnrm; vvnrm=zero;  // sum across threads
   for(int i=0;i<grid->SumArraySize();i++){
     vvnrm = vvnrm+sumarray[i];
   }
   nrm = Reduce(vvnrm);  // sum across simd
   right._grid->GlobalSum(nrm);
@@ -79,26 +177,26 @@ PARALLEL_FOR_LOOP (indentation-only changes)
 }

 template<class Op,class T1>
 inline auto sum(const LatticeUnaryExpression<Op,T1> & expr)
   ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second))))::scalar_object
 {
   return sum(closure(expr));
 }

 template<class Op,class T1,class T2>
 inline auto sum(const LatticeBinaryExpression<Op,T1,T2> & expr)
   ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),eval(0,std::get<1>(expr.second))))::scalar_object
 {
   return sum(closure(expr));
 }

 template<class Op,class T1,class T2,class T3>
 inline auto sum(const LatticeTrinaryExpression<Op,T1,T2,T3> & expr)
   ->typename decltype(expr.first.func(eval(0,std::get<0>(expr.second)),
                                       eval(0,std::get<1>(expr.second)),
                                       eval(0,std::get<2>(expr.second))
                                       ))::scalar_object
 {
   return sum(closure(expr));
 }
@@ -111,24 +209,24 @@ PARALLEL_FOR_LOOP (indentation-only changes)

   std::vector<vobj,alignedAllocator<vobj> > sumarray(grid->SumArraySize());
   for(int i=0;i<grid->SumArraySize();i++){
     sumarray[i]=zero;
   }

 PARALLEL_FOR_LOOP
   for(int thr=0;thr<grid->SumArraySize();thr++){
     int nwork, mywork, myoff;
     GridThread::GetWork(grid->oSites(),thr,mywork,myoff);

     vobj vvsum=zero;
     for(int ss=myoff;ss<mywork+myoff; ss++){
       vvsum = vvsum + arg._odata[ss];
     }
     sumarray[thr]=vvsum;
   }

   vobj vsum=zero;  // sum across threads
   for(int i=0;i<grid->SumArraySize();i++){
     vsum = vsum+sumarray[i];
   }

   typedef typename vobj::scalar_object sobj;

@@ -138,7 +236,7 @@ PARALLEL_FOR_LOOP (indentation-only changes)
   extract(vsum,buf);

   for(int i=0;i<Nsimd;i++) ssum = ssum + buf[i];
   arg._grid->GlobalSum(ssum);

   return ssum;
 }
@@ -146,23 +244,23 @@ PARALLEL_FOR_LOOP (indentation-only changes)

 template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<typename vobj::scalar_object> &result,int orthogdim)
 {
   typedef typename vobj::scalar_object sobj;
   GridBase *grid = Data._grid;
   assert(grid!=NULL);

   // FIXME
   // std::cout<<GridLogMessage<<"WARNING ! SliceSum is unthreaded "<<grid->SumArraySize()<<" threads "<<std::endl;

   const int Nd = grid->_ndimension;
   const int Nsimd = grid->Nsimd();

   assert(orthogdim >= 0);
   assert(orthogdim < Nd);

   int fd=grid->_fdimensions[orthogdim];
   int ld=grid->_ldimensions[orthogdim];
   int rd=grid->_rdimensions[orthogdim];

   std::vector<vobj,alignedAllocator<vobj> > lvSum(rd);  // will locally sum vectors first
   std::vector<sobj> lsSum(ld,zero);                     // sum across these down to scalars
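Outside the CG, the checked reduction can also be driven by hand. A minimal sketch (not part of the commit; x stands for any already initialised lattice field):

// --- illustrative usage sketch, not part of the commit ---
// x is any Lattice field, e.g. LatticeFermion x(FGrid);
ReproducibilityState<typename LatticeFermion::vector_object> state;
state.enable_reprocheck = true;               // first call records per-thread partial sums
ComplexD first = innerProduct(x, x, state);

state.do_check = true;                        // second call compares instead of recording
state.reset_counter();
ComplexD second = innerProduct(x, x, state);  // prints a failure report on any bit difference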
@@ -66,6 +66,9 @@ namespace Optimization {
     double f[4];
   };

+
+
+
   struct Vsplat{
     //Complex float
     inline __m256 operator()(float a, float b){
@@ -60,6 +60,13 @@ directory
 #include "Grid_neon.h"
 #endif

+#ifdef GRID_DEFAULT_PRECISION_SINGLE
+#define GRID_REAL_DIGITS FLT_DIG
+#endif
+#ifdef GRID_DEFAULT_PRECISION_DOUBLE
+#define GRID_REAL_DIGITS DBL_DIG
+#endif
+
 namespace Grid {

 //////////////////////////////////////
@@ -146,6 +153,23 @@ class Grid_simd {
   Grid_simd(const Grid_simd &rhs) : v(rhs.v){};  // compiles in movaps
   Grid_simd(const Grid_simd &&rhs) : v(rhs.v){};

+  /////////////////////////////
+  // Comparisons
+  /////////////////////////////
+  inline bool operator==(const Grid_simd& lhs) {
+    Grid_simd::conv_t conv1;
+    Grid_simd::conv_t conv2;
+    conv1.v = v;
+    conv2.v = lhs.v;
+    return std::equal(std::begin(conv1.s), std::end(conv1.s), std::begin(conv2.s));
+  }
+
+  inline bool operator!=(const Grid_simd& lhs) {
+    return !(*this == lhs);
+  }
+
   /////////////////////////////
   // Constructors
   /////////////////////////////
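With these operators, bit-level equality of SIMD vectors becomes a one-liner. A minimal sketch (not part of the commit; vsplat fills every lane with a scalar, assert is from <cassert>):

// --- illustrative usage sketch, not part of the commit ---
vRealD a, b;
vsplat(a, 1.0);
vsplat(b, 1.0);
assert(a == b);            // every lane equal, bit for bit
vsplat(b, 1.0 + 1.0e-15);  // change the low mantissa bits
assert(a != b);            // any differing bit in any lane flips the result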
tests/debug/Test_cayley_cg_reproducibility.cc (new file, 215 lines)
@@ -0,0 +1,215 @@
/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: ./tests/Test_cayley_cg_reproducibility.cc

    Copyright (C) 2015

    Author: Peter Boyle <paboyle@ph.ed.ac.uk>
    Author: Guido Cossu <guido.cossu@ed.ac.uk>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>

using namespace std;
using namespace Grid;
using namespace Grid::QCD;

#define REPRODUCIBILITY_INTERVAL 1

template<class d>
struct scal {
  d internal;
};

Gamma::GammaMatrix Gmu [] = {
  Gamma::GammaX,
  Gamma::GammaY,
  Gamma::GammaZ,
  Gamma::GammaT
};

template<class What>
void TestCGinversions(What & Ddwf,
                      GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid,
                      GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid,
                      RealD mass, RealD M5,
                      GridParallelRNG *RNG4,
                      GridParallelRNG *RNG5);
template<class What>
void TestCGschur(What & Ddwf,
                 GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid,
                 GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid,
                 RealD mass, RealD M5,
                 GridParallelRNG *RNG4,
                 GridParallelRNG *RNG5);

template<class What>
void TestCGunprec(What & Ddwf,
                  GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid,
                  GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid,
                  RealD mass, RealD M5,
                  GridParallelRNG *RNG4,
                  GridParallelRNG *RNG5);

template<class What>
void TestCGprec(What & Ddwf,
                GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid,
                GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid,
                RealD mass, RealD M5,
                GridParallelRNG *RNG4,
                GridParallelRNG *RNG5);

int main (int argc, char ** argv)
{
  Grid_init(&argc,&argv);

  int threads = GridThread::GetThreads();
  std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;

  std::cout << GridLogMessage << "###########################################" << std::endl;
  std::cout << GridLogMessage << "This test disables the convergence alert" << std::endl;
  std::cout << GridLogMessage << "and checks whether in consecutive runs of the CG" << std::endl;
  std::cout << GridLogMessage << "the internal reductions are bit reproducible." << std::endl;
  std::cout << GridLogMessage << "A warning and a report are produced for every failure" << std::endl;
  std::cout << GridLogMessage << "###########################################" << std::endl;

  const int Ls=8;
  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);

  std::vector<int> seeds4({1,2,3,4});
  std::vector<int> seeds5({5,6,7,8});
  GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
  GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);

  LatticeGaugeField Umu(UGrid);
  SU3::HotConfiguration(RNG4,Umu);
  std::vector<LatticeColourMatrix> U(4,UGrid);

  RealD mass=0.1;
  RealD M5  =1.8;
  std::cout<<GridLogMessage <<"DomainWallFermion test"<<std::endl;
  DomainWallFermionR Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
  TestCGinversions<DomainWallFermionR>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);

  RealD b=1.5;  // Scale factor b+c=2, b-c=1
  RealD c=0.5;
  std::cout<<GridLogMessage <<"MobiusFermion test"<<std::endl;
  MobiusFermionR Dmob(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c);
  TestCGinversions<MobiusFermionR>(Dmob,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);

  std::cout<<GridLogMessage <<"MobiusZolotarevFermion test"<<std::endl;
  MobiusZolotarevFermionR Dzolo(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c,0.1,2.0);
  TestCGinversions<MobiusZolotarevFermionR>(Dzolo,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);

  std::cout<<GridLogMessage <<"ScaledShamirFermion test"<<std::endl;
  ScaledShamirFermionR Dsham(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,2.0);
  TestCGinversions<ScaledShamirFermionR>(Dsham,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);

  std::cout<<GridLogMessage <<"ShamirZolotarevFermion test"<<std::endl;
  ShamirZolotarevFermionR Dshamz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,2.0);
  TestCGinversions<ShamirZolotarevFermionR>(Dshamz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);

  std::cout<<GridLogMessage <<"OverlapWilsonCayleyTanhFermion test"<<std::endl;
  OverlapWilsonCayleyTanhFermionR Dov(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,1.0);
  TestCGinversions<OverlapWilsonCayleyTanhFermionR>(Dov,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);

  std::cout<<GridLogMessage <<"OverlapWilsonCayleyZolotarevFermion test"<<std::endl;
  OverlapWilsonCayleyZolotarevFermionR Dovz(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,0.1,2.0);
  TestCGinversions<OverlapWilsonCayleyZolotarevFermionR>(Dovz,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,&RNG4,&RNG5);

  Grid_finalize();
}

template<class What>
void TestCGinversions(What & Ddwf,
                      GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid,
                      GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid,
                      RealD mass, RealD M5,
                      GridParallelRNG *RNG4,
                      GridParallelRNG *RNG5)
{
  std::cout<<GridLogMessage << "Testing unpreconditioned inverter"<<std::endl;
  TestCGunprec<What>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5);
  std::cout<<GridLogMessage << "Testing red black preconditioned inverter"<<std::endl;
  TestCGprec<What>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5);
  std::cout<<GridLogMessage << "Testing red black Schur inverter"<<std::endl;
  TestCGschur<What>(Ddwf,FGrid,FrbGrid,UGrid,UrbGrid,mass,M5,RNG4,RNG5);
}

template<class What>
void TestCGunprec(What & Ddwf,
                  GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid,
                  GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid,
                  RealD mass, RealD M5,
                  GridParallelRNG *RNG4,
                  GridParallelRNG *RNG5)
{
  LatticeFermion src   (FGrid); random(*RNG5,src);
  LatticeFermion result(FGrid); result=zero;

  MdagMLinearOperator<What,LatticeFermion> HermOp(Ddwf);
  ConjugateGradient<LatticeFermion> CG(1.0e-8,10000, ReproducibilityTest);
  CG.set_reproducibility_interval(REPRODUCIBILITY_INTERVAL);
  CG(HermOp,src,result);
}

template<class What>
void TestCGprec(What & Ddwf,
                GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid,
                GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid,
                RealD mass, RealD M5,
                GridParallelRNG *RNG4,
                GridParallelRNG *RNG5)
{
  LatticeFermion src   (FGrid); random(*RNG5,src);
  LatticeFermion src_o(FrbGrid);
  LatticeFermion result_o(FrbGrid);
  pickCheckerboard(Odd,src_o,src);
  result_o=zero;

  SchurDiagMooeeOperator<What,LatticeFermion> HermOpEO(Ddwf);
  ConjugateGradient<LatticeFermion> CG(1.0e-8,10000, ReproducibilityTest);
  CG.set_reproducibility_interval(REPRODUCIBILITY_INTERVAL);
  CG(HermOpEO,src_o,result_o);
}

template<class What>
void TestCGschur(What & Ddwf,
                 GridCartesian * FGrid, GridRedBlackCartesian * FrbGrid,
                 GridCartesian * UGrid, GridRedBlackCartesian * UrbGrid,
                 RealD mass, RealD M5,
                 GridParallelRNG *RNG4,
                 GridParallelRNG *RNG5)
{
  LatticeFermion src   (FGrid); random(*RNG5,src);
  LatticeFermion result(FGrid); result=zero;

  ConjugateGradient<LatticeFermion> CG(1.0e-8,10000, ReproducibilityTest);
  CG.set_reproducibility_interval(REPRODUCIBILITY_INTERVAL);
  SchurRedBlackDiagMooeeSolve<LatticeFermion> SchurSolver(CG);
  SchurSolver(Ddwf,src,result);
}
@@ -63,6 +63,9 @@ class HmcRunner : public NerscHmcRunner {
   Real mass = -0.77;
   FermionAction FermOp(U, *FGrid, *FrbGrid, mass);

+  // To enable the CG reproducibility tests use
+  // ConjugateGradient<FermionField> CG(1.0e-8, 10000, ReproducibilityTest);
+  // This is the plain version
   ConjugateGradient<FermionField> CG(1.0e-8, 10000);

   TwoFlavourPseudoFermionAction<ImplPolicy> Nf2(FermOp, CG, CG);
@@ -71,7 +71,7 @@ int main (int argc, char ** argv)
   WilsonFermionR Dw(Umu,Grid,RBGrid,mass);

   MdagMLinearOperator<WilsonFermionR,LatticeFermion> HermOp(Dw);
-  ConjugateGradient<LatticeFermion> CG(1.0e-8,10000);
+  ConjugateGradient<LatticeFermion> CG(1.0e-8,10000,true, true);
   CG(HermOp,src,result);

   Grid_finalize();