First cut at faster GPU slice sum via atomics

Added gauge invariance test
CPU open doesn't need to free space
2025-08-18 12:11:53 +01:00 · 2022-12-22 15:13:45 -05:00 · 2022-12-21 07:23:16 -05:00 · 2022-12-20 05:10:23 -05:00 · 2022-12-20 05:10:23 -05:00 · 2022-12-20 05:10:23 -05:00
6 changed files with 548 additions and 0 deletions
--- a/Grid/lattice/Lattice.h
+++ b/Grid/lattice/Lattice.h
@@ -46,3 +46,4 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 #include <Grid/lattice/Lattice_unary.h>
 #include <Grid/lattice/Lattice_transfer.h>
 #include <Grid/lattice/Lattice_basis.h>
+#include <Grid/lattice/Lattice_crc.h>
--- a/Grid/lattice/Lattice_crc.h
+++ b/Grid/lattice/Lattice_crc.h
@@ -0,0 +1,55 @@
+/*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./lib/lattice/Lattice_crc.h
+
+    Copyright (C) 2021
+
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+*************************************************************************************/
+/*  END LEGAL */
+#pragma once
+
+NAMESPACE_BEGIN(Grid);
+
+// Print the slice sums of the site-local norm2 of f, one line per slice:
+//   "<s> <slice index> <slice value>"
+//   s  : label written at the start of every line
+//   f  : lattice field whose local norm2 is reduced with sliceSum
+//   mu : direction handed to sliceSum; the default -1 selects f.Grid()->Nd()-1
+template<class vobj> void DumpSliceNorm(std::string s,Lattice<vobj> &f,int mu=-1)
+{
+  using reduced_t = typename vobj::tensor_reduced;
+  using slice_t   = typename reduced_t::scalar_object;
+
+  auto site_norm = localNorm2(f);
+  const int dir  = ( mu==-1 ) ? f.Grid()->Nd()-1 : mu;
+
+  std::vector<slice_t> slice_norm;
+  sliceSum(site_norm,slice_norm,dir);
+
+  int slice = 0;
+  for(const auto &value : slice_norm){
+    std::cout << s<<" "<<slice<<" "<<value<<std::endl;
+    ++slice;
+  }
+}
+
+// CRC32 fingerprint of a lattice: hashes sizeof(vobj)*buf.oSites() bytes
+// starting at element 0 of a CpuRead view of buf, with initial value 0.
+template<class vobj> uint32_t crc(Lattice<vobj> & buf)
+{
+  autoView( host_v , buf, CpuRead);
+  unsigned char *bytes  = (unsigned char *)&host_v[0];
+  size_t         nbytes = (size_t)sizeof(vobj)*buf.oSites();
+  return ::crc32(0L,bytes,nbytes);
+}
+
+// Debug helper: prints "FingerPrint <file> <line> <text of U> <crc(U)>" to std::cout.
+// Note the expansion already ends in ';'.
+#define CRC(U) std::cout << "FingerPrint "<<__FILE__ <<" "<< __LINE__ <<" "<< #U <<" "<<crc(U)<<std::endl;
+
+NAMESPACE_END(Grid);
+
+
--- a/Grid/lattice/Lattice_slice_gpu.h
+++ b/Grid/lattice/Lattice_slice_gpu.h
@@ -0,0 +1,126 @@
+NAMESPACE_BEGIN(Grid);
+
+// If NOT CUDA or HIP -- we should provide
+// -- atomicAdd(float *,float)
+// -- atomicAdd(double *,double)
+//
+// The fallbacks are compiled only when NEITHER backend is enabled. The guard
+// must use &&: with || it is also true on a CUDA-only or HIP-only build, and
+// these definitions would then be compiled alongside the vendor atomicAdd.
+// When OpenMP is enabled the update is an OpenMP atomic, so concurrent callers
+// inside a threaded loop do not race on the accumulator.
+#if !defined(GRID_HIP) && !defined(GRID_CUDA)
+inline void atomicAdd(float *acc,float elem)
+{
+#ifdef _OPENMP
+#pragma omp atomic
+#endif
+  *acc += elem;
+}
+inline void atomicAdd(double *acc,double elem)
+{
+#ifdef _OPENMP
+#pragma omp atomic
+#endif
+  *acc += elem;
+}
+#endif
+// Augment CUDA with complex atomics
+// Add elem to *accum by calling the double atomicAdd once for each of the two
+// double words of a ComplexD. The two updates are separate calls, so the pair
+// is not modified as one indivisible unit.
+// NOTE(review): declared plain `inline` while the vobj overload is
+// accelerator_inline -- confirm this is callable from device code.
+inline void atomicAdd(ComplexD *accum,ComplexD & elem)
+{
+  double *dst = (double *)accum;
+  double *src = (double *)&elem;
+  atomicAdd(&dst[0],src[0]);
+  atomicAdd(&dst[1],src[1]);
+}
+// Single precision counterpart: add elem to *accum by calling the float
+// atomicAdd once for each of the two float words of a ComplexF. The two
+// updates are separate calls, not one indivisible update of the pair.
+// NOTE(review): declared plain `inline` while the vobj overload is
+// accelerator_inline -- confirm this is callable from device code.
+inline void atomicAdd(ComplexF *accum,ComplexF & elem)
+{
+  float *dst = (float *)accum;
+  float *src = (float *)&elem;
+  atomicAdd(&dst[0],src[0]);
+  atomicAdd(&dst[1],src[1]);
+}
+// Augment CUDA with vobj atomics
+//
+// Accumulate elem into *accum one scalar word at a time, using the
+// scalar_type overload of atomicAdd for every word.
+// The number of words is sizeof(vobj)/sizeof(scalar_type), which spans the
+// whole object. vobj::Nsimd() only counts SIMD lanes, so using it as the bound
+// leaves part of any object holding more than one component unaccumulated.
+template<class vobj> accelerator_inline void atomicAdd(vobj *accum, vobj & elem)
+{
+  typedef typename vobj::scalar_type scalar_type;
+  static_assert(sizeof(vobj)%sizeof(scalar_type)==0,"vobj must be a whole number of scalar_type words");
+  constexpr int words = sizeof(vobj)/sizeof(scalar_type);
+  scalar_type *a_p= (scalar_type *)accum;
+  scalar_type *e_p= (scalar_type *)& elem;
+  for(int w=0;w<words;w++){
+    atomicAdd(&a_p[w],e_p[w]);
+  }
+}
+// Atomics based slice sum
+//
+// Sums Data over all sites sharing a coordinate in direction orthogdim and
+// returns one scalar_object per global coordinate of that direction.
+//   Data      : lattice to reduce, read through an AcceleratorRead view
+//   result    : resized to fd (full extent of orthogdim) and filled; the
+//               closing GlobalSumVector combines the contributions of all nodes
+//   orthogdim : direction that is kept (not summed); asserted to be in [0,Nd)
+// Three phases: (1) per reduced-plane vector sums via atomicAdd,
+// (2) split the SIMD lanes into local scalar sums, (3) sum over nodes.
+template<class vobj> inline void sliceSumGpu(const Lattice<vobj> &Data,std::vector<typename vobj::scalar_object> &result,int orthogdim)
+{
+  typedef typename vobj::scalar_object sobj;
+  typedef typename vobj::scalar_object::scalar_type scalar_type;
+  GridBase  *grid = Data.Grid();
+  assert(grid!=NULL);
+
+  const int    Nd = grid->_ndimension;
+  const int Nsimd = grid->Nsimd();
+
+  assert(orthogdim >= 0);
+  assert(orthogdim < Nd);
+
+  // Extents of orthogdim taken from the grid's _fdimensions, _ldimensions and
+  // _rdimensions; used below to size result (fd), lsSum (ld) and lvSum (rd).
+  int fd=grid->_fdimensions[orthogdim];
+  int ld=grid->_ldimensions[orthogdim];
+  int rd=grid->_rdimensions[orthogdim];
+
+  // Move to device memory and copy in / out
+  Vector<vobj> lvSum(rd); // will locally sum vectors first
+  Vector<sobj> lsSum(ld,Zero());                    // sum across these down to scalars
+  ExtractBuffer<sobj> extracted(Nsimd);                  // splitting the SIMD
+
+  result.resize(fd); // And then global sum to return the same vector to every node 
+  for(int r=0;r<rd;r++){
+    lvSum[r]=Zero();
+  }
+
+  // Slice geometry used to turn a flat loop index into a site index.
+  int e1=    grid->_slice_nblock[orthogdim];
+  int e2=    grid->_slice_block [orthogdim];
+  int stride=grid->_slice_stride[orthogdim];
+
+  // sum over reduced dimension planes, breaking out orthog dir
+  // Parallel over orthog direction
+  autoView( Data_v, Data, AcceleratorRead);
+  // Raw pointer and plain int copies so the loop body refers to locals only.
+  auto lvSum_p=&lvSum[0];
+  int ostride = grid->_ostride[orthogdim]; 
+  // The flat index ree is decomposed as ree = (r*e1 + n)*e2 + b, giving the
+  // site ss = r*ostride + n*stride + b. Every iteration with the same r adds
+  // into the same lvSum_p[r], which is why the accumulation goes via atomicAdd.
+  accelerator_for( ree,rd*e1*e2,1, {
+    int b = ree%e2;
+    int re= ree/e2;
+    int n=re%e1;
+    int r=re/e1;
+    int so=r*ostride;
+    int ss=so+n*stride+b;
+    atomicAdd(&lvSum_p[r],Data_v[ss]);
+  });
+
+  // Sum across simd lanes in the plane, breaking out orthog dir.
+  Coordinate icoor(Nd);
+
+  for(int rt=0;rt<rd;rt++){
+
+    extract(lvSum[rt],extracted);
+
+    for(int idx=0;idx<Nsimd;idx++){
+
+      grid->iCoorFromIindex(icoor,idx);
+
+      // Local coordinate of this lane along orthogdim: reduced index rt plus
+      // the lane's inner coordinate times rd.
+      int ldx =rt+icoor[orthogdim]*rd;
+
+      lsSum[ldx]=lsSum[ldx]+extracted[idx];
+
+    }
+  }
+  
+  // sum over nodes.
+  // Each node fills only the ld entries of its own processor plane and zeroes
+  // the rest, so the global sum below assembles the full vector.
+  for(int t=0;t<fd;t++){
+    int pt = t/ld; // processor plane
+    int lt = t%ld;
+    if ( pt == grid->_processor_coor[orthogdim] ) {
+      result[t]=lsSum[lt];
+    } else {
+      result[t]=Zero();
+    }
+
+  }
+  // Treat result as a flat array of scalar_type words for the reduction.
+  scalar_type * ptr = (scalar_type *) &result[0];
+  int words = fd*sizeof(sobj)/sizeof(scalar_type);
+  grid->GlobalSumVector(ptr, words);
+}
+
+
+NAMESPACE_END(Grid);
--- a/Grid/qcd/QCD.h
+++ b/Grid/qcd/QCD.h
@@ -451,9 +451,20 @@ template<class vobj> void pokeLorentz(vobj &lhs,const decltype(peekIndex<Lorentz
 // Fermion <-> propagator assignements
 //////////////////////////////////////////////
 //template <class Prop, class Ferm>
+#define FAST_FERM_TO_PROP
 template <class Fimpl>
 void FermToProp(typename Fimpl::PropagatorField &p, const typename Fimpl::FermionField &f, const int s, const int c)
 {
+#ifdef FAST_FERM_TO_PROP
+  autoView(p_v,p,CpuWrite);
+  autoView(f_v,f,CpuRead);
+  thread_for(idx,p_v.oSites(),{
+      for(int ss = 0; ss < Ns; ++ss) {
+      for(int cc = 0; cc < Fimpl::Dimension; ++cc) {
+	p_v[idx]()(ss,s)(cc,c) = f_v[idx]()(ss)(cc); // Propagator sink index is LEFT, suitable for left mult by gauge link (e.g.)
+      }}
+    });
+#else
  for(int j = 0; j < Ns; ++j)
    {
      auto pjs = peekSpin(p, j, s);
@@ -465,12 +476,23 @@ void FermToProp(typename Fimpl::PropagatorField &p, const typename Fimpl::Fermio
 	}
      pokeSpin(p, pjs, j, s);
    }
+#endif
 }
    
 //template <class Prop, class Ferm>
 template <class Fimpl>
 void PropToFerm(typename Fimpl::FermionField &f, const typename Fimpl::PropagatorField &p, const int s, const int c)
 {
+#ifdef FAST_FERM_TO_PROP
+  autoView(p_v,p,CpuRead);
+  autoView(f_v,f,CpuWrite);
+  thread_for(idx,p_v.oSites(),{
+      for(int ss = 0; ss < Ns; ++ss) {
+      for(int cc = 0; cc < Fimpl::Dimension; ++cc) {
+	f_v[idx]()(ss)(cc) = p_v[idx]()(ss,s)(cc,c); // LEFT index is copied across for s,c right index
+      }}
+    });
+#else
  for(int j = 0; j < Ns; ++j)
    {
      auto pjs = peekSpin(p, j, s);
@@ -482,6 +504,7 @@ void PropToFerm(typename Fimpl::FermionField &f, const typename Fimpl::Propagato
 	}
      pokeSpin(f, fj, j);
    }
+#endif
 }
    
 //////////////////////////////////////////////
--- a/tests/core/Test_fft_matt.cc
+++ b/tests/core/Test_fft_matt.cc
@@ -0,0 +1,270 @@
+    /*************************************************************************************
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./tests/core/Test_fft_matt.cc
+
+    Copyright (C) 2015
+
+Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+#include <Grid/Grid.h>
+
+using namespace Grid;
+
+// Gamma matrix identifiers indexed by direction: main() uses Gmu[mu] for
+// mu in [0,Nd) when building the momentum space kinetic term. The final
+// Gamma5 entry is not referenced through this table in this file.
+Gamma::Algebra Gmu [] = {
+  Gamma::Algebra::GammaX,
+  Gamma::Algebra::GammaY,
+  Gamma::Algebra::GammaZ,
+  Gamma::Algebra::GammaT,
+  Gamma::Algebra::Gamma5
+};
+
+// Test driver with three sections, each printing norms for manual inspection
+// (no assertions are made):
+//   1. Wilson test  : FFT of Dw x src compared with the operator applied
+//                     directly in momentum space, then FreePropagator x Dw
+//                     compared with the identity.
+//   2. Wilson prop  : FreePropagator on a point source compared with a
+//                     conjugate gradient solve of the normal equations.
+//   3. Gauge invariance test : the same comparison with the operator built on
+//                     a randomly gauge transformed copy of the gauge field.
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  int threads = GridThread::GetThreads();
+  std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
+
+  Coordinate latt_size   = GridDefaultLatt();
+  Coordinate simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd());
+  Coordinate mpi_layout  = GridDefaultMpi();
+
+  // NOTE(review): vol is computed here but never read again in this function.
+  int vol = 1;
+  for(int d=0;d<latt_size.size();d++){
+    vol = vol * latt_size[d];
+  }
+  GridCartesian         GRID(latt_size,simd_layout,mpi_layout);
+  GridRedBlackCartesian RBGRID(&GRID);
+
+  // NOTE(review): coor and this outer ci are not used below; the Wilson test
+  // declares its own ci in an inner scope.
+  LatticeComplexD    coor(&GRID);
+  ComplexD ci(0.0,1.0);
+
+  // NOTE(review): sRNG is seeded but not used afterwards in this function.
+  std::vector<int> seeds({1,2,3,4});
+  GridSerialRNG          sRNG;  sRNG.SeedFixedIntegers(seeds); // naughty seeding
+  GridParallelRNG          pRNG(&GRID);
+  pRNG.SeedFixedIntegers(seeds);
+
+  LatticeGaugeFieldD Umu(&GRID);
+  SU<Nc>::ColdConfiguration(pRNG,Umu); // Unit gauge
+
+  ////////////////////////////////////////////////////
+  // Wilson test
+  ////////////////////////////////////////////////////
+  {
+    LatticeFermionD    src(&GRID); gaussian(pRNG,src);
+    LatticeFermionD    src_p(&GRID);
+    LatticeFermionD    tmp(&GRID);
+    LatticeFermionD    ref(&GRID);
+    LatticeFermionD    result(&GRID);
+    
+    RealD mass=0.1;
+    WilsonFermionD Dw(Umu,GRID,RBGRID,mass);
+    
+    // ref = Dw x src in coordinate space
+    Dw.M(src,ref);
+    std::cout << "Norm src "<<norm2(src)<<std::endl;
+    std::cout << "Norm Dw x src "<<norm2(ref)<<std::endl;
+    {
+      FFT theFFT(&GRID);
+
+      ////////////////
+      // operator in Fourier space
+      ////////////////
+      // result = FFT[ Dw x src ], src_p = FFT[ src ]; tmp is the FFT input copy
+      tmp =ref;
+      theFFT.FFT_all_dim(result,tmp,FFT::forward);
+      std::cout<<"FFT[ Dw x src ]  "<< norm2(result)<<std::endl;    
+
+      tmp = src;
+      theFFT.FFT_all_dim(src_p,tmp,FFT::forward);
+      std::cout<<"FFT[ src      ]  "<< norm2(src_p)<<std::endl;
+      
+      /////////////////////////////////////////////////////////////////
+      // work out the predicted FT from Fourier
+      /////////////////////////////////////////////////////////////////
+      auto FGrid = &GRID;
+      LatticeFermionD    Kinetic(FGrid); Kinetic = Zero();
+      LatticeComplexD    kmu(FGrid); 
+      LatticeInteger     scoor(FGrid); 
+      LatticeComplexD    sk (FGrid); sk = Zero();
+      LatticeComplexD    sk2(FGrid); sk2= Zero();
+      LatticeComplexD    W(FGrid); W= Zero();
+      LatticeComplexD    one(FGrid); one =ComplexD(1.0,0.0);
+      ComplexD ci(0.0,1.0);
+    
+      // Accumulate, per direction, 2 sin^2(k/2) into sk2, sin^2(k) into sk and
+      // i sin(k) gamma_mu src_p into Kinetic, with k = 2 pi n / L.
+      // NOTE(review): sk, scoor and one are set up but not read afterwards.
+      for(int mu=0;mu<Nd;mu++) {
+	
+	RealD TwoPiL =  M_PI * 2.0/ latt_size[mu];
+
+	LatticeCoordinate(kmu,mu);
+
+	kmu = TwoPiL * kmu;
+      
+	sk2 = sk2 + 2.0*sin(kmu*0.5)*sin(kmu*0.5);
+	sk  = sk  +     sin(kmu)    *sin(kmu); 
+      
+	// -1/2 Dw ->  1/2 gmu (eip - emip) = i sinp gmu
+	Kinetic = Kinetic + sin(kmu)*ci*(Gamma(Gmu[mu])*src_p);
+	
+      }
+    
+      // Add the mass and sk2 term to complete the momentum space operator
+      W = mass + sk2; 
+      Kinetic = Kinetic + W * src_p;
+    
+      std::cout<<"Momentum space src         "<< norm2(src_p)<<std::endl;
+      std::cout<<"Momentum space Dw x src    "<< norm2(Kinetic)<<std::endl;
+      std::cout<<"FT[Coordinate space Dw]    "<< norm2(result)<<std::endl;
+    
+      // The printed norm of the difference is the figure of merit
+      result = result - Kinetic;
+      std::cout<<"diff "<< norm2(result)<<std::endl;
+      
+    }
+
+    std::cout << " =======================================" <<std::endl;
+    std::cout << " Checking FourierFreePropagator x Dw = 1" <<std::endl;
+    std::cout << " =======================================" <<std::endl;
+    // NOTE(review): tmp still holds the copy of src made for the FFT above;
+    // "Dw tmp" is printed before tmp is overwritten by Dw.M(src,tmp).
+    std::cout << "Dw src = " <<norm2(src)<<std::endl;
+    std::cout << "Dw tmp = " <<norm2(tmp)<<std::endl;
+    Dw.M(src,tmp);
+    Dw.FreePropagator(tmp,ref,mass);
+
+    std::cout << "Dw ref = " <<norm2(ref)<<std::endl;
+    
+    ref = ref - src;
+    
+    std::cout << "Dw ref-src = " <<norm2(ref)<<std::endl;
+  }
+
+
+  ////////////////////////////////////////////////////
+  // Wilson prop
+  ////////////////////////////////////////////////////
+  {
+    std::cout<<"****************************************"<<std::endl;
+    std::cout << "Wilson Mom space 4d propagator \n";
+    std::cout<<"****************************************"<<std::endl;
+
+    LatticeFermionD    src(&GRID); gaussian(pRNG,src);
+    LatticeFermionD    tmp(&GRID);
+    LatticeFermionD    ref(&GRID);
+    LatticeFermionD    diff(&GRID);
+
+    // The gaussian fill above is discarded: src becomes a point source with
+    // a single unit entry (spin 0, colour 0) at the origin.
+    src=Zero();
+    Coordinate point(4,0); // 0,0,0,0
+    SpinColourVectorD ferm;
+    ferm=Zero();
+    ferm()(0)(0) = ComplexD(1.0);
+    pokeSite(ferm,src,point);
+
+    RealD mass=0.1;
+    WilsonFermionD Dw(Umu,GRID,RBGRID,mass);
+
+    // Momentum space prop
+    std::cout << " Solving by FFT and Feynman rules" <<std::endl;
+    Dw.FreePropagator(src,ref,mass) ;
+
+    // NOTE(review): G5 and sdir are declared but not used in this section.
+    Gamma G5(Gamma::Algebra::Gamma5);
+
+    LatticeFermionD    result(&GRID); 
+    const int sdir=0;
+    
+    ////////////////////////////////////////////////////////////////////////
+    // Conjugate gradient on normal equations system
+    ////////////////////////////////////////////////////////////////////////
+    std::cout << " Solving by Conjugate Gradient (CGNE)" <<std::endl;
+    // src is replaced by Mdag src before the solve with the MdagM operator
+    Dw.Mdag(src,tmp);
+    src=tmp;
+    MdagMLinearOperator<WilsonFermionD,LatticeFermionD> HermOp(Dw);
+    ConjugateGradient<LatticeFermionD> CG(1.0e-10,10000);
+    CG(HermOp,src,result);
+    
+    ////////////////////////////////////////////////////////////////////////
+    std::cout << " Taking difference" <<std::endl;
+    std::cout << "Dw result "<<norm2(result)<<std::endl;
+    std::cout << "Dw ref     "<<norm2(ref)<<std::endl;
+    
+    diff = ref - result;
+    std::cout << "result - ref     "<<norm2(diff)<<std::endl;
+
+    DumpSliceNorm("Slice Norm Solution ",result,Nd-1);
+  }
+
+  ////////////////////////////////////////////////////
+  //Gauge invariance test
+  ////////////////////////////////////////////////////
+  {
+    std::cout<<"****************************************"<<std::endl;
+    std::cout << "Gauge invariance test \n";
+    std::cout<<"****************************************"<<std::endl;
+    LatticeGaugeField     U_GT(&GRID); // Gauge transformed field
+    LatticeColourMatrix   g(&GRID);    // local Gauge xform matrix
+    U_GT = Umu;
+    // Make a random xform to the gauge field
+    SU<Nc>::RandomGaugeTransform(pRNG,U_GT,g); // random transform of U_GT (presumably in place, g being the transform) -- no longer unit gauge
+
+    LatticeFermionD    src(&GRID);
+    LatticeFermionD    tmp(&GRID);
+    LatticeFermionD    ref(&GRID);
+    LatticeFermionD    diff(&GRID);
+
+    // could loop over colors
+    src=Zero();
+    Coordinate point(4,0); // 0,0,0,0
+    SpinColourVectorD ferm;
+    ferm=Zero();
+    ferm()(0)(0) = ComplexD(1.0);
+    pokeSite(ferm,src,point);
+
+    // The operator is built on the transformed links U_GT
+    RealD mass=0.1;
+    WilsonFermionD Dw(U_GT,GRID,RBGRID,mass);
+
+    // Momentum space prop
+    // NOTE(review): ref comes from FreePropagator while result is solved on
+    // U_GT, and g is not applied to src or result here -- confirm that the
+    // printed difference is the intended comparison.
+    std::cout << " Solving by FFT and Feynman rules" <<std::endl;
+    Dw.FreePropagator(src,ref,mass) ;
+
+    // NOTE(review): G5 and sdir are declared but not used in this section.
+    Gamma G5(Gamma::Algebra::Gamma5);
+
+    LatticeFermionD    result(&GRID); 
+    const int sdir=0;
+    
+    ////////////////////////////////////////////////////////////////////////
+    // Conjugate gradient on normal equations system
+    ////////////////////////////////////////////////////////////////////////
+    std::cout << " Solving by Conjugate Gradient (CGNE)" <<std::endl;
+    Dw.Mdag(src,tmp);
+    src=tmp;
+    MdagMLinearOperator<WilsonFermionD,LatticeFermionD> HermOp(Dw);
+    ConjugateGradient<LatticeFermionD> CG(1.0e-10,10000);
+    CG(HermOp,src,result);
+    
+    ////////////////////////////////////////////////////////////////////////
+    std::cout << " Taking difference" <<std::endl;
+    std::cout << "Dw result "<<norm2(result)<<std::endl;
+    std::cout << "Dw ref     "<<norm2(ref)<<std::endl;
+    
+    diff = ref - result;
+    std::cout << "result - ref     "<<norm2(diff)<<std::endl;
+
+    DumpSliceNorm("Slice Norm Solution ",result,Nd-1);
+  }
+  
+  
+  Grid_finalize();
+}
--- a/tests/core/Test_slicesum.cc
+++ b/tests/core/Test_slicesum.cc
@@ -0,0 +1,73 @@
+    /*************************************************************************************
+
+    Grid physics library, www.github.com/paboyle/Grid 
+
+    Source file: ./tests/core/Test_slicesum.cc
+
+    Copyright (C) 2015
+
+Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
+Author: Peter Boyle <paboyle@ph.ed.ac.uk>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+    See the full license in the file "LICENSE" in the top level distribution directory
+    *************************************************************************************/
+    /*  END LEGAL */
+#include <Grid/Grid.h>
+#include <Grid/lattice/Lattice_slice_gpu.h>
+
+using namespace Grid;
+
+// Compares the reference sliceSum with the atomics based sliceSumGpu on a
+// gaussian random complex field of size 16^4, once per lattice direction.
+// The time of each call is printed, then every slice is printed and the two
+// results are required to agree to within 1e-8.
+int main (int argc, char ** argv)
+{
+  Grid_init(&argc,&argv);
+
+  int N=16;
+  std::vector<int> latt_size  ({N,N,N,N});
+  std::vector<int> simd_layout({vComplexD::Nsimd(),1,1,1});
+  std::vector<int> mpi_layout ({1,1,1,1});
+
+  GridCartesian         GRID(latt_size,simd_layout,mpi_layout);
+
+  LatticeComplexD  rn(&GRID);
+
+  GridParallelRNG RNG(&GRID);
+  RNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));  
+  gaussian(RNG,rn);
+
+  std::vector<TComplex> reduced_ref;
+  std::vector<TComplex> reduced_gpu;
+  for(int d=0;d<4;d++){
+    {
+      RealD t=-usecond();
+      sliceSum(rn,reduced_ref,d);
+      t+=usecond();
+      std::cout << " sliceSum took "<<t<<" usecs"<<std::endl;
+    }
+    {
+      RealD t=-usecond();
+      sliceSumGpu(rn,reduced_gpu,d);
+      t+=usecond();
+      std::cout << " sliceSumGpu took "<<t<<" usecs"<<std::endl;
+    }
+    // Both implementations must return one entry per slice. Checking this
+    // first turns a length mismatch into a clean failure instead of an out of
+    // range read of reduced_gpu in the loop below.
+    assert(reduced_gpu.size() == reduced_ref.size());
+    for(size_t t=0;t<reduced_ref.size();t++){
+      std::cout << t<<" ref "<< reduced_ref[t] <<" opt " << reduced_gpu[t] << " diff "<<reduced_ref[t]-reduced_gpu[t]<<std::endl;
+      TComplex diff = reduced_ref[t]-reduced_gpu[t];
+      assert(abs(TensorRemove(diff)) < 1e-8 );
+    }
+  }
+  Grid_finalize();
+}
Author	SHA1	Message	Date
Peter Boyle	8b91b61b61	First cut at faster GPU slice sum via atomics	2022-12-22 15:13:45 -05:00
Peter Boyle	4ca1bf7cca	Added gauge invariance test	2022-12-21 07:23:16 -05:00
Peter Boyle	2ff868f7a5	CPU open doesn't need to free space	2022-12-20 05:10:23 -05:00
Peter Boyle	ede02b6883	Memory manager debug Felix case	2022-12-20 05:10:23 -05:00
Peter Boyle	1822ced302	Bug fix	2022-12-20 05:10:23 -05:00
Peter Boyle	37ba32776f	More logging	2022-12-20 05:10:23 -05:00
Peter Boyle	99b3697b03	More loggin	2022-12-20 05:10:23 -05:00
Peter Boyle	43a45ec97b	SSC_START	2022-12-20 05:10:23 -05:00
Peter Boyle	b00a4142e5	A=A fix	2022-12-20 05:10:23 -05:00
Peter Boyle	3791bc527b	Logging pulled in from dirichlet branch	2022-12-20 05:10:23 -05:00
Peter Boyle	d8c29f5fcf	Updated FFT test for PETSc	2022-12-18 12:05:00 -05:00
Peter Boyle	281f8101fe	Matt FFT test	2022-12-17 20:35:33 -05:00
Peter Boyle	07acfe89f2	Merge pull request #417 from rrhodgson/feature/fermtoprop Feature/fermtoprop	2022-12-06 12:45:03 -05:00
Raoul Hodgson	40234f531f	FermToProp accelerator_for -> thread_for	2022-12-06 17:34:51 +00:00
Raoul Hodgson	d49694f38f	PropToFerm fix	2022-12-06 15:48:54 +00:00
Peter Boyle	97a098636d	FermToProp	2022-11-30 15:36:35 -05:00
Peter Boyle	e13930c8b2	Faster fermtoprop case	2022-11-30 15:11:29 -05:00