mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-04 14:04:32 +00:00 
			
		
		
		
	OMP dslash working
This commit is contained in:
		
							
								
								
									
										77
									
								
								TODO
									
									
									
									
									
								
							
							
						
						
									
										77
									
								
								TODO
									
									
									
									
									
								
							@@ -8,60 +8,39 @@
 | 
			
		||||
 | 
			
		||||
* const audit
 | 
			
		||||
 | 
			
		||||
Insert/Extract
 | 
			
		||||
* Replace vset with a call to merge.; 
 | 
			
		||||
* care in Gmerge,Gextract over vset .
 | 
			
		||||
* extract / merge extra implementation removal      
 | 
			
		||||
 | 
			
		||||
* Strong test for norm2, conj and all primitive types. -- tests/Grid_simd.cc is almost there
 | 
			
		||||
* Optimise the extract/merge SIMD routines; Azusa??
 | 
			
		||||
 - I have collated into single location at least.
 | 
			
		||||
 - Need to use _mm_*insert/extract routines.
 | 
			
		||||
 | 
			
		||||
* Thread scaling tests Xeon, XeonPhi
 | 
			
		||||
 | 
			
		||||
================================================================
 | 
			
		||||
*** New Functionality
 | 
			
		||||
================================================================
 | 
			
		||||
 | 
			
		||||
* Implement where within expression template scheme.
 | 
			
		||||
 | 
			
		||||
* - BinaryWriter, TextWriter etc...
 | 
			
		||||
  - use protocol buffers? replace xmlReader/Writer etc..
 | 
			
		||||
  - Binary use htonll, htonl
 | 
			
		||||
 | 
			
		||||
* Expression template engine: -- DONE
 | 
			
		||||
   -- Norm2(expression) problem: introduce norm2 unary op, or introduce automatic conversion from expression to Lattice<vobj>
 | 
			
		||||
 | 
			
		||||
* CovariantShift support                             -----Use a class to store gauge field? (parallel transport?)
 | 
			
		||||
 | 
			
		||||
** Make the Tensor types and Complex etc... play more nicely.
 | 
			
		||||
  - TensorRemove is a hack, come up with a long term rationalised approach to Complex vs. Scalar<Scalar<Scalar<Complex > > >
 | 
			
		||||
 | 
			
		||||
  QDP forces use of "toDouble" to get back to non tensor scalar. This role is presently taken by TensorRemove, but I
 | 
			
		||||
  want to introduce a syntax that does not require this.
 | 
			
		||||
 | 
			
		||||
  - Reductions that contract indices on a site should always demote the tensor structure.
 | 
			
		||||
    norm2(), innerProduct.
 | 
			
		||||
 | 
			
		||||
  - Result of Sum(), SliceSum // spatial sums
 | 
			
		||||
              trace, traceIndex etc.. do not.
 | 
			
		||||
 | 
			
		||||
  - problem arises because "trace" returns Lattice<TComplex> moving everything down to Scalar,
 | 
			
		||||
    and then Sum and SliceSum do not remove the Scalars. This would be fixed if we 
 | 
			
		||||
    template specialize the scalar scalar scalar sum and SliceSum,  on the basis of being
 | 
			
		||||
    pure scalar.
 | 
			
		||||
 | 
			
		||||
* Optimise the extract/merge SIMD routines; Azusa??
 | 
			
		||||
 - I have collated into single location at least.
 | 
			
		||||
 - Need to use _mm_*insert/extract routines.
 | 
			
		||||
 | 
			
		||||
* Flavour matrices?
 | 
			
		||||
* Pauli, SU subgroup, etc.. 
 | 
			
		||||
* su3 exponentiation & log etc.. [Jamie's code?]
 | 
			
		||||
* TaProj
 | 
			
		||||
* FFTnD ?
 | 
			
		||||
 | 
			
		||||
* Parallel io improvements
 | 
			
		||||
  - optional parallel MPI2 IO
 | 
			
		||||
  - move Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test.
 | 
			
		||||
  
 | 
			
		||||
* rb4d support for 5th dimension in Mobius.
 | 
			
		||||
*** Expression template engine: -- DONE
 | 
			
		||||
 | 
			
		||||
[   -- Norm2(expression) problem: introduce norm2 unary op, or introduce automatic conversion from expression to Lattice<vobj>
 | 
			
		||||
* Strong test for norm2, conj and all primitive types. -- tests/Grid_simd.cc is almost there
 | 
			
		||||
* Implement where within expression template scheme.
 | 
			
		||||
* Check for missing functionality                    - partially audited against QDP++ layout
 | 
			
		||||
 | 
			
		||||
   // Unary functions
 | 
			
		||||
   // cos,sin, tan, acos, asin, cosh, acosh, tanh, sinh, // Scalar<vReal> only arg
 | 
			
		||||
   // exp, log, sqrt, fabs
 | 
			
		||||
@@ -69,7 +48,21 @@
 | 
			
		||||
   // adjColor, adjSpin,
 | 
			
		||||
   // copyMask.
 | 
			
		||||
   // localMaxAbs
 | 
			
		||||
   // Fourier transform equivalent.
 | 
			
		||||
   // Fourier transform equivalent.]
 | 
			
		||||
 | 
			
		||||
================================================================
 | 
			
		||||
*** New Functionality
 | 
			
		||||
================================================================
 | 
			
		||||
 | 
			
		||||
* - BinaryWriter, TextWriter etc...
 | 
			
		||||
  - use protocol buffers? replace xmlReader/Writer etc..
 | 
			
		||||
  - Binary use htonll, htonl
 | 
			
		||||
 | 
			
		||||
* CovariantShift support                             -----Use a class to store gauge field? (parallel transport?)
 | 
			
		||||
 | 
			
		||||
* Parallel io improvements
 | 
			
		||||
  - optional parallel MPI2 IO
 | 
			
		||||
  - move Plaquette and link trace checks into nersc reader from the Grid_nersc_io.cc test.
 | 
			
		||||
 | 
			
		||||
Actions -- coherent framework for implementing actions and their forces.
 | 
			
		||||
 | 
			
		||||
@@ -80,9 +73,6 @@ Actions -- coherent framework for implementing actions and their forces.
 | 
			
		||||
  - Mobius
 | 
			
		||||
  - z-Mobius
 | 
			
		||||
 | 
			
		||||
* Gauge
 | 
			
		||||
  - Wilson, symanzik, iwasaki
 | 
			
		||||
 | 
			
		||||
Algorithms (lots of reuse/port from BFM)
 | 
			
		||||
* LinearOperator
 | 
			
		||||
* LinearSolver
 | 
			
		||||
@@ -97,8 +87,21 @@ Algorithms (lots of reuse/port from BFM)
 | 
			
		||||
* HDCG
 | 
			
		||||
* HMC, 
 | 
			
		||||
* Heatbath
 | 
			
		||||
* Integrators, leapfrog, omelyan, force gradient etc...
 | 
			
		||||
* etc..
 | 
			
		||||
 | 
			
		||||
* Gauge
 | 
			
		||||
  - Wilson, symanzik, iwasaki
 | 
			
		||||
 | 
			
		||||
* rb4d support for 5th dimension in Mobius.
 | 
			
		||||
 | 
			
		||||
* Flavour matrices?
 | 
			
		||||
* Pauli, SU subgroup, etc.. 
 | 
			
		||||
* su3 exponentiation & log etc.. [Jamie's code?]
 | 
			
		||||
* TaProj
 | 
			
		||||
* FFTnD ?
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
======================================================================================================
 | 
			
		||||
FUNCTIONALITY: it pleases me to keep track of things I have done (keeps me arguably sane)
 | 
			
		||||
======================================================================================================
 | 
			
		||||
 
 | 
			
		||||
@@ -80,7 +80,7 @@ int main (int argc, char ** argv)
 | 
			
		||||
  int ncall=1000;
 | 
			
		||||
  double t0=usecond();
 | 
			
		||||
  for(int i=0;i<ncall;i++){
 | 
			
		||||
    Dw.multiply(src,result);
 | 
			
		||||
    Dw.M(src,result);
 | 
			
		||||
  }
 | 
			
		||||
  double t1=usecond();
 | 
			
		||||
  double flops=1320*volume*ncall;
 | 
			
		||||
 
 | 
			
		||||
@@ -85,7 +85,17 @@ void WilsonMatrix::DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeF
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void WilsonMatrix::multiply(const LatticeFermion &in, LatticeFermion &out)
 | 
			
		||||
void WilsonMatrix::M(const LatticeFermion &in, LatticeFermion &out)
 | 
			
		||||
{
 | 
			
		||||
  Dhop(in,out);
 | 
			
		||||
  return;
 | 
			
		||||
}
 | 
			
		||||
void WilsonMatrix::Mdag(const LatticeFermion &in, LatticeFermion &out)
 | 
			
		||||
{
 | 
			
		||||
  Dhop(in,out);
 | 
			
		||||
  return;
 | 
			
		||||
}
 | 
			
		||||
void WilsonMatrix::MdagM(const LatticeFermion &in, LatticeFermion &out)
 | 
			
		||||
{
 | 
			
		||||
  Dhop(in,out);
 | 
			
		||||
  return;
 | 
			
		||||
@@ -96,18 +106,18 @@ void WilsonMatrix::Dhop(const LatticeFermion &in, LatticeFermion &out)
 | 
			
		||||
  WilsonCompressor compressor;
 | 
			
		||||
  Stencil.HaloExchange<vSpinColourVector,vHalfSpinColourVector,WilsonCompressor>(in,comm_buf,compressor);
 | 
			
		||||
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
  for(int sss=0;sss<grid->oSites();sss++){
 | 
			
		||||
 | 
			
		||||
    vHalfSpinColourVector  tmp;    
 | 
			
		||||
    vHalfSpinColourVector  chi;    
 | 
			
		||||
    vSpinColourVector result;
 | 
			
		||||
    vHalfSpinColourVector Uchi;
 | 
			
		||||
    int offset,local,perm, ptype;
 | 
			
		||||
 | 
			
		||||
PARALLEL_FOR_LOOP
 | 
			
		||||
  for(int sss=0;sss<grid->oSites();sss++){
 | 
			
		||||
 | 
			
		||||
    //    int ss = Stencil._LebesgueReorder[sss];
 | 
			
		||||
    int ss = sss;
 | 
			
		||||
    int ssu= ss;
 | 
			
		||||
    //    int ss = Stencil._LebesgueReorder[sss];
 | 
			
		||||
 | 
			
		||||
    // Xp
 | 
			
		||||
    offset = Stencil._offsets [Xp][ss];
 | 
			
		||||
 
 | 
			
		||||
@@ -3,16 +3,12 @@
 | 
			
		||||
 | 
			
		||||
#include <Grid.h>
 | 
			
		||||
 | 
			
		||||
#include <algorithms/LinearOperator.h>
 | 
			
		||||
 | 
			
		||||
namespace Grid {
 | 
			
		||||
 | 
			
		||||
  namespace QCD {
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    template<class vtype> class LinearOperatorBase {
 | 
			
		||||
    public:
 | 
			
		||||
      void multiply(const Lattice<vtype> &in, Lattice<vtype> &out){ assert(0);}
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    class WilsonMatrix : public LinearOperatorBase<LatticeFermion>
 | 
			
		||||
    {
 | 
			
		||||
      //NB r=1;
 | 
			
		||||
@@ -40,7 +36,9 @@ namespace Grid {
 | 
			
		||||
      void DoubleStore(LatticeDoubledGaugeField &Uds,const LatticeGaugeField &Umu);
 | 
			
		||||
 | 
			
		||||
      // override multiply
 | 
			
		||||
      void multiply(const LatticeFermion &in, LatticeFermion &out);
 | 
			
		||||
      virtual void M    (const LatticeFermion &in, LatticeFermion &out);
 | 
			
		||||
      virtual void Mdag (const LatticeFermion &in, LatticeFermion &out);
 | 
			
		||||
      virtual void MdagM(const LatticeFermion &in, LatticeFermion &out);
 | 
			
		||||
 | 
			
		||||
      // non-hermitian hopping term; half cb or both
 | 
			
		||||
      void Dhop(const LatticeFermion &in, LatticeFermion &out);
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user