mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-11-03 21:44:33 +00:00 
			
		
		
		
	merge
This commit is contained in:
		@@ -4,7 +4,7 @@
 | 
			
		||||
 | 
			
		||||
  Using intrinsics
 | 
			
		||||
*/
 | 
			
		||||
// Time-stamp: <2015-06-09 14:26:59 neo>
 | 
			
		||||
// Time-stamp: <2015-06-16 23:30:41 neo>
 | 
			
		||||
//----------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
#include <immintrin.h>
 | 
			
		||||
@@ -248,7 +248,7 @@ namespace Optimization {
 | 
			
		||||
      return _mm256_set_m128i(a1,a0);
 | 
			
		||||
#endif
 | 
			
		||||
#if defined (AVX2)
 | 
			
		||||
      return _mm256_mul_epi32(a,b);
 | 
			
		||||
      return _mm256_mullo_epi32(a,b);
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 
 | 
			
		||||
@@ -4,7 +4,7 @@
 | 
			
		||||
 | 
			
		||||
  Using intrinsics
 | 
			
		||||
*/
 | 
			
		||||
// Time-stamp: <2015-06-09 14:24:01 neo>
 | 
			
		||||
// Time-stamp: <2015-06-16 23:27:54 neo>
 | 
			
		||||
//----------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
#include <pmmintrin.h>
 | 
			
		||||
@@ -97,7 +97,7 @@ namespace Optimization {
 | 
			
		||||
    }
 | 
			
		||||
    // Integer
 | 
			
		||||
    inline __m128i operator()(Integer *a){
 | 
			
		||||
      return _mm_set_epi32(a[0],a[1],a[2],a[3]);
 | 
			
		||||
      return _mm_set_epi32(a[3],a[2],a[1],a[0]);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -181,7 +181,7 @@ namespace Optimization {
 | 
			
		||||
    }
 | 
			
		||||
    // Integer
 | 
			
		||||
    inline __m128i operator()(__m128i a, __m128i b){
 | 
			
		||||
      return _mm_mul_epi32(a,b);
 | 
			
		||||
      return _mm_mullo_epi32(a,b);
 | 
			
		||||
    }
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -63,26 +63,70 @@ namespace Grid {
 | 
			
		||||
      for(int c1=0;c1<N;c1++){
 | 
			
		||||
	nrm = 0.0; 
 | 
			
		||||
	for(int c2=0;c2<N;c2++)
 | 
			
		||||
	  nrm = real(innerProduct(ret._internal[c1][c2],ret._internal[c1][c2]));
 | 
			
		||||
	  nrm += real(innerProduct(ret._internal[c1][c2],ret._internal[c1][c2]));
 | 
			
		||||
	nrm = 1.0/sqrt(nrm);
 | 
			
		||||
	std::cout << "norm : "<< nrm << "\n";
 | 
			
		||||
	for(int c2=0;c2<N;c2++)
 | 
			
		||||
	  ret._internal[c1][c2]*= nrm;
 | 
			
		||||
      
 | 
			
		||||
	for (int b=c1+1; b<N; ++b){
 | 
			
		||||
	  decltype(ret._internal[b][b]*ret._internal[b][b]) pr = 0.0;
 | 
			
		||||
	  for(int c=0; c<N; ++c)
 | 
			
		||||
	    pr += ret._internal[c1][c]*ret._internal[b][c];
 | 
			
		||||
	    pr += conjugate(ret._internal[c1][c])*ret._internal[b][c];
 | 
			
		||||
	  
 | 
			
		||||
	  std::cout << "pr : "<< pr << "\n";
 | 
			
		||||
	  for(int c=0; c<N; ++c){
 | 
			
		||||
	    ret._internal[b][c] -= pr * ret._internal[c1][c];
 | 
			
		||||
	  }
 | 
			
		||||
	}
 | 
			
		||||
	  
 | 
			
		||||
      }
 | 
			
		||||
      // assuming the determinant is ok
 | 
			
		||||
      return ret;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  /////////////////////////////////////////////// 
 | 
			
		||||
  // Determinant function for scalar, vector, matrix
 | 
			
		||||
  /////////////////////////////////////////////// 
 | 
			
		||||
  inline ComplexF Determinant( const ComplexF &arg){    return arg;}
 | 
			
		||||
  inline ComplexD Determinant( const ComplexD &arg){    return arg;}
 | 
			
		||||
  inline RealF Determinant( const RealF &arg){    return arg;}
 | 
			
		||||
  inline RealD Determinant( const RealD &arg){    return arg;}
 | 
			
		||||
 | 
			
		||||
  template<class vtype> inline auto Determinant(const iScalar<vtype>&r) -> iScalar<decltype(Determinant(r._internal))>
 | 
			
		||||
    {
 | 
			
		||||
      iScalar<decltype(Determinant(r._internal))> ret;
 | 
			
		||||
      ret._internal = Determinant(r._internal);
 | 
			
		||||
      return ret;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  template<class vtype,int N, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0 >::type * =nullptr> 
 | 
			
		||||
    inline iScalar<vtype> Determinant(const iMatrix<vtype,N> &arg)
 | 
			
		||||
    {
 | 
			
		||||
      iMatrix<vtype,N> ret(arg);
 | 
			
		||||
      iScalar<vtype> det = vtype(1.0);
 | 
			
		||||
      /* Conversion of matrix to upper triangular */
 | 
			
		||||
      for(int i = 0; i < N; i++){
 | 
			
		||||
        for(int j = 0; j < N; j++){
 | 
			
		||||
	  if(j>i){
 | 
			
		||||
	    vtype ratio = ret._internal[j][i]/ret._internal[i][i];
 | 
			
		||||
	    for(int k = 0; k < N; k++){
 | 
			
		||||
	      ret._internal[j][k] -= ratio * ret._internal[i][k];
 | 
			
		||||
	    }
 | 
			
		||||
	  }
 | 
			
		||||
        }
 | 
			
		||||
      }      
 | 
			
		||||
 | 
			
		||||
      for(int i = 0; i < N; i++)
 | 
			
		||||
	det *= ret._internal[i][i];   
 | 
			
		||||
 | 
			
		||||
      return det;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  /////////////////////////////////////////////// 
 | 
			
		||||
  // Exponentiate function for scalar, vector, matrix
 | 
			
		||||
  /////////////////////////////////////////////// 
 | 
			
		||||
 
 | 
			
		||||
@@ -57,7 +57,7 @@ inline void extract(typename std::enable_if<!isGridTensor<vsimd>::value, const v
 | 
			
		||||
    extracted[i]=buf[i*s];
 | 
			
		||||
    for(int ii=1;ii<s;ii++){
 | 
			
		||||
      if ( buf[i*s]!=buf[i*s+ii] ){
 | 
			
		||||
	std::cout << " SIMD extract failure splat="<<s<<" ii "<<ii<<" " <<Nextr<<" "<< Nsimd<<" "<<std::endl;
 | 
			
		||||
	std::cout << " SIMD extract failure splat = "<<s<<" ii "<<ii<<" " <<Nextr<<" "<< Nsimd<<" "<<std::endl;
 | 
			
		||||
	for(int vv=0;vv<Nsimd;vv++) {
 | 
			
		||||
	  std::cout<< buf[vv]<<" ";
 | 
			
		||||
	}
 | 
			
		||||
 
 | 
			
		||||
@@ -1,5 +1,5 @@
 | 
			
		||||
 | 
			
		||||
bin_PROGRAMS = Test_GaugeAction Test_Metropolis Test_cayley_cg Test_cayley_coarsen_support Test_cayley_even_odd Test_cayley_ldop_cg Test_cayley_ldop_cr Test_cf_coarsen_support Test_cf_cr_unprec Test_contfrac_cg Test_contfrac_even_odd Test_cshift Test_cshift_red_black Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_cr_unprec Test_dwf_even_odd Test_gamma Test_lie_generators Test_main Test_multishift_sqrt Test_nersc_io Test_remez Test_rng Test_rng_fixed Test_simd Test_stencil Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_cr_unprec Test_wilson_even_odd
 | 
			
		||||
bin_PROGRAMS = Test_GaugeAction Test_Metropolis Test_cayley_cg Test_cayley_coarsen_support Test_cayley_even_odd Test_cayley_ldop_cg Test_cayley_ldop_cr Test_cf_coarsen_support Test_cf_cr_unprec Test_contfrac_cg Test_contfrac_even_odd Test_cshift Test_cshift_red_black Test_dwf_cg_prec Test_dwf_cg_schur Test_dwf_cg_unprec Test_dwf_cr_unprec Test_dwf_even_odd Test_gamma Test_lie_generators Test_main Test_multishift_sqrt Test_nersc_io Test_quenched_update Test_remez Test_rng Test_rng_fixed Test_simd Test_stencil Test_wilson_cg_prec Test_wilson_cg_schur Test_wilson_cg_unprec Test_wilson_cr_unprec Test_wilson_even_odd
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_GaugeAction_SOURCES=Test_GaugeAction.cc
 | 
			
		||||
@@ -94,6 +94,10 @@ Test_nersc_io_SOURCES=Test_nersc_io.cc
 | 
			
		||||
Test_nersc_io_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_quenched_update_SOURCES=Test_quenched_update.cc
 | 
			
		||||
Test_quenched_update_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Test_remez_SOURCES=Test_remez.cc
 | 
			
		||||
Test_remez_LDADD=-lGrid
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,9 +1,5 @@
 | 
			
		||||
#include "Grid.h"
 | 
			
		||||
 | 
			
		||||
//DEBUG
 | 
			
		||||
#ifdef SSE4
 | 
			
		||||
#include "simd/Grid_vector_types.h"
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
using namespace Grid;
 | 
			
		||||
@@ -216,16 +212,27 @@ int main (int argc, char ** argv)
 | 
			
		||||
    scm=transposeIndex<1>(scm);
 | 
			
		||||
    
 | 
			
		||||
   
 | 
			
		||||
    //random(SerialRNG, cm);
 | 
			
		||||
    random(SerialRNG, cm);
 | 
			
		||||
    std::cout << cm << std::endl;
 | 
			
		||||
 | 
			
		||||
    //cm = Ta(cm);
 | 
			
		||||
    //TComplex tracecm= trace(cm);      
 | 
			
		||||
    //std::cout << cm << std::endl;
 | 
			
		||||
 | 
			
		||||
    cm = Ta(cm);
 | 
			
		||||
    //TComplex tracecm= trace(cm);      
 | 
			
		||||
    //std::cout << cm << "  "<< tracecm << std::endl;
 | 
			
		||||
 | 
			
		||||
    cm = ProjectOnGroup(cm);
 | 
			
		||||
    std::cout << cm << "  " << std::endl;
 | 
			
		||||
    cm = ProjectOnGroup(cm);
 | 
			
		||||
    std::cout << cm << "  " << std::endl;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    TComplex det = Determinant(cm);
 | 
			
		||||
    
 | 
			
		||||
    std::cout << "determinant: " << det <<  std::endl;
 | 
			
		||||
    cm = Exponentiate(cm, 1.0, 10);
 | 
			
		||||
    std::cout << cm << "  " << std::endl;
 | 
			
		||||
    det = Determinant(cm);
 | 
			
		||||
    std::cout << "determinant: " << det <<  std::endl;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
//    Foo = Foo+scalar; // LatticeColourMatrix+Scalar
 | 
			
		||||
//    Foo = Foo*scalar; // LatticeColourMatrix*Scalar
 | 
			
		||||
@@ -296,10 +303,12 @@ int main (int argc, char ** argv)
 | 
			
		||||
      LatticeInteger coor(&Fine);
 | 
			
		||||
      LatticeCoordinate(coor,d);
 | 
			
		||||
      lex = lex + coor*mm[d];
 | 
			
		||||
      
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    Bar = zero;
 | 
			
		||||
    Bar = where(lex<Integer(10),Foo,Bar);
 | 
			
		||||
    cout << "peeking sites..\n";
 | 
			
		||||
    {
 | 
			
		||||
      std::vector<int> coor(4);
 | 
			
		||||
      for(coor[3]=0;coor[3]<latt_size[3]/mpi_layout[3];coor[3]++){
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user