mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-11 22:50:45 +01:00
Removing redundant arguments for integrator functions, step 1
This commit is contained in:
parent
0bd296dda4
commit
ce1a115e0b
@ -255,19 +255,28 @@ PARALLEL_FOR_LOOP
|
|||||||
}
|
}
|
||||||
|
|
||||||
Lattice(const Lattice& r){ // copy constructor
|
Lattice(const Lattice& r){ // copy constructor
|
||||||
_grid = r._grid;
|
_grid = r._grid;
|
||||||
checkerboard = r.checkerboard;
|
checkerboard = r.checkerboard;
|
||||||
_odata.resize(_grid->oSites());// essential
|
_odata.resize(_grid->oSites());// essential
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(int ss=0;ss<_grid->oSites();ss++){
|
for(int ss=0;ss<_grid->oSites();ss++){
|
||||||
_odata[ss]=r._odata[ss];
|
_odata[ss]=r._odata[ss];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
virtual ~Lattice(void) = default;
|
virtual ~Lattice(void) = default;
|
||||||
|
|
||||||
|
void reset(GridBase* grid) {
|
||||||
|
if (_grid != grid) {
|
||||||
|
_grid = grid;
|
||||||
|
_odata.resize(grid->oSites());
|
||||||
|
checkerboard = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){
|
template<class sobj> strong_inline Lattice<vobj> & operator = (const sobj & r){
|
||||||
PARALLEL_FOR_LOOP
|
PARALLEL_FOR_LOOP
|
||||||
for(int ss=0;ss<_grid->oSites();ss++){
|
for(int ss=0;ss<_grid->oSites();ss++){
|
||||||
|
@ -27,17 +27,17 @@ with this program; if not, write to the Free Software Foundation, Inc.,
|
|||||||
directory
|
directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
#ifndef GENERIC_HMC_RUNNER
|
#ifndef GRID_GENERIC_HMC_RUNNER
|
||||||
#define GENERIC_HMC_RUNNER
|
#define GRID_GENERIC_HMC_RUNNER
|
||||||
|
|
||||||
namespace Grid {
|
namespace Grid {
|
||||||
namespace QCD {
|
namespace QCD {
|
||||||
|
|
||||||
// Virtual Class for HMC specific for gauge theories
|
// Virtual Class for HMC specific for gauge theories
|
||||||
// implement a specific theory by defining the BuildTheAction
|
// implement a specific theory by defining the BuildTheAction
|
||||||
template <class Implementation, class RepresentationsPolicy = NoHirep>
|
template <class Implementation, class RepresentationsPolicy = NoHirep>
|
||||||
class BinaryHmcRunnerTemplate {
|
class BinaryHmcRunnerTemplate {
|
||||||
public:
|
public:
|
||||||
INHERIT_FIELD_TYPES(Implementation);
|
INHERIT_FIELD_TYPES(Implementation);
|
||||||
typedef Implementation ImplPolicy;
|
typedef Implementation ImplPolicy;
|
||||||
|
|
||||||
@ -56,8 +56,10 @@ namespace QCD {
|
|||||||
IntegratorParameters MDparameters;
|
IntegratorParameters MDparameters;
|
||||||
|
|
||||||
GridCartesian * UGrid;
|
GridCartesian * UGrid;
|
||||||
GridCartesian * FGrid;
|
|
||||||
GridRedBlackCartesian *UrbGrid;
|
GridRedBlackCartesian *UrbGrid;
|
||||||
|
|
||||||
|
// These two are unnecessary, eliminate
|
||||||
|
GridCartesian * FGrid;
|
||||||
GridRedBlackCartesian *FrbGrid;
|
GridRedBlackCartesian *FrbGrid;
|
||||||
|
|
||||||
std::vector<int> SerialSeed;
|
std::vector<int> SerialSeed;
|
||||||
@ -68,11 +70,11 @@ namespace QCD {
|
|||||||
ParallelSeed = P;
|
ParallelSeed = P;
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual void BuildTheAction(int argc, char **argv) = 0; // necessary?
|
virtual void BuildTheAction(int argc, char **argv) = 0; // necessary?
|
||||||
|
|
||||||
// A couple of wrapper classes
|
// A couple of wrapper classes
|
||||||
template <class IOCheckpointer>
|
template <class IOCheckpointer>
|
||||||
void Run(int argc, char **argv, IOCheckpointer &Checkpoint) {
|
void Run(int argc, char **argv, IOCheckpointer &Checkpoint) {
|
||||||
NoSmearing<Implementation> S;
|
NoSmearing<Implementation> S;
|
||||||
Runner(argc, argv, Checkpoint, S);
|
Runner(argc, argv, Checkpoint, S);
|
||||||
}
|
}
|
||||||
@ -83,6 +85,8 @@ namespace QCD {
|
|||||||
}
|
}
|
||||||
//////////////////////////////
|
//////////////////////////////
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
template <class SmearingPolicy, class IOCheckpointer>
|
template <class SmearingPolicy, class IOCheckpointer>
|
||||||
void Runner(int argc,
|
void Runner(int argc,
|
||||||
@ -141,11 +145,7 @@ namespace QCD {
|
|||||||
Field U(UGrid);
|
Field U(UGrid);
|
||||||
|
|
||||||
|
|
||||||
typedef MinimumNorm2<Implementation,
|
typedef MinimumNorm2<Implementation, SmearingPolicy, RepresentationsPolicy> IntegratorType; // change here to change the algorithm
|
||||||
SmearingPolicy,
|
|
||||||
RepresentationsPolicy>
|
|
||||||
IntegratorType; // change here to change the algorithm
|
|
||||||
|
|
||||||
IntegratorType MDynamics(UGrid, MDparameters, TheAction, Smearing);
|
IntegratorType MDynamics(UGrid, MDparameters, TheAction, Smearing);
|
||||||
|
|
||||||
HMCparameters HMCpar;
|
HMCparameters HMCpar;
|
||||||
@ -187,7 +187,7 @@ namespace QCD {
|
|||||||
// Run it
|
// Run it
|
||||||
HMC.evolve();
|
HMC.evolve();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// These are for gauge fields
|
// These are for gauge fields
|
||||||
typedef BinaryHmcRunnerTemplate<PeriodicGimplR> BinaryHmcRunner;
|
typedef BinaryHmcRunnerTemplate<PeriodicGimplR> BinaryHmcRunner;
|
||||||
@ -199,6 +199,7 @@ namespace QCD {
|
|||||||
|
|
||||||
typedef BinaryHmcRunnerTemplate<ScalarImplR, ScalarFields>
|
typedef BinaryHmcRunnerTemplate<ScalarImplR, ScalarFields>
|
||||||
ScalarBinaryHmcRunner;
|
ScalarBinaryHmcRunner;
|
||||||
}
|
|
||||||
}
|
} // namespace QCD
|
||||||
#endif
|
} // namespace Grid
|
||||||
|
#endif
|
||||||
|
@ -189,7 +189,8 @@ class Integrator {
|
|||||||
|
|
||||||
// Initialization of momenta and actions
|
// Initialization of momenta and actions
|
||||||
void refresh(Field& U, GridParallelRNG& pRNG) {
|
void refresh(Field& U, GridParallelRNG& pRNG) {
|
||||||
assert(P._grid == U._grid);
|
//assert(P._grid == U._grid);
|
||||||
|
P.reset(U._grid);
|
||||||
std::cout << GridLogIntegrator << "Integrator refresh\n";
|
std::cout << GridLogIntegrator << "Integrator refresh\n";
|
||||||
FieldImplementation::generate_momenta(P, pRNG);
|
FieldImplementation::generate_momenta(P, pRNG);
|
||||||
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
|
|
||||||
Grid physics library, www.github.com/paboyle/Grid
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
Source file: ./lib/simd/Grid_avx.h
|
Source file: ./lib/simd/Grid_avx.h
|
||||||
|
|
||||||
@ -29,15 +29,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
See the full license in the file "LICENSE" in the top level distribution directory
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
//----------------------------------------------------------------------
|
|
||||||
/*! @file Grid_avx.h
|
|
||||||
@brief Optimization libraries for AVX1/2 instructions set
|
|
||||||
|
|
||||||
Using intrinsics
|
|
||||||
*/
|
|
||||||
// Time-stamp: <2015-06-16 23:30:41 neo>
|
|
||||||
//----------------------------------------------------------------------
|
|
||||||
|
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
#ifdef AVXFMA4
|
#ifdef AVXFMA4
|
||||||
#include <x86intrin.h>
|
#include <x86intrin.h>
|
||||||
@ -66,9 +57,9 @@ namespace Optimization {
|
|||||||
double f[4];
|
double f[4];
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Vsplat{
|
struct Vsplat{
|
||||||
//Complex float
|
// Complex float
|
||||||
inline __m256 operator()(float a, float b){
|
inline __m256 operator()(float a, float b) {
|
||||||
return _mm256_set_ps(b,a,b,a,b,a,b,a);
|
return _mm256_set_ps(b,a,b,a,b,a,b,a);
|
||||||
}
|
}
|
||||||
// Real float
|
// Real float
|
||||||
@ -90,7 +81,7 @@ namespace Optimization {
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct Vstore{
|
struct Vstore{
|
||||||
//Float
|
//Float
|
||||||
inline void operator()(__m256 a, float* F){
|
inline void operator()(__m256 a, float* F){
|
||||||
_mm256_store_ps(F,a);
|
_mm256_store_ps(F,a);
|
||||||
}
|
}
|
||||||
@ -119,15 +110,15 @@ namespace Optimization {
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct Vset{
|
struct Vset{
|
||||||
// Complex float
|
// Complex float
|
||||||
inline __m256 operator()(Grid::ComplexF *a){
|
inline __m256 operator()(Grid::ComplexF *a){
|
||||||
return _mm256_set_ps(a[3].imag(),a[3].real(),a[2].imag(),a[2].real(),a[1].imag(),a[1].real(),a[0].imag(),a[0].real());
|
return _mm256_set_ps(a[3].imag(),a[3].real(),a[2].imag(),a[2].real(),a[1].imag(),a[1].real(),a[0].imag(),a[0].real());
|
||||||
}
|
}
|
||||||
// Complex double
|
// Complex double
|
||||||
inline __m256d operator()(Grid::ComplexD *a){
|
inline __m256d operator()(Grid::ComplexD *a){
|
||||||
return _mm256_set_pd(a[1].imag(),a[1].real(),a[0].imag(),a[0].real());
|
return _mm256_set_pd(a[1].imag(),a[1].real(),a[0].imag(),a[0].real());
|
||||||
}
|
}
|
||||||
// Real float
|
// Real float
|
||||||
inline __m256 operator()(float *a){
|
inline __m256 operator()(float *a){
|
||||||
return _mm256_set_ps(a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]);
|
return _mm256_set_ps(a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]);
|
||||||
}
|
}
|
||||||
@ -144,8 +135,8 @@ namespace Optimization {
|
|||||||
|
|
||||||
template <typename Out_type, typename In_type>
|
template <typename Out_type, typename In_type>
|
||||||
struct Reduce{
|
struct Reduce{
|
||||||
//Need templated class to overload output type
|
// Need templated class to overload output type
|
||||||
//General form must generate error if compiled
|
// General form must generate error if compiled
|
||||||
inline Out_type operator()(In_type in){
|
inline Out_type operator()(In_type in){
|
||||||
printf("Error, using wrong Reduce function\n");
|
printf("Error, using wrong Reduce function\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
@ -224,7 +215,7 @@ namespace Optimization {
|
|||||||
ymm1 = _mm256_shuffle_ps(b,b,_MM_SELECT_FOUR_FOUR(2,3,0,1)); // ymm1 <- br,bi
|
ymm1 = _mm256_shuffle_ps(b,b,_MM_SELECT_FOUR_FOUR(2,3,0,1)); // ymm1 <- br,bi
|
||||||
ymm2 = _mm256_shuffle_ps(a,a,_MM_SELECT_FOUR_FOUR(3,3,1,1)); // ymm2 <- ai,ai
|
ymm2 = _mm256_shuffle_ps(a,a,_MM_SELECT_FOUR_FOUR(3,3,1,1)); // ymm2 <- ai,ai
|
||||||
ymm1 = _mm256_mul_ps(ymm1,ymm2); // ymm1 <- br ai, ai bi
|
ymm1 = _mm256_mul_ps(ymm1,ymm2); // ymm1 <- br ai, ai bi
|
||||||
return _mm256_addsub_ps(ymm0,ymm1);
|
return _mm256_addsub_ps(ymm0,ymm1);
|
||||||
#endif
|
#endif
|
||||||
#if defined (AVXFMA4)
|
#if defined (AVXFMA4)
|
||||||
__m256 a_real = _mm256_shuffle_ps(a,a,_MM_SELECT_FOUR_FOUR(2,2,0,0)); // ar ar,
|
__m256 a_real = _mm256_shuffle_ps(a,a,_MM_SELECT_FOUR_FOUR(2,2,0,0)); // ar ar,
|
||||||
@ -241,10 +232,10 @@ namespace Optimization {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
// Complex double
|
// Complex double
|
||||||
inline __m256d operator()(__m256d a, __m256d b){
|
inline __m256d operator()(__m256d a, __m256d b) {
|
||||||
//Multiplication of (ak+ibk)*(ck+idk)
|
// Multiplication of (ak+ibk)*(ck+idk)
|
||||||
// a + i b can be stored as a data structure
|
// a + i b can be stored as a data structure
|
||||||
//From intel optimisation reference guide
|
// From intel optimisation reference guide
|
||||||
/*
|
/*
|
||||||
movsldup xmm0, Src1; load real parts into the destination,
|
movsldup xmm0, Src1; load real parts into the destination,
|
||||||
; a1, a1, a0, a0
|
; a1, a1, a0, a0
|
||||||
@ -268,7 +259,7 @@ namespace Optimization {
|
|||||||
__m256d ymm0,ymm1,ymm2;
|
__m256d ymm0,ymm1,ymm2;
|
||||||
ymm0 = _mm256_shuffle_pd(a,a,0x0); // ymm0 <- ar ar, ar,ar b'00,00
|
ymm0 = _mm256_shuffle_pd(a,a,0x0); // ymm0 <- ar ar, ar,ar b'00,00
|
||||||
ymm0 = _mm256_mul_pd(ymm0,b); // ymm0 <- ar bi, ar br
|
ymm0 = _mm256_mul_pd(ymm0,b); // ymm0 <- ar bi, ar br
|
||||||
ymm1 = _mm256_shuffle_pd(b,b,0x5); // ymm1 <- br,bi b'01,01
|
ymm1 = _mm256_shuffle_pd(b,b,0x5); // ymm1 <- br,bi b'01,01
|
||||||
ymm2 = _mm256_shuffle_pd(a,a,0xF); // ymm2 <- ai,ai b'11,11
|
ymm2 = _mm256_shuffle_pd(a,a,0xF); // ymm2 <- ai,ai b'11,11
|
||||||
ymm1 = _mm256_mul_pd(ymm1,ymm2); // ymm1 <- br ai, ai bi
|
ymm1 = _mm256_mul_pd(ymm1,ymm2); // ymm1 <- br ai, ai bi
|
||||||
return _mm256_addsub_pd(ymm0,ymm1);
|
return _mm256_addsub_pd(ymm0,ymm1);
|
||||||
@ -365,10 +356,10 @@ namespace Optimization {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Div{
|
struct Div {
|
||||||
// Real float
|
// Real float
|
||||||
inline __m256 operator()(__m256 a, __m256 b){
|
inline __m256 operator()(__m256 a, __m256 b) {
|
||||||
return _mm256_div_ps(a,b);
|
return _mm256_div_ps(a, b);
|
||||||
}
|
}
|
||||||
// Real double
|
// Real double
|
||||||
inline __m256d operator()(__m256d a, __m256d b){
|
inline __m256d operator()(__m256d a, __m256d b){
|
||||||
@ -454,7 +445,7 @@ namespace Optimization {
|
|||||||
#define _mm256_alignr_epi64_grid(ret,a,b,n) ret=(__m256d) _mm256_alignr_epi8((__m256i)a,(__m256i)b,(n*8)%16)
|
#define _mm256_alignr_epi64_grid(ret,a,b,n) ret=(__m256d) _mm256_alignr_epi8((__m256i)a,(__m256i)b,(n*8)%16)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined (AVX1) || defined (AVXFMA)
|
#if defined (AVX1) || defined (AVXFMA)
|
||||||
#define _mm256_alignr_epi32_grid(ret,a,b,n) { \
|
#define _mm256_alignr_epi32_grid(ret,a,b,n) { \
|
||||||
__m128 aa, bb; \
|
__m128 aa, bb; \
|
||||||
\
|
\
|
||||||
@ -487,7 +478,7 @@ namespace Optimization {
|
|||||||
|
|
||||||
struct Rotate{
|
struct Rotate{
|
||||||
|
|
||||||
static inline __m256 rotate(__m256 in,int n){
|
static inline __m256 rotate(__m256 in,int n){
|
||||||
switch(n){
|
switch(n){
|
||||||
case 0: return tRotate<0>(in);break;
|
case 0: return tRotate<0>(in);break;
|
||||||
case 1: return tRotate<1>(in);break;
|
case 1: return tRotate<1>(in);break;
|
||||||
@ -500,7 +491,7 @@ namespace Optimization {
|
|||||||
default: assert(0);
|
default: assert(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
static inline __m256d rotate(__m256d in,int n){
|
static inline __m256d rotate(__m256d in,int n){
|
||||||
switch(n){
|
switch(n){
|
||||||
case 0: return tRotate<0>(in);break;
|
case 0: return tRotate<0>(in);break;
|
||||||
case 1: return tRotate<1>(in);break;
|
case 1: return tRotate<1>(in);break;
|
||||||
@ -509,28 +500,28 @@ namespace Optimization {
|
|||||||
default: assert(0);
|
default: assert(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<int n>
|
|
||||||
static inline __m256 tRotate(__m256 in){
|
|
||||||
__m256 tmp = Permute::Permute0(in);
|
|
||||||
__m256 ret = in;
|
|
||||||
if ( n > 3 ) {
|
|
||||||
_mm256_alignr_epi32_grid(ret,in,tmp,n);
|
|
||||||
} else {
|
|
||||||
_mm256_alignr_epi32_grid(ret,tmp,in,n);
|
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
};
|
|
||||||
|
|
||||||
template<int n>
|
template<int n>
|
||||||
static inline __m256d tRotate(__m256d in){
|
static inline __m256 tRotate(__m256 in){
|
||||||
__m256d tmp = Permute::Permute0(in);
|
__m256 tmp = Permute::Permute0(in);
|
||||||
__m256d ret = in;
|
__m256 ret;
|
||||||
if ( n > 1 ) {
|
if ( n > 3 ) {
|
||||||
_mm256_alignr_epi64_grid(ret,in,tmp,n);
|
_mm256_alignr_epi32_grid(ret,in,tmp,n);
|
||||||
} else {
|
} else {
|
||||||
_mm256_alignr_epi64_grid(ret,tmp,in,n);
|
_mm256_alignr_epi32_grid(ret,tmp,in,n);
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int n>
|
||||||
|
static inline __m256d tRotate(__m256d in){
|
||||||
|
__m256d tmp = Permute::Permute0(in);
|
||||||
|
__m256d ret;
|
||||||
|
if ( n > 1 ) {
|
||||||
|
_mm256_alignr_epi64_grid(ret,in,tmp,n);
|
||||||
|
} else {
|
||||||
|
_mm256_alignr_epi64_grid(ret,tmp,in,n);
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
};
|
};
|
||||||
@ -543,7 +534,7 @@ namespace Optimization {
|
|||||||
__m256 v1,v2;
|
__m256 v1,v2;
|
||||||
v1=Optimization::Permute::Permute0(in); // avx 256; quad complex single
|
v1=Optimization::Permute::Permute0(in); // avx 256; quad complex single
|
||||||
v1= _mm256_add_ps(v1,in);
|
v1= _mm256_add_ps(v1,in);
|
||||||
v2=Optimization::Permute::Permute1(v1);
|
v2=Optimization::Permute::Permute1(v1);
|
||||||
v1 = _mm256_add_ps(v1,v2);
|
v1 = _mm256_add_ps(v1,v2);
|
||||||
u256f conv; conv.v = v1;
|
u256f conv; conv.v = v1;
|
||||||
return Grid::ComplexF(conv.f[0],conv.f[1]);
|
return Grid::ComplexF(conv.f[0],conv.f[1]);
|
||||||
@ -555,15 +546,15 @@ namespace Optimization {
|
|||||||
__m256 v1,v2;
|
__m256 v1,v2;
|
||||||
v1 = Optimization::Permute::Permute0(in); // avx 256; octo-double
|
v1 = Optimization::Permute::Permute0(in); // avx 256; octo-double
|
||||||
v1 = _mm256_add_ps(v1,in);
|
v1 = _mm256_add_ps(v1,in);
|
||||||
v2 = Optimization::Permute::Permute1(v1);
|
v2 = Optimization::Permute::Permute1(v1);
|
||||||
v1 = _mm256_add_ps(v1,v2);
|
v1 = _mm256_add_ps(v1,v2);
|
||||||
v2 = Optimization::Permute::Permute2(v1);
|
v2 = Optimization::Permute::Permute2(v1);
|
||||||
v1 = _mm256_add_ps(v1,v2);
|
v1 = _mm256_add_ps(v1,v2);
|
||||||
u256f conv; conv.v=v1;
|
u256f conv; conv.v=v1;
|
||||||
return conv.f[0];
|
return conv.f[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//Complex double Reduce
|
//Complex double Reduce
|
||||||
template<>
|
template<>
|
||||||
inline Grid::ComplexD Reduce<Grid::ComplexD, __m256d>::operator()(__m256d in){
|
inline Grid::ComplexD Reduce<Grid::ComplexD, __m256d>::operator()(__m256d in){
|
||||||
@ -573,14 +564,14 @@ namespace Optimization {
|
|||||||
u256d conv; conv.v = v1;
|
u256d conv; conv.v = v1;
|
||||||
return Grid::ComplexD(conv.f[0],conv.f[1]);
|
return Grid::ComplexD(conv.f[0],conv.f[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
//Real double Reduce
|
//Real double Reduce
|
||||||
template<>
|
template<>
|
||||||
inline Grid::RealD Reduce<Grid::RealD, __m256d>::operator()(__m256d in){
|
inline Grid::RealD Reduce<Grid::RealD, __m256d>::operator()(__m256d in){
|
||||||
__m256d v1,v2;
|
__m256d v1,v2;
|
||||||
v1 = Optimization::Permute::Permute0(in); // avx 256; quad double
|
v1 = Optimization::Permute::Permute0(in); // avx 256; quad double
|
||||||
v1 = _mm256_add_pd(v1,in);
|
v1 = _mm256_add_pd(v1,in);
|
||||||
v2 = Optimization::Permute::Permute1(v1);
|
v2 = Optimization::Permute::Permute1(v1);
|
||||||
v1 = _mm256_add_pd(v1,v2);
|
v1 = _mm256_add_pd(v1,v2);
|
||||||
u256d conv; conv.v = v1;
|
u256d conv; conv.v = v1;
|
||||||
return conv.f[0];
|
return conv.f[0];
|
||||||
@ -593,17 +584,17 @@ namespace Optimization {
|
|||||||
printf("Reduce : Missing integer implementation -> FIX\n");
|
printf("Reduce : Missing integer implementation -> FIX\n");
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Here assign types
|
// Here assign types
|
||||||
|
|
||||||
typedef __m256 SIMD_Ftype; // Single precision type
|
typedef __m256 SIMD_Ftype; // Single precision type
|
||||||
typedef __m256d SIMD_Dtype; // Double precision type
|
typedef __m256d SIMD_Dtype; // Double precision type
|
||||||
typedef __m256i SIMD_Itype; // Integer type
|
typedef __m256i SIMD_Itype; // Integer type
|
||||||
|
|
||||||
// prefetching
|
// prefetching
|
||||||
inline void v_prefetch0(int size, const char *ptr){
|
inline void v_prefetch0(int size, const char *ptr){
|
||||||
for(int i=0;i<size;i+=64){ // Define L1 linesize above
|
for(int i=0;i<size;i+=64){ // Define L1 linesize above
|
||||||
_mm_prefetch(ptr+i+4096,_MM_HINT_T1);
|
_mm_prefetch(ptr+i+4096,_MM_HINT_T1);
|
||||||
@ -611,7 +602,7 @@ namespace Optimization {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
inline void prefetch_HINT_T0(const char *ptr){
|
inline void prefetch_HINT_T0(const char *ptr){
|
||||||
_mm_prefetch(ptr,_MM_HINT_T0);
|
_mm_prefetch(ptr, _MM_HINT_T0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Function name aliases
|
// Function name aliases
|
||||||
@ -620,7 +611,7 @@ namespace Optimization {
|
|||||||
typedef Optimization::Vset VsetSIMD;
|
typedef Optimization::Vset VsetSIMD;
|
||||||
typedef Optimization::Vstream VstreamSIMD;
|
typedef Optimization::Vstream VstreamSIMD;
|
||||||
|
|
||||||
template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S,T>;
|
template <typename S, typename T> using ReduceSIMD = Optimization::Reduce<S, T>;
|
||||||
|
|
||||||
// Arithmetic operations
|
// Arithmetic operations
|
||||||
typedef Optimization::Sum SumSIMD;
|
typedef Optimization::Sum SumSIMD;
|
||||||
@ -632,4 +623,4 @@ namespace Optimization {
|
|||||||
typedef Optimization::TimesMinusI TimesMinusISIMD;
|
typedef Optimization::TimesMinusI TimesMinusISIMD;
|
||||||
typedef Optimization::TimesI TimesISIMD;
|
typedef Optimization::TimesI TimesISIMD;
|
||||||
|
|
||||||
}
|
} // namespace Grid
|
||||||
|
@ -46,4 +46,4 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif // COMPILER_CHECK_H
|
||||||
|
@ -63,6 +63,11 @@ class HMCRunnerParameters : Serializable {
|
|||||||
class HmcRunner : public BinaryHmcRunner {
|
class HmcRunner : public BinaryHmcRunner {
|
||||||
public:
|
public:
|
||||||
HMCRunnerParameters HMCPar;
|
HMCRunnerParameters HMCPar;
|
||||||
|
void BuildTheAction(int argc, char **argv){}
|
||||||
|
};
|
||||||
|
/*
|
||||||
|
|
||||||
|
// eliminate argc and argv from here
|
||||||
void BuildTheAction(int argc, char **argv)
|
void BuildTheAction(int argc, char **argv)
|
||||||
|
|
||||||
{
|
{
|
||||||
@ -90,6 +95,7 @@ class HmcRunner : public BinaryHmcRunner {
|
|||||||
|
|
||||||
// Add observables
|
// Add observables
|
||||||
// options for checkpointers
|
// options for checkpointers
|
||||||
|
// this can be moved outside the BuildTheAction
|
||||||
//BinaryHmcCheckpointer
|
//BinaryHmcCheckpointer
|
||||||
//ILDGHmcCheckpointer
|
//ILDGHmcCheckpointer
|
||||||
//NerscHmcCheckpointer
|
//NerscHmcCheckpointer
|
||||||
@ -107,9 +113,11 @@ class HmcRunner : public BinaryHmcRunner {
|
|||||||
ObservablesList.push_back(&PlaqLog);
|
ObservablesList.push_back(&PlaqLog);
|
||||||
ObservablesList.push_back(&Checkpoint);
|
ObservablesList.push_back(&Checkpoint);
|
||||||
|
|
||||||
|
// This must run from here so that the grids are defined
|
||||||
Run(argc, argv, Checkpoint); // no smearing
|
Run(argc, argv, Checkpoint); // no smearing
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -136,7 +144,57 @@ int main(int argc, char **argv) {
|
|||||||
|
|
||||||
TheHMC.MDparameters.set(TheHMC.HMCPar.MDsteps, TheHMC.HMCPar.TrajectorLength);
|
TheHMC.MDparameters.set(TheHMC.HMCPar.MDsteps, TheHMC.HMCPar.TrajectorLength);
|
||||||
|
|
||||||
TheHMC.BuildTheAction(argc, argv);
|
//TheHMC.BuildTheAction(argc, argv);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Typedefs to simplify notation
|
||||||
|
typedef WilsonGaugeActionR GaugeAction;
|
||||||
|
typedef WilsonImplR ImplPolicy;
|
||||||
|
typedef WilsonFermionR FermionAction;
|
||||||
|
typedef typename FermionAction::FermionField FermionField;
|
||||||
|
|
||||||
|
// this can be simplified too. MakeDefaultGrid(Nd)
|
||||||
|
TheHMC.UGrid = SpaceTimeGrid::makeFourDimGrid(
|
||||||
|
GridDefaultLatt(),
|
||||||
|
GridDefaultSimd(Nd, vComplex::Nsimd()),
|
||||||
|
GridDefaultMpi());
|
||||||
|
|
||||||
|
|
||||||
|
// Gauge action
|
||||||
|
std::cout << GridLogMessage << "Beta: " << TheHMC.HMCPar.beta << std::endl;
|
||||||
|
GaugeAction Waction(TheHMC.HMCPar.beta);
|
||||||
|
|
||||||
|
// Collect actions
|
||||||
|
ActionLevel<BinaryHmcRunner::Field> Level1(1);
|
||||||
|
Level1.push_back(&Waction);
|
||||||
|
TheHMC.TheAction.push_back(Level1);
|
||||||
|
|
||||||
|
// Add observables
|
||||||
|
// options for checkpointers
|
||||||
|
// this can be moved outside the BuildTheAction
|
||||||
|
//BinaryHmcCheckpointer
|
||||||
|
//ILDGHmcCheckpointer
|
||||||
|
//NerscHmcCheckpointer
|
||||||
|
NerscHmcCheckpointer<BinaryHmcRunner::ImplPolicy> Checkpoint(
|
||||||
|
TheHMC.HMCPar.conf_prefix, TheHMC.HMCPar.rng_prefix, TheHMC.HMCPar.SaveInterval, TheHMC.HMCPar.format);
|
||||||
|
// Can implement also a specific function in the hmcrunner
|
||||||
|
// AddCheckpoint (...) that takes the same parameters + a string/tag
|
||||||
|
// defining the type of the checkpointer
|
||||||
|
// with tags can be implemented by overloading and no ifs
|
||||||
|
// Then force all checkpoint to have few common functions
|
||||||
|
// return an object that is then passed to the Run function
|
||||||
|
|
||||||
|
PlaquetteLogger<BinaryHmcRunner::ImplPolicy> PlaqLog(
|
||||||
|
std::string("Plaquette"));
|
||||||
|
TheHMC.ObservablesList.push_back(&PlaqLog);
|
||||||
|
TheHMC.ObservablesList.push_back(&Checkpoint);
|
||||||
|
|
||||||
|
// This must run from here so that the grids are defined
|
||||||
|
TheHMC.Run(argc, argv, Checkpoint); // no smearing
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Grid_finalize();
|
Grid_finalize();
|
||||||
}
|
}
|
||||||
|
@ -67,7 +67,7 @@ int main(int argc, char** argv) {
|
|||||||
GridParallelRNG RNG4(UGrid);
|
GridParallelRNG RNG4(UGrid);
|
||||||
RNG4.SeedFixedIntegers(seeds4);
|
RNG4.SeedFixedIntegers(seeds4);
|
||||||
|
|
||||||
std::cout << GridLogMessage << "Generating random ferrmion field" << std::endl;
|
std::cout << GridLogMessage << "Generating random fermion field" << std::endl;
|
||||||
LatticeFermion src(FGrid);
|
LatticeFermion src(FGrid);
|
||||||
random(RNG5, src);
|
random(RNG5, src);
|
||||||
LatticeFermion result(FGrid);
|
LatticeFermion result(FGrid);
|
||||||
@ -96,7 +96,7 @@ int main(int argc, char** argv) {
|
|||||||
GridStopWatch CGTimer;
|
GridStopWatch CGTimer;
|
||||||
|
|
||||||
SchurDiagMooeeOperator<DomainWallFermionVec5dR, LatticeFermion> HermOpEO(Ddwf);
|
SchurDiagMooeeOperator<DomainWallFermionVec5dR, LatticeFermion> HermOpEO(Ddwf);
|
||||||
ConjugateGradient<LatticeFermion> CG(1.0e-8, 10000, 0);// switch off the assert
|
ConjugateGradient<LatticeFermion> CG(1.0e-8, 10000, 0); // switch off the assert
|
||||||
|
|
||||||
Ddwf.ZeroCounters();
|
Ddwf.ZeroCounters();
|
||||||
CGTimer.Start();
|
CGTimer.Start();
|
||||||
@ -110,4 +110,4 @@ int main(int argc, char** argv) {
|
|||||||
Ddwf.Report();
|
Ddwf.Report();
|
||||||
|
|
||||||
Grid_finalize();
|
Grid_finalize();
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user