mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-18 07:47:06 +01:00
Merge branch 'develop' into feature/CG_repro
This commit is contained in:
@ -29,9 +29,13 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
#ifndef _GRID_FFT_H_
|
||||
#define _GRID_FFT_H_
|
||||
|
||||
#ifdef HAVE_FFTW
|
||||
#ifdef HAVE_FFTW
|
||||
#ifdef USE_MKL
|
||||
#include <fftw/fftw3.h>
|
||||
#else
|
||||
#include <fftw3.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
namespace Grid {
|
||||
@ -122,7 +126,8 @@ namespace Grid {
|
||||
|
||||
// Reports the current value of `flops`, converted to double.
double Flops(void)
{
  const double total = flops;
  return total;
}
|
||||
// Reports `flops` divided by `usec`.
// NOTE(review): presumably Mflop/s, assuming `usec` counts microseconds — confirm.
double MFlops(void)
{
  const double rate = flops/usec;
  return rate;
}
|
||||
|
||||
// Reports `usec` converted to double.
double USec(void)
{
  const double asDouble = static_cast<double>(usec);
  return asDouble;
}
|
||||
|
||||
FFT ( GridCartesian * grid ) :
|
||||
vgrid(grid),
|
||||
Nd(grid->_ndimension),
|
||||
|
@ -369,7 +369,7 @@ void Grid_init(int *argc,char ***argv)
|
||||
|
||||
void Grid_finalize(void)
|
||||
{
|
||||
#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3)
|
||||
#if defined (GRID_COMMS_MPI) || defined (GRID_COMMS_MPI3)
|
||||
MPI_Finalize();
|
||||
Grid_unquiesce_nodes();
|
||||
#endif
|
||||
|
@ -93,7 +93,7 @@ void GridLogConfigure(std::vector<std::string> &logstreams) {
|
||||
////////////////////////////////////////////////////////////
|
||||
void Grid_quiesce_nodes(void) {
|
||||
int me = 0;
|
||||
#if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3)
|
||||
#if defined(GRID_COMMS_MPI) || defined(GRID_COMMS_MPI3) || defined(GRID_COMMS_MPI3L)
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
#endif
|
||||
#ifdef GRID_COMMS_SHMEM
|
||||
|
@ -43,6 +43,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
#else
|
||||
#include <sys/syscall.h>
|
||||
#endif
|
||||
#ifdef __x86_64__
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
namespace Grid {
|
||||
|
||||
@ -86,7 +89,6 @@ inline uint64_t cyclecount(void){
|
||||
return tmp;
|
||||
}
|
||||
#elif defined __x86_64__
|
||||
#include <x86intrin.h>
|
||||
inline uint64_t cyclecount(void){
|
||||
return __rdtsc();
|
||||
// unsigned int dummy;
|
||||
|
@ -191,7 +191,7 @@ class ConjugateGradient : public OperatorFunction<Field> {
|
||||
<< LinalgTimer.Elapsed();
|
||||
std::cout << std::endl;
|
||||
|
||||
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 1000.0);
|
||||
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
|
||||
|
||||
if (!CGState.do_repro && ReproTest){
|
||||
CGState.do_repro = true;
|
||||
|
@ -97,7 +97,7 @@ void CartesianCommunicator::Barrier(void){}
|
||||
// Empty body: no data is transferred and `root`, `data` and `bytes` are all ignored.
void CartesianCommunicator::Broadcast(int root,void* data, int bytes) {}
|
||||
// Empty body: no data is transferred and `root`, `data` and `bytes` are all ignored.
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) { }
|
||||
// Always returns 0; `coor` is never read.
int CartesianCommunicator::RankFromProcessorCoor(std::vector<int> &coor) { return 0;}
|
||||
// Unconditionally trips assert(0) (a no-op when NDEBUG is defined); `rank` and `coor` are not used.
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor){ assert(0);}
|
||||
// Assigns `_processor_coor` to `coor`; `rank` is ignored.
void CartesianCommunicator::ProcessorCoorFromRank(int rank, std::vector<int> &coor){ coor = _processor_coor ;}
|
||||
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
|
||||
{
|
||||
source =0;
|
||||
|
@ -10,6 +10,7 @@
|
||||
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@ -53,24 +54,26 @@ WilsonKernels<Impl >::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,
|
||||
}
|
||||
|
||||
#if defined(AVX512)
|
||||
|
||||
#include <simd/Intel512wilson.h>
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
// If we are AVX512 specialise the single precision routine
|
||||
///////////////////////////////////////////////////////////
|
||||
|
||||
#include <simd/Intel512wilson.h>
|
||||
|
||||
#include <simd/Intel512single.h>
|
||||
|
||||
static Vector<vComplexF> signs;
|
||||
|
||||
int setupSigns(void ){
|
||||
Vector<vComplexF> bother(2);
|
||||
static Vector<vComplexF> signsF;
|
||||
|
||||
template<typename vtype>
|
||||
int setupSigns(Vector<vtype>& signs ){
|
||||
Vector<vtype> bother(2);
|
||||
signs = bother;
|
||||
vrsign(signs[0]);
|
||||
visign(signs[1]);
|
||||
return 1;
|
||||
}
|
||||
static int signInit = setupSigns();
|
||||
|
||||
static int signInitF = setupSigns(signsF);
|
||||
|
||||
#define label(A) ilabel(A)
|
||||
#define ilabel(A) ".globl\n" #A ":\n"
|
||||
@ -78,6 +81,8 @@ static Vector<vComplexF> signs;
|
||||
#define MAYBEPERM(A,perm) if (perm) { A ; }
|
||||
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
|
||||
#define FX(A) WILSONASM_ ##A
|
||||
#define COMPLEX_TYPE vComplexF
|
||||
#define signs signsF
|
||||
|
||||
#undef KERNEL_DAG
|
||||
template<> void
|
||||
@ -98,8 +103,8 @@ WilsonKernels<WilsonImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder
|
||||
#undef FX
|
||||
#define FX(A) DWFASM_ ## A
|
||||
#define MAYBEPERM(A,B)
|
||||
#define VMOVIDUP(A,B,C) VBCASTIDUPf(A,B,C)
|
||||
#define VMOVRDUP(A,B,C) VBCASTRDUPf(A,B,C)
|
||||
//#define VMOVIDUP(A,B,C) VBCASTIDUPf(A,B,C)
|
||||
//#define VMOVRDUP(A,B,C) VBCASTRDUPf(A,B,C)
|
||||
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
|
||||
|
||||
#undef KERNEL_DAG
|
||||
@ -113,8 +118,71 @@ template<> void
|
||||
WilsonKernels<DomainWallVec5dImplF>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||
#undef COMPLEX_TYPE
|
||||
#undef signs
|
||||
#undef VMOVRDUP
|
||||
#undef MAYBEPERM
|
||||
#undef MULT_2SPIN
|
||||
#undef FX
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
// If we are AVX512 specialise the double precision routine
|
||||
///////////////////////////////////////////////////////////
|
||||
|
||||
#include <simd/Intel512double.h>
|
||||
|
||||
static Vector<vComplexD> signsD;
|
||||
#define signs signsD
|
||||
static int signInitD = setupSigns(signsD);
|
||||
|
||||
#define MAYBEPERM(A,perm) if (perm) { A ; }
|
||||
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN(ptr,pf)
|
||||
#define FX(A) WILSONASM_ ##A
|
||||
#define COMPLEX_TYPE vComplexD
|
||||
|
||||
#undef KERNEL_DAG
|
||||
template<> void
|
||||
WilsonKernels<WilsonImplD>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||
|
||||
#define KERNEL_DAG
|
||||
template<> void
|
||||
WilsonKernels<WilsonImplD>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||
|
||||
#endif
|
||||
#undef VMOVIDUP
|
||||
#undef VMOVRDUP
|
||||
#undef MAYBEPERM
|
||||
#undef MULT_2SPIN
|
||||
#undef FX
|
||||
#define FX(A) DWFASM_ ## A
|
||||
#define MAYBEPERM(A,B)
|
||||
//#define VMOVIDUP(A,B,C) VBCASTIDUPd(A,B,C)
|
||||
//#define VMOVRDUP(A,B,C) VBCASTRDUPd(A,B,C)
|
||||
#define MULT_2SPIN(ptr,pf) MULT_ADDSUB_2SPIN_LS(ptr,pf)
|
||||
|
||||
#undef KERNEL_DAG
|
||||
template<> void
|
||||
WilsonKernels<DomainWallVec5dImplD>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||
|
||||
#define KERNEL_DAG
|
||||
template<> void
|
||||
WilsonKernels<DomainWallVec5dImplD>::DiracOptAsmDhopSiteDag(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U,SiteHalfSpinor *buf,
|
||||
int ss,int ssU,int Ls,int Ns,const FermionField &in, FermionField &out)
|
||||
#include <qcd/action/fermion/WilsonKernelsAsmBody.h>
|
||||
|
||||
#undef COMPLEX_TYPE
|
||||
#undef signs
|
||||
#undef VMOVRDUP
|
||||
#undef MAYBEPERM
|
||||
#undef MULT_2SPIN
|
||||
#undef FX
|
||||
|
||||
#endif //AVX512
|
||||
|
||||
#define INSTANTIATE_ASM(A)\
|
||||
template void WilsonKernels<A>::DiracOptAsmDhopSite(StencilImpl &st,LebesgueOrder & lo,DoubledGaugeField &U, SiteHalfSpinor *buf,\
|
||||
|
@ -5,7 +5,9 @@
|
||||
const uint64_t plocal =(uint64_t) & in._odata[0];
|
||||
|
||||
// vComplexF isigns[2] = { signs[0], signs[1] };
|
||||
vComplexF *isigns = &signs[0];
|
||||
//COMPLEX_TYPE is vComplexF or vComplexD depending
|
||||
//on the chosen precision
|
||||
COMPLEX_TYPE *isigns = &signs[0];
|
||||
|
||||
MASK_REGS;
|
||||
int nmax=U._grid->oSites();
|
||||
|
@ -116,7 +116,7 @@ class NerscHmcRunnerTemplate {
|
||||
NoSmearing<Gimpl> SmearingPolicy;
|
||||
typedef MinimumNorm2<GaugeField, NoSmearing<Gimpl>, RepresentationsPolicy >
|
||||
IntegratorType; // change here to change the algorithm
|
||||
IntegratorParameters MDpar(20, 1.0);
|
||||
IntegratorParameters MDpar(40, 1.0);
|
||||
IntegratorType MDynamics(UGrid, MDpar, TheAction, SmearingPolicy);
|
||||
|
||||
// Checkpoint strategy
|
||||
|
@ -382,7 +382,6 @@ namespace Optimization {
|
||||
// Some Template specialization
|
||||
|
||||
// Hack for CLANG until mm512_reduce_add_ps etc... are implemented in GCC and Clang releases
|
||||
|
||||
#ifndef __INTEL_COMPILER
|
||||
#warning "Slow reduction due to incomplete reduce intrinsics"
|
||||
//Complex float Reduce
|
||||
|
Reference in New Issue
Block a user