mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-24 10:42:03 +01:00
Rework of WilsonFlow class. Fixed a logic error in the smear method where the step index was initialized to 1 rather than 0, resulting in the logged output value of tau being too large by epsilon. Previously smear_adaptive would maintain the current value of tau as a class member variable whereas smear would compute it separately; now both methods maintain the current value internally and it is updated by the evolve_step routines. Both evolve methods are now const. smear_adaptive now also maintains the current value of epsilon internally, allowing it to be a const method and also allowing the same class instance to be reused without needing to be reset. Replaced the fixed evaluation of the plaquette energy density and plaquette topological charge during the smearing with a highly flexible general strategy where the user can add arbitrary measurements as functional objects that are evaluated at an arbitrary frequency. By default the same plaquette-based measurements are performed, but additional example functions are provided where the smearing is performed with different choices of measurement that are returned as an array for further processing. Added a method to compute the energy density using the Cloverleaf approach, which has smaller discretization errors. Added a new tensor utility operation, copyLane, which allows for the copying of a single SIMD lane between two instances of the same tensor type but potentially different precisions. To LocalCoherenceLanczos, added the option to compute the high/low eval of the fine operator on every restart to aid in tuning the Chebyshev. Added Test_field_array_io, which demonstrates and tests a single-file write of an arbitrary array of fields. Added Test_evec_compression, which generates evecs using Lanczos and attempts to compress them using the local coherence technique. Added Test_compressed_lanczos_gparity, which demonstrates the local coherence Lanczos for G-parity BCs. Added HMC main programs for the 40ID and 48ID G-parity lattices.
254 lines
9.8 KiB
C++
254 lines
9.8 KiB
C++
/*************************************************************************************
|
|
n
|
|
Grid physics library, www.github.com/paboyle/Grid
|
|
|
|
Source file: ./lib/tensors/Tensor_extract_merge.h
|
|
|
|
Copyright (C) 2015
|
|
|
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|
Author: neo <cossu@post.kek.jp>
|
|
Author: paboyle <paboyle@ph.ed.ac.uk>
|
|
Author: Christopher Kelly <ckelly@phys.columbia.edu>
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License along
|
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
|
|
See the full license in the file "LICENSE" in the top level distribution directory
|
|
*************************************************************************************/
|
|
/* END LEGAL */
|
|
#pragma once
|
|
|
|
#include <string.h>
|
|
|
|
//#pragma GCC optimize("no-strict-aliasing")
|
|
|
|
NAMESPACE_BEGIN(Grid);
|
|
|
|
/////////////////////////////////////////////////////////////////
|
|
// Generic extract/merge/permute
|
|
/////////////////////////////////////////////////////////////////
|
|
|
|
// Container aliases shared by the extract/merge routines in this file.
// The template parameter is spelled T rather than __T: identifiers containing a
// double underscore are reserved for the implementation.
// NOTE(review): GRID_MAX_SIMD is presumably the capacity bound of AcceleratorVector -- confirm
// against its definition.

// One pointer to a scalar object per extracted SIMD lane.
template<class T> using ExtractPointerArray = AcceleratorVector<T *,GRID_MAX_SIMD>;
// One scalar object (held by value) per extracted SIMD lane.
template<class T> using ExtractBuffer = AcceleratorVector<T ,GRID_MAX_SIMD>;
|
|
|
|
//void extract(const vobj &vec,ExtractBuffer<typename vobj::scalar_object> &extracted);
|
|
//void extract(const vobj &vec,ExtractPointerArray<sobj> &extracted, int offset);
|
|
//void merge(vobj &vec,ExtractBuffer<typename vobj::scalar_object> &extracted)
|
|
//void merge(vobj &vec,ExtractPointerArray<typename vobj::scalar_object> &extracted)
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
// Extract to contiguous array scalar object
|
|
////////////////////////////////////////////////////////////////////////
|
|
// Unpack a SIMD object into a contiguous buffer of scalar objects.
//
//   vec       : source object, addressed as `words` consecutive vectors of Nsimd scalars each
//   extracted : destination buffer; its size() decides how many lanes are read
//
// With stride = Nsimd/extracted.size(), lane e*stride of vector word `word` is read, assigned to
// the scalar type of sobj and written to scalar slot e*words+word counted from the start of
// extracted. Every value travels through a local temporary via memcpy.
// NOTE(review): extracted.size() is assumed non-zero (it is used as a divisor) -- not checked here.
template<class vobj,class sobj> accelerator
void extract(const vobj &vec,ExtractBuffer<sobj> &extracted)
{
  using out_scalar = typename GridTypeMapper<sobj>::scalar_type;
  using in_scalar  = typename GridTypeMapper<vobj>::scalar_type;
  using in_vector  = typename GridTypeMapper<vobj>::vector_type;

  const int nWords = sizeof(vobj)/sizeof(in_vector);
  const int nLanes = in_vector::Nsimd();
  const int nExtr  = extracted.size();
  const int stride = nLanes/nExtr;

  const in_scalar *src = (const in_scalar *)&vec;
  out_scalar      *dst = (out_scalar *)&extracted[0];

  in_scalar  laneValue;
  out_scalar converted;
  for(int word=0; word<nWords; ++word){
    const in_scalar *wordBase = src + word*nLanes;
    for(int e=0; e<nExtr; ++e){
      memcpy(&laneValue, wordBase + e*stride, sizeof(laneValue));
      converted = laneValue; // assignment, so the scalar types of vobj and sobj may differ
      memcpy(dst + e*nWords + word, &converted, sizeof(converted));
    }
  }
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
// Merge a contiguous array of scalar objects
|
|
////////////////////////////////////////////////////////////////////////
|
|
// Pack a contiguous buffer of scalar objects back into a SIMD object.
//
//   vec       : destination object, addressed as `words` consecutive vectors of Nsimd scalars each
//   extracted : source buffer; its size() decides how many scalar objects are consumed
//
// With stride = Nsimd/extracted.size(), scalar slot e*words+word of the buffer is assigned to the
// scalar type of vobj and written to the `stride` consecutive lanes starting at lane e*stride of
// vector word `word`. Every value travels through a local temporary via memcpy.
// NOTE(review): extracted.size() is assumed non-zero (it is used as a divisor) -- not checked here.
template<class vobj,class sobj> accelerator
void merge(vobj &vec,ExtractBuffer<sobj> &extracted)
{
  using in_scalar  = typename GridTypeMapper<sobj>::scalar_type;
  using out_scalar = typename GridTypeMapper<vobj>::scalar_type;
  using out_vector = typename GridTypeMapper<vobj>::vector_type;

  const int nWords = sizeof(vobj)/sizeof(out_vector);
  const int nLanes = out_vector::Nsimd();
  const int nExtr  = extracted.size();
  const int stride = nLanes/nExtr;

  const in_scalar *src = (const in_scalar *)&extracted[0];
  out_scalar      *dst = (out_scalar *)&vec;

  out_scalar laneValue;
  in_scalar  scalarValue;
  for(int word=0; word<nWords; ++word){
    for(int e=0; e<nExtr; ++e){
      const in_scalar *from = src + e*nWords + word;
      out_scalar      *to   = dst + word*nLanes + e*stride;
      // The same scalar is replicated into `stride` neighbouring lanes.
      for(int rep=0; rep<stride; ++rep){
        memcpy(&scalarValue, from, sizeof(scalarValue));
        laneValue = scalarValue; // assignment, so the scalar types of sobj and vobj may differ
        memcpy(to + rep, &laneValue, sizeof(laneValue));
      }
    }
  }
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
// Extract/Insert a single lane
|
|
////////////////////////////////////////////////////////////////////////
|
|
// Return the scalar object held in SIMD lane `lane` of vec.
//
//   lane : index of the lane to read
//   vec  : source SIMD object
//
// Both the source and the result are addressed as arrays of
// ExtractTypeMap<scalar_type>::extract_type (a type pun); word w of the result is taken from
// element w*Nsimd+lane of the source.
// NOTE(review): lane is not range-checked here -- presumably 0 <= lane < Nsimd.
template<class vobj> accelerator_inline
typename vobj::scalar_object extractLane(int lane, const vobj & __restrict__ vec)
{
  using vector_t = typename vobj::vector_type;
  using word_t   = typename ExtractTypeMap<typename vobj::scalar_type>::extract_type;

  constexpr int nWords = sizeof(vobj)/sizeof(vector_t);
  constexpr int nLanes = vector_t::Nsimd();

  typename vobj::scalar_object result;
  word_t * __restrict__ out = (word_t *)&result; // type pun onto the scalar object
  word_t * __restrict__ in  = (word_t *)&vec;    // type pun onto the SIMD object
  for(int word=0, at=lane; word<nWords; ++word, at+=nLanes){
    out[word] = in[at];
  }
  return result;
}
|
|
|
|
// Overwrite SIMD lane `lane` of vec with the scalar object `extracted`.
//
//   lane      : index of the lane to write
//   vec       : destination SIMD object; all other lanes are left untouched
//   extracted : scalar object supplying the new lane contents
//
// Both objects are addressed as arrays of ExtractTypeMap<scalar_type>::extract_type (a type pun);
// word w of `extracted` is stored to element w*Nsimd+lane of vec.
// NOTE(review): lane is not range-checked here -- presumably 0 <= lane < Nsimd.
template<class vobj> accelerator_inline
void insertLane(int lane, vobj & __restrict__ vec,const typename vobj::scalar_object & __restrict__ extracted)
{
  using vector_t = typename vobj::vector_type;
  using word_t   = typename ExtractTypeMap<typename vector_t::scalar_type>::extract_type;

  constexpr int nWords = sizeof(vobj)/sizeof(vector_t);
  constexpr int nLanes = vector_t::Nsimd();

  word_t * __restrict__ in  = (word_t *)&extracted; // type pun onto the scalar object
  word_t * __restrict__ out = (word_t *)&vec;       // type pun onto the SIMD object
  for(int word=0, at=lane; word<nWords; ++word, at+=nLanes){
    out[at] = in[word];
  }
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
// Extract to a bunch of scalar object pointers of different scalar type, with offset. Useful for precision change
|
|
////////////////////////////////////////////////////////////////////////
|
|
// Unpack a SIMD object into scalar objects reached through an array of pointers.
//
//   vec       : source object, addressed as `words` consecutive vectors of Nsimd scalars each
//   extracted : array of pointers; entry e designates the destination for lane e*stride
//   offset    : index applied to each pointer, i.e. the target object is extracted[e][offset]
//
// With stride = Nsimd/extracted.size(), lane e*stride of vector word `word` is read, assigned to
// the scalar type of sobj (which may differ in precision from that of vobj) and written to scalar
// word `word` of extracted[e][offset]. Every value travels through a local temporary via memcpy.
// NOTE(review): extracted.size() is assumed non-zero (it is used as a divisor) -- not checked here.
template<class vobj, class sobj> accelerator
void extract(const vobj &vec,const ExtractPointerArray<sobj> &extracted, int offset)
{
  using out_scalar = typename GridTypeMapper<sobj>::scalar_type;
  using in_scalar  = typename GridTypeMapper<vobj>::scalar_type;
  using in_vector  = typename GridTypeMapper<vobj>::vector_type;

  const int nWords = sizeof(vobj)/sizeof(in_vector);
  const int nLanes = in_vector::Nsimd();
  const int nExtr  = extracted.size();
  const int stride = nLanes/nExtr;

  const in_scalar *src = (const in_scalar *)&vec;

  in_scalar  laneValue;
  out_scalar converted;
  for(int word=0; word<nWords; ++word){
    const in_scalar *wordBase = src + word*nLanes;
    for(int e=0; e<nExtr; ++e){
      out_scalar *dst = (out_scalar *)&extracted[e][offset];
      memcpy(&laneValue, wordBase + e*stride, sizeof(laneValue));
      converted = laneValue; // may do a precision conversion
      memcpy(dst + word, &converted, sizeof(converted));
    }
  }
}
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
// Merge bunch of scalar object pointers of different scalar type, with offset. Useful for precision change
|
|
////////////////////////////////////////////////////////////////////////
|
|
// Pack scalar objects reached through an array of pointers into a SIMD object.
//
//   vec       : destination object, addressed as `words` consecutive vectors of Nsimd scalars each
//   extracted : array of pointers; entry e designates the source for lanes e*stride .. e*stride+stride-1
//   offset    : index applied to each pointer, i.e. the source object is extracted[e][offset]
//
// With stride = Nsimd/extracted.size(), scalar word `word` of extracted[e][offset] is assigned to
// the scalar type of vobj (which may differ in precision from that of sobj) and written to the
// `stride` consecutive lanes starting at lane e*stride of vector word `word`. Every value travels
// through a local temporary via memcpy.
// NOTE(review): extracted.size() is assumed non-zero (it is used as a divisor) -- not checked here.
template<class vobj, class sobj> accelerator
void merge(vobj &vec,const ExtractPointerArray<sobj> &extracted, int offset)
{
  using in_scalar  = typename GridTypeMapper<sobj>::scalar_type;
  using out_scalar = typename GridTypeMapper<vobj>::scalar_type;
  using out_vector = typename GridTypeMapper<vobj>::vector_type;

  const int nWords = sizeof(vobj)/sizeof(out_vector);
  const int nLanes = out_vector::Nsimd();
  const int nExtr  = extracted.size();
  const int stride = nLanes/nExtr;

  out_scalar *dst = (out_scalar *)&vec;

  out_scalar laneValue;
  in_scalar  scalarValue;
  for(int word=0; word<nWords; ++word){
    for(int e=0; e<nExtr; ++e){
      const in_scalar *from = (const in_scalar *)&extracted[e][offset];
      out_scalar      *to   = dst + word*nLanes + e*stride;
      // The same scalar is replicated into `stride` neighbouring lanes.
      for(int rep=0; rep<stride; ++rep){
        memcpy(&scalarValue, from + word, sizeof(scalarValue));
        laneValue = scalarValue; // may do a precision conversion
        memcpy(to + rep, &laneValue, sizeof(laneValue));
      }
    }
  }
}
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////
|
|
//Copy a single lane of a SIMD tensor type from one object to another
|
|
//Output object must be of the same tensor type but may be of a different precision (i.e. it can have a different root data type)
|
|
///////////////////////////////////////////////////////////////////////////////////
|
|
template<class vobjOut, class vobjIn>
|
|
accelerator_inline
|
|
void copyLane(vobjOut & __restrict__ vecOut, int lane_out, const vobjIn & __restrict__ vecIn, int lane_in)
|
|
{
|
|
static_assert( std::is_same<typename vobjOut::DoublePrecision, typename vobjIn::DoublePrecision>::value == 1, "copyLane: tensor types must be the same" ); //if tensor types are same the DoublePrecision type must be the same
|
|
|
|
typedef typename vobjOut::vector_type ovector_type;
|
|
typedef typename vobjIn::vector_type ivector_type;
|
|
constexpr int owords=sizeof(vobjOut)/sizeof(ovector_type);
|
|
constexpr int iwords=sizeof(vobjIn)/sizeof(ivector_type);
|
|
static_assert( owords == iwords, "copyLane: Expected number of vector words in input and output objects to be equal" );
|
|
|
|
typedef typename vobjOut::scalar_type oscalar_type;
|
|
typedef typename vobjIn::scalar_type iscalar_type;
|
|
typedef typename ExtractTypeMap<oscalar_type>::extract_type oextract_type;
|
|
typedef typename ExtractTypeMap<iscalar_type>::extract_type iextract_type;
|
|
|
|
typedef oextract_type * opointer;
|
|
typedef iextract_type * ipointer;
|
|
|
|
constexpr int oNsimd=ovector_type::Nsimd();
|
|
constexpr int iNsimd=ivector_type::Nsimd();
|
|
|
|
iscalar_type itmp;
|
|
oscalar_type otmp;
|
|
|
|
opointer __restrict__ op = (opointer)&vecOut;
|
|
ipointer __restrict__ ip = (ipointer)&vecIn;
|
|
for(int w=0;w<owords;w++){
|
|
memcpy( (char*)&itmp, (char*)(ip + lane_in + iNsimd*w), sizeof(iscalar_type) );
|
|
otmp = itmp; //potential precision change
|
|
memcpy( (char*)(op + lane_out + oNsimd*w), (char*)&otmp, sizeof(oscalar_type) );
|
|
}
|
|
}
|
|
|
|
|
|
NAMESPACE_END(Grid);
|
|
|