mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-04 19:25:56 +01:00
Booster update
This commit is contained in:
parent
228bbb9d81
commit
7e5bd46dd3
@ -31,7 +31,7 @@ Author: Peter Boyle <pboyle@bnl.gov>
|
|||||||
#include <hipblas/hipblas.h>
|
#include <hipblas/hipblas.h>
|
||||||
#endif
|
#endif
|
||||||
#ifdef GRID_CUDA
|
#ifdef GRID_CUDA
|
||||||
#include <hipblas/hipblas.h>
|
#include <cublas_v2.h>
|
||||||
#endif
|
#endif
|
||||||
#ifdef GRID_SYCL
|
#ifdef GRID_SYCL
|
||||||
#include <oneapi/mkl.hpp>
|
#include <oneapi/mkl.hpp>
|
||||||
@ -51,7 +51,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
typedef hipblasHandle_t gridblasHandle_t;
|
typedef hipblasHandle_t gridblasHandle_t;
|
||||||
#endif
|
#endif
|
||||||
#ifdef GRID_CUDA
|
#ifdef GRID_CUDA
|
||||||
typedef cudablasHandle_t gridblasHandle_t;
|
typedef cublasHandle_t gridblasHandle_t;
|
||||||
#endif
|
#endif
|
||||||
#ifdef GRID_SYCL
|
#ifdef GRID_SYCL
|
||||||
typedef cl::sycl::queue *gridblasHandle_t;
|
typedef cl::sycl::queue *gridblasHandle_t;
|
||||||
@ -78,6 +78,7 @@ public:
|
|||||||
#ifdef GRID_CUDA
|
#ifdef GRID_CUDA
|
||||||
std::cout << "cublasCreate"<<std::endl;
|
std::cout << "cublasCreate"<<std::endl;
|
||||||
cublasCreate(&gridblasHandle);
|
cublasCreate(&gridblasHandle);
|
||||||
|
cublasSetPointerMode(gridblasHandle, CUBLAS_POINTER_MODE_DEVICE);
|
||||||
#endif
|
#endif
|
||||||
#ifdef GRID_HIP
|
#ifdef GRID_HIP
|
||||||
std::cout << "hipblasCreate"<<std::endl;
|
std::cout << "hipblasCreate"<<std::endl;
|
||||||
|
@ -280,20 +280,16 @@ void StaggeredKernels<Impl>::DhopImproved(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
|
|
||||||
if( interior && exterior ) {
|
if( interior && exterior ) {
|
||||||
if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGeneric,1); return;}
|
if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGeneric,1); return;}
|
||||||
#ifndef GRID_CUDA
|
|
||||||
if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHand,1); return;}
|
if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHand,1); return;}
|
||||||
|
#ifndef GRID_CUDA
|
||||||
if (Opt == OptInlineAsm ) { ASM_CALL(DhopSiteAsm); return;}
|
if (Opt == OptInlineAsm ) { ASM_CALL(DhopSiteAsm); return;}
|
||||||
#endif
|
#endif
|
||||||
} else if( interior ) {
|
} else if( interior ) {
|
||||||
if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericInt,1); return;}
|
if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericInt,1); return;}
|
||||||
#ifndef GRID_CUDA
|
|
||||||
if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandInt,1); return;}
|
if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandInt,1); return;}
|
||||||
#endif
|
|
||||||
} else if( exterior ) {
|
} else if( exterior ) {
|
||||||
if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericExt,1); return;}
|
if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericExt,1); return;}
|
||||||
#ifndef GRID_CUDA
|
|
||||||
if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandExt,1); return;}
|
if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandExt,1); return;}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
assert(0 && " Kernel optimisation case not covered ");
|
assert(0 && " Kernel optimisation case not covered ");
|
||||||
}
|
}
|
||||||
@ -322,19 +318,13 @@ void StaggeredKernels<Impl>::DhopNaive(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
|
|
||||||
if( interior && exterior ) {
|
if( interior && exterior ) {
|
||||||
if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGeneric,0); return;}
|
if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGeneric,0); return;}
|
||||||
#ifndef GRID_CUDA
|
|
||||||
if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHand,0); return;}
|
if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHand,0); return;}
|
||||||
#endif
|
|
||||||
} else if( interior ) {
|
} else if( interior ) {
|
||||||
if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericInt,0); return;}
|
if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericInt,0); return;}
|
||||||
#ifndef GRID_CUDA
|
|
||||||
if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandInt,0); return;}
|
if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandInt,0); return;}
|
||||||
#endif
|
|
||||||
} else if( exterior ) {
|
} else if( exterior ) {
|
||||||
if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericExt,0); return;}
|
if (Opt == OptGeneric ) { KERNEL_CALL(DhopSiteGenericExt,0); return;}
|
||||||
#ifndef GRID_CUDA
|
|
||||||
if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandExt,0); return;}
|
if (Opt == OptHandUnroll ) { KERNEL_CALL(DhopSiteHandExt,0); return;}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,10 +5,12 @@ LIME=/p/home/jusers/boyle2/juwels/gm2dwf/boyle/
|
|||||||
--enable-gen-simd-width=64 \
|
--enable-gen-simd-width=64 \
|
||||||
--enable-shm=nvlink \
|
--enable-shm=nvlink \
|
||||||
--enable-accelerator=cuda \
|
--enable-accelerator=cuda \
|
||||||
|
--disable-gparity \
|
||||||
|
--disable-fermion-reps \
|
||||||
--with-lime=$LIME \
|
--with-lime=$LIME \
|
||||||
--disable-accelerator-cshift \
|
--enable-accelerator-cshift \
|
||||||
--disable-unified \
|
--disable-unified \
|
||||||
CXX=nvcc \
|
CXX=nvcc \
|
||||||
LDFLAGS="-cudart shared " \
|
LDFLAGS="-cudart shared " \
|
||||||
CXXFLAGS="-ccbin mpicxx -gencode arch=compute_80,code=sm_80 -std=c++14 -cudart shared"
|
CXXFLAGS="-ccbin mpicxx -gencode arch=compute_80,code=sm_80 -std=c++17 -cudart shared -lcublas"
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
module load GCC/9.3.0
|
module load GCC
|
||||||
module load GMP/6.2.0
|
module load GMP
|
||||||
module load MPFR/4.1.0
|
module load MPFR
|
||||||
module load OpenMPI/4.1.0rc1
|
module load OpenMPI
|
||||||
module load CUDA/11.3
|
module load CUDA
|
||||||
|
Loading…
x
Reference in New Issue
Block a user