Mirror of https://github.com/paboyle/Grid.git
Synced 2024-11-12 16:55:37 +00:00
Commit message: Fence propagation from SYCL
This commit is contained in:
Parent: 3f2fd49db4
Commit: a00ae981e0
@@ -37,9 +37,8 @@ Author: Christoph Lehner <christoph@lhnr.de>
|
|||||||
#include <hip/hip_runtime_api.h>
|
#include <hip/hip_runtime_api.h>
|
||||||
#endif
|
#endif
|
||||||
#ifdef GRID_SYCL
|
#ifdef GRID_SYCL
|
||||||
|
|
||||||
#endif
|
|
||||||
#define GRID_SYCL_LEVEL_ZERO_IPC
|
#define GRID_SYCL_LEVEL_ZERO_IPC
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
@@ -459,11 +459,7 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
|
|||||||
|
|
||||||
if( interior && exterior ) {
|
if( interior && exterior ) {
|
||||||
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSite); return;}
|
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSite); return;}
|
||||||
#ifdef SYCL_HACK
|
|
||||||
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL_TMP(HandDhopSiteSycl); return; }
|
|
||||||
#else
|
|
||||||
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSite); return;}
|
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSite); return;}
|
||||||
#endif
|
|
||||||
#ifndef GRID_CUDA
|
#ifndef GRID_CUDA
|
||||||
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSite); return;}
|
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSite); return;}
|
||||||
#endif
|
#endif
|
||||||
@@ -474,6 +470,7 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
|
|||||||
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); return;}
|
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); return;}
|
||||||
#endif
|
#endif
|
||||||
} else if( exterior ) {
|
} else if( exterior ) {
|
||||||
|
acceleratorFenceComputeStream();
|
||||||
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteExt); return;}
|
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteExt); return;}
|
||||||
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteExt); return;}
|
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteExt); return;}
|
||||||
#ifndef GRID_CUDA
|
#ifndef GRID_CUDA
|
||||||
@@ -498,10 +495,9 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
|
|||||||
#ifndef GRID_CUDA
|
#ifndef GRID_CUDA
|
||||||
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDag); return;}
|
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDag); return;}
|
||||||
#endif
|
#endif
|
||||||
acceleratorFenceComputeStream();
|
|
||||||
} else if( interior ) {
|
} else if( interior ) {
|
||||||
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDagInt); return;}
|
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALLNB(GenericDhopSiteDagInt); return;}
|
||||||
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagInt); return;}
|
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALLNB(HandDhopSiteDagInt); return;}
|
||||||
#ifndef GRID_CUDA
|
#ifndef GRID_CUDA
|
||||||
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagInt); return;}
|
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagInt); return;}
|
||||||
#endif
|
#endif
|
||||||
|
@@ -585,6 +585,7 @@ public:
|
|||||||
decompress.Exchange(mp,vp0,vp1,type,o);
|
decompress.Exchange(mp,vp0,vp1,type,o);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
if ( mm.size() ) acceleratorFenceComputeStream();
|
||||||
mergetime+=usecond();
|
mergetime+=usecond();
|
||||||
|
|
||||||
decompresstime-=usecond();
|
decompresstime-=usecond();
|
||||||
@@ -595,7 +596,9 @@ public:
|
|||||||
decompress.Decompress(kp,mp,o);
|
decompress.Decompress(kp,mp,o);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
if ( dd.size() ) acceleratorFenceComputeStream();
|
||||||
decompresstime+=usecond();
|
decompresstime+=usecond();
|
||||||
|
|
||||||
}
|
}
|
||||||
////////////////////////////////////////
|
////////////////////////////////////////
|
||||||
// Set up routines
|
// Set up routines
|
||||||
|
Loading…
Reference in New Issue
Block a user