1
0
mirror of https://github.com/paboyle/Grid.git synced 2026-05-28 13:04:17 +01:00

tests/debug: extend hipfft fail reproducer with hipMemset and sync variants

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Peter Boyle
2026-05-19 22:02:02 -04:00
parent f08c755ae6
commit 707d059766
+40 -20
View File
@@ -1,24 +1,41 @@
/* /*
* Minimal program demonstrating hipfft HIPFFT_PARSE_ERROR on ROCm 7 / hipFFT 1.0.20. * Isolating the hipfft HIPFFT_PARSE_ERROR on ROCm 7 / hipFFT 1.0.20.
* *
* Bug: hipfftMakePlanMany returns HIPFFT_PARSE_ERROR (12) for transform sizes * Tests three orderings with an empty rocFFT cache to find which GPU
* smaller than 32 when a hipMalloc is issued before plan creation. * operation before plan creation triggers the failure:
* A) hipMalloc only — hypothesis: passes (no async GPU work)
* B) hipMalloc + hipMemset — hypothesis: fails (async GPU work in flight)
* C) hipMalloc + hipMemset — hypothesis: passes (work completed before plan)
* + hipDeviceSynchronize
* *
* Compile: * Compile:
* hipcc -o Test_hipfft_bug_fail Test_hipfft_bug_fail.cc -lhipfft * hipcc -o Test_hipfft_bug_fail Test_hipfft_bug_fail.cc -lhipfft
* *
* Run with empty rocFFT cache: * Run with empty cache:
* rm -rf ~/.cache/rocfft * rm -rf ~/.cache/
* ./Test_hipfft_bug_fail * ./Test_hipfft_bug_fail
*
* Expected (broken): hipfftMakePlanMany returns 12 (HIPFFT_PARSE_ERROR) for G < 32.
* See Test_hipfft_bug_pass.cc for the workaround.
*/ */
#include <cstdio> #include <cstdio>
#include <hipfft/hipfft.h> #include <hipfft/hipfft.h>
#include <hip/hip_runtime.h> #include <hip/hip_runtime.h>
// Map a hipfftResult to a short label for the report lines.
// Only a genuine HIPFFT_PARSE_ERROR is reported as "PARSE_ERROR"; any other
// failure gets its own label so it cannot be mistaken for the bug under test.
static const char *res(hipfftResult rv) {
  if (rv == HIPFFT_SUCCESS) {
    return "SUCCESS";
  }
  if (rv == HIPFFT_PARSE_ERROR) {
    return "PARSE_ERROR";
  }
  return "OTHER_ERROR";
}
// Create a throwaway hipFFT plan (rank 1, transform length G, type HIPFFT_Z2Z,
// batch count `howmany`), destroy it again, and report how plan creation went.
//
// Returns the status of hipfftMakePlanMany, or the status of hipfftCreate if
// the handle itself could not be created.
static hipfftResult makePlan(int G, int howmany) {
  int n[] = {G};
  hipfftHandle p;
  size_t workSize = 0;

  hipfftResult rv = hipfftCreate(&p);
  if (rv != HIPFFT_SUCCESS) {
    // No handle exists: do not pass an uninitialised `p` to the plan/destroy calls.
    return rv;
  }

  rv = hipfftMakePlanMany(p, 1, n,
                          nullptr, 1, G, nullptr, 1, G,
                          HIPFFT_Z2Z, howmany, &workSize);
  hipfftDestroy(p);
  return rv;
}
int main(void) { int main(void) {
hipDeviceProp_t prop; hipDeviceProp_t prop;
hipGetDeviceProperties(&prop, 0); hipGetDeviceProperties(&prop, 0);
@@ -30,22 +47,25 @@ int main(void) {
for (int G : {8, 16, 32}) { for (int G : {8, 16, 32}) {
int howmany = 512; int howmany = 512;
int n[] = {G};
long nelems = (long)G * howmany; long nelems = (long)G * howmany;
// hipMalloc BEFORE plan creation — triggers HIPFFT_PARSE_ERROR for G < 32
hipfftDoubleComplex *buf = nullptr; hipfftDoubleComplex *buf = nullptr;
hipMalloc(&buf, nelems * sizeof(hipfftDoubleComplex)); hipMalloc(&buf, nelems * sizeof(hipfftDoubleComplex));
hipfftHandle p; // A: hipMalloc only, no GPU work
size_t workSize = 0; hipfftResult rvA = makePlan(G, howmany);
hipfftCreate(&p); printf("G=%-4d A) hipMalloc only : %s\n", G, res(rvA));
hipfftResult rv = hipfftMakePlanMany(p, 1, n,
nullptr, 1, G, nullptr, 1, G, // B: hipMalloc + hipMemset (async GPU work in flight)
HIPFFT_Z2Z, howmany, &workSize); hipMemset(buf, 0, nelems * sizeof(hipfftDoubleComplex));
printf("G=%-4d hipMalloc-then-plan: %d (%s)\n", hipfftResult rvB = makePlan(G, howmany);
G, (int)rv, rv == HIPFFT_SUCCESS ? "HIPFFT_SUCCESS" : "HIPFFT_PARSE_ERROR"); printf("G=%-4d B) hipMalloc + hipMemset : %s\n", G, res(rvB));
hipfftDestroy(p);
// C: hipMalloc + hipMemset + sync before plan
hipMemset(buf, 0, nelems * sizeof(hipfftDoubleComplex));
hipDeviceSynchronize();
hipfftResult rvC = makePlan(G, howmany);
printf("G=%-4d C) hipMalloc + hipMemset + sync: %s\n\n", G, res(rvC));
hipFree(buf); hipFree(buf);
} }