1
0
mirror of https://github.com/paboyle/Grid.git synced 2026-05-27 12:34:16 +01:00

Remove hip specific files

This commit is contained in:
Peter Boyle
2026-05-21 12:34:15 -04:00
parent 7803580aa6
commit 4d527e81fa
4 changed files with 0 additions and 447 deletions
-76
View File
@@ -1,76 +0,0 @@
/*
* Isolating the hipfft HIPFFT_PARSE_ERROR on ROCm 7 / hipFFT 1.0.20.
*
* Tests three orderings with an empty rocFFT cache to find which GPU
* operation before plan creation triggers the failure:
* A) hipMalloc only — hypothesis: passes (no async GPU work)
* B) hipMalloc + hipMemset — hypothesis: fails (async GPU work in flight)
* C) hipMalloc + hipMemset — hypothesis: passes (work completed before plan)
* + hipDeviceSynchronize
*
* Compile:
* hipcc -o Test_hipfft_bug_fail Test_hipfft_bug_fail.cc -lhipfft
*
* Run with empty cache:
* rm -rf ~/.cache/
* ./Test_hipfft_bug_fail
*/
#include <cstdio>
#include <hipfft/hipfft.h>
#include <hip/hip_runtime.h>
static const char *res(hipfftResult rv) {
return rv == HIPFFT_SUCCESS ? "SUCCESS" : "PARSE_ERROR";
}
static hipfftResult makePlan(int G, int howmany) {
int n[] = {G};
hipfftHandle p;
size_t workSize = 0;
hipfftCreate(&p);
hipfftResult rv = hipfftMakePlanMany(p, 1, n,
nullptr, 1, G, nullptr, 1, G,
HIPFFT_Z2Z, howmany, &workSize);
hipfftDestroy(p);
return rv;
}
int main(void) {
hipDeviceProp_t prop;
hipGetDeviceProperties(&prop, 0);
printf("Device: %s\n", prop.name);
#ifdef hipfftVersionMinor
printf("hipFFT version: %d.%d.%d\n\n",
hipfftVersionMajor, hipfftVersionMinor, hipfftVersionPatch);
#endif
for (int G : {4, 8, 16, 32}) {
int howmany = 512;
long nelems = (long)G * howmany;
hipfftDoubleComplex *buf = nullptr;
hipMalloc(&buf, nelems * sizeof(hipfftDoubleComplex));
// Tests ordered so each runs before a prior success can populate the cache.
// B first: hipMalloc + hipMemset (async GPU work in flight)
// If this fails, A (no hipMemset) will pass, confirming hipMemset is the trigger.
hipMemset(buf, 0, nelems * sizeof(hipfftDoubleComplex));
hipfftResult rvB = makePlan(G, howmany);
printf("G=%-4d B) hipMalloc + hipMemset : %s\n", G, res(rvB));
// C: hipMalloc + hipMemset + sync — does syncing before plan creation fix it?
hipMemset(buf, 0, nelems * sizeof(hipfftDoubleComplex));
hipDeviceSynchronize();
hipfftResult rvC = makePlan(G, howmany);
printf("G=%-4d C) hipMalloc + hipMemset + sync: %s\n", G, res(rvC));
// A last: hipMalloc only, no async GPU work — should always pass
hipfftResult rvA = makePlan(G, howmany);
printf("G=%-4d A) hipMalloc only : %s\n\n", G, res(rvA));
hipFree(buf);
}
return 0;
}
-61
View File
@@ -1,61 +0,0 @@
/*
* Minimal program demonstrating the workaround for the hipfft ROCm 7 bug.
*
* Workaround: create the hipfft plan BEFORE any hipMalloc. Plan creation
* for G < 32 then succeeds even with an empty rocFFT cache.
*
* Compile:
* hipcc -o Test_hipfft_bug_pass Test_hipfft_bug_pass.cc -lhipfft
*
* Run:
* rm -rf ~/.cache/rocfft
* ./Test_hipfft_bug_pass
*
* Expected: all G values succeed.
* Compare with Test_hipfft_bug_fail.cc which uses the opposite ordering.
*/
#include <cstdio>
#include <hipfft/hipfft.h>
#include <hip/hip_runtime.h>
int main(void) {
hipDeviceProp_t prop;
hipGetDeviceProperties(&prop, 0);
printf("Device: %s\n", prop.name);
#ifdef hipfftVersionMinor
printf("hipFFT version: %d.%d.%d\n\n",
hipfftVersionMajor, hipfftVersionMinor, hipfftVersionPatch);
#endif
for (int G : {8, 16, 32}) {
int howmany = 512;
int n[] = {G};
long nelems = (long)G * howmany;
// Plan created BEFORE hipMalloc — succeeds for all G
hipfftHandle p;
size_t workSize = 0;
hipfftCreate(&p);
hipfftResult rv = hipfftMakePlanMany(p, 1, n,
nullptr, 1, G, nullptr, 1, G,
HIPFFT_Z2Z, howmany, &workSize);
printf("G=%-4d plan-then-hipMalloc: %d (%s)\n",
G, (int)rv, rv == HIPFFT_SUCCESS ? "HIPFFT_SUCCESS" : "HIPFFT_PARSE_ERROR");
if (rv == HIPFFT_SUCCESS) {
hipfftDoubleComplex *buf = nullptr;
hipMalloc(&buf, nelems * sizeof(hipfftDoubleComplex));
hipMemset(buf, 0, nelems * sizeof(hipfftDoubleComplex));
rv = hipfftExecZ2Z(p, buf, buf, HIPFFT_FORWARD);
hipDeviceSynchronize();
printf("G=%-4d execFwd: %d (%s)\n",
G, (int)rv, rv == HIPFFT_SUCCESS ? "HIPFFT_SUCCESS" : "FAILED");
hipFree(buf);
}
hipfftDestroy(p);
}
return 0;
}
-142
View File
@@ -1,142 +0,0 @@
/*
* Minimal reproducer for hipfftMakePlanMany / hipfftPlanMany failures.
*
* Compile on Frontier (no Grid headers needed):
* hipcc -o Test_hipfft_minimal Test_hipfft_minimal.cc -lhipfft
*
* Run:
* ./Test_hipfft_minimal
*/
#include <cstdio>
#include <cstdlib>
#include <hipfft/hipfft.h>
#include <hip/hip_runtime.h>
static const char *hipfftResultString(hipfftResult r) {
switch (r) {
case HIPFFT_SUCCESS: return "HIPFFT_SUCCESS";
case HIPFFT_INVALID_PLAN: return "HIPFFT_INVALID_PLAN";
case HIPFFT_ALLOC_FAILED: return "HIPFFT_ALLOC_FAILED";
case HIPFFT_INVALID_TYPE: return "HIPFFT_INVALID_TYPE";
case HIPFFT_INVALID_VALUE: return "HIPFFT_INVALID_VALUE";
case HIPFFT_INTERNAL_ERROR: return "HIPFFT_INTERNAL_ERROR";
case HIPFFT_EXEC_FAILED: return "HIPFFT_EXEC_FAILED";
case HIPFFT_SETUP_FAILED: return "HIPFFT_SETUP_FAILED";
case HIPFFT_INVALID_SIZE: return "HIPFFT_INVALID_SIZE";
case HIPFFT_UNALIGNED_DATA: return "HIPFFT_UNALIGNED_DATA";
case HIPFFT_INCOMPLETE_PARAMETER_LIST:return "HIPFFT_INCOMPLETE_PARAMETER_LIST";
case HIPFFT_INVALID_DEVICE: return "HIPFFT_INVALID_DEVICE";
case HIPFFT_PARSE_ERROR: return "HIPFFT_PARSE_ERROR";
case HIPFFT_NO_WORKSPACE: return "HIPFFT_NO_WORKSPACE";
case HIPFFT_NOT_IMPLEMENTED: return "HIPFFT_NOT_IMPLEMENTED";
case HIPFFT_NOT_SUPPORTED: return "HIPFFT_NOT_SUPPORTED";
default: return "UNKNOWN";
}
}
// Plan creation + execution for (G, howmany).
// Tests two orderings to isolate whether a prior hipMalloc poisons hipfft
// plan creation for small G on ROCm 7:
// A) plan BEFORE hipMalloc — hypothesis: succeeds
// B) hipMalloc BEFORE plan — hypothesis: fails for G < 32
static void tryPlanAndExec(int G, long howmany) {
int n[] = {G};
long nelems = (long)G * howmany;
printf("--- G=%-4d howmany=%-10ld total_elems=%-12ld ---\n",
G, howmany, nelems);
// --- A: create plan first, allocate buffer afterwards ---
{
hipfftHandle p;
size_t workSize = 0;
hipfftCreate(&p);
hipfftResult rv = hipfftMakePlanMany(p, 1, n,
nullptr, 1, G, nullptr, 1, G,
HIPFFT_Z2Z, (int)howmany, &workSize);
printf(" plan-first create : %d (%s)\n", (int)rv, hipfftResultString(rv));
if (rv == HIPFFT_SUCCESS) {
hipfftDoubleComplex *buf = nullptr;
hipMalloc(&buf, nelems * sizeof(hipfftDoubleComplex));
hipMemset(buf, 0, nelems * sizeof(hipfftDoubleComplex));
rv = hipfftExecZ2Z(p, buf, buf, HIPFFT_FORWARD);
hipDeviceSynchronize();
printf(" plan-first execFwd: %d (%s)\n", (int)rv, hipfftResultString(rv));
hipFree(buf);
}
hipfftDestroy(p);
}
// --- B: hipMalloc first, create plan afterwards ---
{
hipfftDoubleComplex *buf = nullptr;
hipMalloc(&buf, nelems * sizeof(hipfftDoubleComplex));
hipMemset(buf, 0, nelems * sizeof(hipfftDoubleComplex));
hipfftHandle p;
size_t workSize = 0;
hipfftCreate(&p);
hipfftResult rv = hipfftMakePlanMany(p, 1, n,
nullptr, 1, G, nullptr, 1, G,
HIPFFT_Z2Z, (int)howmany, &workSize);
printf(" malloc-first create : %d (%s)\n", (int)rv, hipfftResultString(rv));
if (rv == HIPFFT_SUCCESS) {
rv = hipfftExecZ2Z(p, buf, buf, HIPFFT_FORWARD);
hipDeviceSynchronize();
printf(" malloc-first execFwd: %d (%s)\n", (int)rv, hipfftResultString(rv));
}
hipfftDestroy(p);
hipFree(buf);
}
printf("\n");
}
int main(void) {
// Print HIP device info
int device = 0;
hipGetDevice(&device);
hipDeviceProp_t prop;
hipGetDeviceProperties(&prop, device);
printf("Device %d: %s warpSize=%d\n\n", device, prop.name, prop.warpSize);
#ifdef hipfftVersionMinor
printf("hipFFT version: %d.%d.%d\n\n",
hipfftVersionMajor, hipfftVersionMinor, hipfftVersionPatch);
#endif
// Original sweep with small howmany (these passed first time)
printf("=== Small howmany (original sweep) ===\n\n");
for (int G : {4, 8, 12, 16, 24, 32, 48, 64})
tryPlanAndExec(G, 512);
// Grid-realistic howmany values derived from actual lattice geometries.
// howmany = Ncomp * product(ldimensions[d] for d != dim)
// For LatticeComplexD: Ncomp=1.
printf("=== Grid-realistic parameters ===\n\n");
// --grid 16.16.16.16 4D FFT (KNOWN TO FAIL in Grid)
// Each dim: G=16, Nperp=16^3=4096
tryPlanAndExec(16, 4096);
// --grid 32.32.32.32 4D FFT (KNOWN TO SUCCEED in Grid)
// Each dim: G=32, Nperp=32^3=32768
tryPlanAndExec(32, 32768);
// --grid 32.32.32.32 Ls=8 5D DWF FFT (KNOWN TO FAIL on dim 0 in Grid)
// dim 0: G=8, Nperp=32^4=1048576
tryPlanAndExec(8, 1048576);
// dim 1-4: G=32, Nperp=8*32^3=262144
tryPlanAndExec(32, 262144);
// Extra intermediate cases to bracket the failure
tryPlanAndExec(16, 1024);
tryPlanAndExec(16, 2048);
tryPlanAndExec(16, 8192);
tryPlanAndExec(8, 4096);
tryPlanAndExec(8, 65536);
tryPlanAndExec(8, 262144);
return 0;
}
-168
View File
@@ -1,168 +0,0 @@
/*
* Reproducer for HIPFFT_PARSE_ERROR (error 12) from hipfftMakePlanMany on
* ROCm 7 / hipFFT 1.0.20 (Frontier, MI210 login and MI250X compute nodes).
*
* Observed failure: G < 32 returns HIPFFT_PARSE_ERROR from all three plan
* creation APIs (hipfftPlanMany, hipfftMakePlanMany, hipfftPlan1d) when a
* device buffer is allocated and zeroed with hipMalloc+hipMemset before the
* plan creation call. G >= 32 succeeds.
*
* Contrast with Test_hipfft_minimal.cc (plan-first ordering) which passes
* for all G even with an empty rocFFT cache.
*
* Compile on Frontier (no Grid headers needed):
* hipcc -o Test_hipfft_repro Test_hipfft_repro.cc -lhipfft
*
* Run with empty cache to reproduce the failure:
* rm -rf ~/.cache/rocfft
* ./Test_hipfft_repro
*/
#include <cstdio>
#include <cstdlib>
#include <hipfft/hipfft.h>
#include <hip/hip_runtime.h>
static const char *hipfftResultString(hipfftResult r) {
switch (r) {
case HIPFFT_SUCCESS: return "HIPFFT_SUCCESS";
case HIPFFT_INVALID_PLAN: return "HIPFFT_INVALID_PLAN";
case HIPFFT_ALLOC_FAILED: return "HIPFFT_ALLOC_FAILED";
case HIPFFT_INVALID_TYPE: return "HIPFFT_INVALID_TYPE";
case HIPFFT_INVALID_VALUE: return "HIPFFT_INVALID_VALUE";
case HIPFFT_INTERNAL_ERROR: return "HIPFFT_INTERNAL_ERROR";
case HIPFFT_EXEC_FAILED: return "HIPFFT_EXEC_FAILED";
case HIPFFT_SETUP_FAILED: return "HIPFFT_SETUP_FAILED";
case HIPFFT_INVALID_SIZE: return "HIPFFT_INVALID_SIZE";
case HIPFFT_UNALIGNED_DATA: return "HIPFFT_UNALIGNED_DATA";
case HIPFFT_INCOMPLETE_PARAMETER_LIST:return "HIPFFT_INCOMPLETE_PARAMETER_LIST";
case HIPFFT_INVALID_DEVICE: return "HIPFFT_INVALID_DEVICE";
case HIPFFT_PARSE_ERROR: return "HIPFFT_PARSE_ERROR";
case HIPFFT_NO_WORKSPACE: return "HIPFFT_NO_WORKSPACE";
case HIPFFT_NOT_IMPLEMENTED: return "HIPFFT_NOT_IMPLEMENTED";
case HIPFFT_NOT_SUPPORTED: return "HIPFFT_NOT_SUPPORTED";
default: return "UNKNOWN";
}
}
// Plan creation + execution for (G, howmany) using hipfftCreate+hipfftMakePlanMany.
// This is the path Grid's FFT.h now uses.
static void tryPlanAndExec(int G, long howmany) {
int n[] = {G};
long nelems = (long)G * howmany;
printf("--- G=%-4d howmany=%-10ld total_elems=%-12ld ---\n",
G, howmany, nelems);
// Allocate device buffer (hipfftDoubleComplex = 16 bytes each)
hipfftDoubleComplex *dbuf = nullptr;
hipError_t herr = hipMalloc(&dbuf, nelems * sizeof(hipfftDoubleComplex));
if (herr != hipSuccess) {
printf(" hipMalloc failed (%d) for %ld elems — skipping\n\n", (int)herr, nelems);
return;
}
hipMemset(dbuf, 0, nelems * sizeof(hipfftDoubleComplex));
// 1. hipfftPlanMany (one-step, nullptr embed) — current Grid path
{
hipfftHandle p;
hipfftResult rv = hipfftPlanMany(&p, 1, n,
nullptr, 1, G,
nullptr, 1, G,
HIPFFT_Z2Z, (int)howmany);
printf(" hipfftPlanMany create : %d (%s)\n", (int)rv, hipfftResultString(rv));
if (rv == HIPFFT_SUCCESS) {
rv = hipfftExecZ2Z(p, dbuf, dbuf, HIPFFT_FORWARD);
hipDeviceSynchronize();
printf(" hipfftPlanMany execFwd: %d (%s)\n", (int)rv, hipfftResultString(rv));
hipfftDestroy(p);
}
}
// 2. hipfftCreate + hipfftMakePlanMany (two-step) — also current Grid path
{
hipfftHandle p;
size_t workSize = 0;
hipfftResult rc = hipfftCreate(&p);
if (rc == HIPFFT_SUCCESS) {
hipfftResult rv = hipfftMakePlanMany(p, 1, n,
nullptr, 1, G,
nullptr, 1, G,
HIPFFT_Z2Z, (int)howmany, &workSize);
printf(" hipfftMakePlanMany : %d (%s) workSize=%zu\n",
(int)rv, hipfftResultString(rv), workSize);
if (rv == HIPFFT_SUCCESS) {
rv = hipfftExecZ2Z(p, dbuf, dbuf, HIPFFT_FORWARD);
hipDeviceSynchronize();
printf(" hipfftMakePlanMany exec : %d (%s)\n", (int)rv, hipfftResultString(rv));
}
hipfftDestroy(p);
} else {
printf(" hipfftCreate : %d (%s)\n", (int)rc, hipfftResultString(rc));
}
}
// 3. hipfftPlan1d (simplest API, batch = howmany)
{
hipfftHandle p;
hipfftResult rv = hipfftPlan1d(&p, G, HIPFFT_Z2Z, (int)howmany);
printf(" hipfftPlan1d create : %d (%s)\n", (int)rv, hipfftResultString(rv));
if (rv == HIPFFT_SUCCESS) {
rv = hipfftExecZ2Z(p, dbuf, dbuf, HIPFFT_FORWARD);
hipDeviceSynchronize();
printf(" hipfftPlan1d execFwd: %d (%s)\n", (int)rv, hipfftResultString(rv));
hipfftDestroy(p);
}
}
hipFree(dbuf);
printf("\n");
}
int main(void) {
// Print HIP device info
int device = 0;
hipGetDevice(&device);
hipDeviceProp_t prop;
hipGetDeviceProperties(&prop, device);
printf("Device %d: %s warpSize=%d\n\n", device, prop.name, prop.warpSize);
#ifdef hipfftVersionMinor
printf("hipFFT version: %d.%d.%d\n\n",
hipfftVersionMajor, hipfftVersionMinor, hipfftVersionPatch);
#endif
// Original sweep with small howmany (these passed first time)
printf("=== Small howmany (original sweep) ===\n\n");
for (int G : {4, 8, 12, 16, 24, 32, 48, 64})
tryPlanAndExec(G, 512);
// Grid-realistic howmany values derived from actual lattice geometries.
// howmany = Ncomp * product(ldimensions[d] for d != dim)
// For LatticeComplexD: Ncomp=1.
printf("=== Grid-realistic parameters ===\n\n");
// --grid 16.16.16.16 4D FFT (KNOWN TO FAIL in Grid)
// Each dim: G=16, Nperp=16^3=4096
tryPlanAndExec(16, 4096);
// --grid 32.32.32.32 4D FFT (KNOWN TO SUCCEED in Grid)
// Each dim: G=32, Nperp=32^3=32768
tryPlanAndExec(32, 32768);
// --grid 32.32.32.32 Ls=8 5D DWF FFT (KNOWN TO FAIL on dim 0 in Grid)
// dim 0: G=8, Nperp=32^4=1048576
tryPlanAndExec(8, 1048576);
// dim 1-4: G=32, Nperp=8*32^3=262144
tryPlanAndExec(32, 262144);
// Extra intermediate cases to bracket the failure
tryPlanAndExec(16, 1024);
tryPlanAndExec(16, 2048);
tryPlanAndExec(16, 8192);
tryPlanAndExec(8, 4096);
tryPlanAndExec(8, 65536);
tryPlanAndExec(8, 262144);
return 0;
}