1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-06 20:25:56 +01:00
This commit is contained in:
nmeyer-ur 2020-04-09 16:19:25 +02:00
parent d5708e0eb2
commit d79ab03a6c
4 changed files with 74 additions and 4 deletions

View File

@ -43,7 +43,7 @@ Author: Nils Meyer <nils.meyer@ur.de>
#define MAYBEPERM(A,perm) if (perm) { A ; }
#define LOAD_CHI(base) LOAD_CHI_A64FXd(base)
#define ZERO_PSI ZERO_PSI_A64FXd
#define ADD_RESULT(A,B)
#define ADD_RESULT(base,basep) LOAD_CHI_A64FXd(base); ADD_RESULT_INTERNAL_A64FXd; RESULT_A64FXd(base)
#define XP_PROJMEM(base) LOAD_CHIMU_A64FXd(base); XP_PROJ_A64FXd
#define YP_PROJMEM(base) LOAD_CHIMU_A64FXd(base); YP_PROJ_A64FXd
#define ZP_PROJMEM(base) LOAD_CHIMU_A64FXd(base); ZP_PROJ_A64FXd
@ -691,3 +691,23 @@ asm ( \
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
);
// ADD_RESULT_INTERNAL
#define ADD_RESULT_INTERNAL_A64FXd \
asm ( \
"fadd z0.d, p5/m, z0.d, z12.d \n\t" \
"fadd z1.d, p5/m, z1.d, z13.d \n\t" \
"fadd z2.d, p5/m, z2.d, z14.d \n\t" \
"fadd z3.d, p5/m, z3.d, z15.d \n\t" \
"fadd z4.d, p5/m, z4.d, z16.d \n\t" \
"fadd z5.d, p5/m, z5.d, z17.d \n\t" \
"fadd z6.d, p5/m, z6.d, z24.d \n\t" \
"fadd z7.d, p5/m, z7.d, z25.d \n\t" \
"fadd z8.d, p5/m, z8.d, z26.d \n\t" \
"fadd z9.d, p5/m, z9.d, z27.d \n\t" \
"fadd z10.d, p5/m, z10.d, z28.d \n\t" \
"fadd z11.d, p5/m, z11.d, z29.d \n\t" \
: \
: \
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
);

View File

@ -43,7 +43,7 @@ Author: Nils Meyer <nils.meyer@ur.de>
#define MAYBEPERM(A,perm) if (perm) { A ; }
#define LOAD_CHI(base) LOAD_CHI_A64FXf(base)
#define ZERO_PSI ZERO_PSI_A64FXf
#define ADD_RESULT(A,B)
#define ADD_RESULT(base,basep) LOAD_CHI_A64FXf(base); ADD_RESULT_INTERNAL_A64FXf; RESULT_A64FXf(base)
#define XP_PROJMEM(base) LOAD_CHIMU_A64FXf(base); XP_PROJ_A64FXf
#define YP_PROJMEM(base) LOAD_CHIMU_A64FXf(base); YP_PROJ_A64FXf
#define ZP_PROJMEM(base) LOAD_CHIMU_A64FXf(base); ZP_PROJ_A64FXf
@ -705,3 +705,23 @@ asm ( \
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
);
// ADD_RESULT_INTERNAL
#define ADD_RESULT_INTERNAL_A64FXf \
asm ( \
"fadd z0.f, p5/m, z0.f, z12.f \n\t" \
"fadd z1.f, p5/m, z1.f, z13.f \n\t" \
"fadd z2.f, p5/m, z2.f, z14.f \n\t" \
"fadd z3.f, p5/m, z3.f, z15.f \n\t" \
"fadd z4.f, p5/m, z4.f, z16.f \n\t" \
"fadd z5.f, p5/m, z5.f, z17.f \n\t" \
"fadd z6.f, p5/m, z6.f, z24.f \n\t" \
"fadd z7.f, p5/m, z7.f, z25.f \n\t" \
"fadd z8.f, p5/m, z8.f, z26.f \n\t" \
"fadd z9.f, p5/m, z9.f, z27.f \n\t" \
"fadd z10.f, p5/m, z10.f, z28.f \n\t" \
"fadd z11.f, p5/m, z11.f, z29.f \n\t" \
: \
: \
: "p5","cc","z0","z1","z2","z3","z4","z5","z6","z7","z8","z9","z10","z11","z12","z13","z14","z15","z16","z17","z18","z19","z20","z21","z22","z23","z24","z25","z26","z27","z28","z29","z30","z31" \
);

View File

@ -43,7 +43,7 @@ Author: Nils Meyer <nils.meyer@ur.de>
#define MAYBEPERM(A,perm) if (perm) { A ; }
#define LOAD_CHI(base) LOAD_CHI_A64FXd(base)
#define ZERO_PSI ZERO_PSI_A64FXd
#define ADD_RESULT(A,B)
#define ADD_RESULT(base,basep) LOAD_CHI_A64FXd(base); ADD_RESULT_INTERNAL_A64FXd; RESULT_A64FXd(base)
#define XP_PROJMEM(base) LOAD_CHIMU_A64FXd(base); XP_PROJ_A64FXd
#define YP_PROJMEM(base) LOAD_CHIMU_A64FXd(base); YP_PROJ_A64FXd
#define ZP_PROJMEM(base) LOAD_CHIMU_A64FXd(base); ZP_PROJ_A64FXd
@ -567,3 +567,18 @@ Author: Nils Meyer <nils.meyer@ur.de>
result_31 = __svzero(result_31); \
result_32 = __svzero(result_32);
// ADD_RESULT_INTERNAL
#define ADD_RESULT_INTERNAL_A64FXd \
result_00 = svadd_x(pg1, result_00, Chimu_00); \
result_01 = svadd_x(pg1, result_01, Chimu_01); \
result_02 = svadd_x(pg1, result_02, Chimu_02); \
result_10 = svadd_x(pg1, result_10, Chimu_10); \
result_11 = svadd_x(pg1, result_11, Chimu_11); \
result_12 = svadd_x(pg1, result_12, Chimu_12); \
result_20 = svadd_x(pg1, result_20, Chimu_20); \
result_21 = svadd_x(pg1, result_21, Chimu_21); \
result_22 = svadd_x(pg1, result_22, Chimu_22); \
result_30 = svadd_x(pg1, result_30, Chimu_30); \
result_31 = svadd_x(pg1, result_31, Chimu_31); \
result_32 = svadd_x(pg1, result_32, Chimu_32);

View File

@ -43,7 +43,7 @@ Author: Nils Meyer <nils.meyer@ur.de>
#define MAYBEPERM(A,perm) if (perm) { A ; }
#define LOAD_CHI(base) LOAD_CHI_A64FXf(base)
#define ZERO_PSI ZERO_PSI_A64FXf
#define ADD_RESULT(A,B)
#define ADD_RESULT(base,basep) LOAD_CHI_A64FXf(base); ADD_RESULT_INTERNAL_A64FXf; RESULT_A64FXf(base)
#define XP_PROJMEM(base) LOAD_CHIMU_A64FXf(base); XP_PROJ_A64FXf
#define YP_PROJMEM(base) LOAD_CHIMU_A64FXf(base); YP_PROJ_A64FXf
#define ZP_PROJMEM(base) LOAD_CHIMU_A64FXf(base); ZP_PROJ_A64FXf
@ -576,3 +576,18 @@ Author: Nils Meyer <nils.meyer@ur.de>
result_31 = __svzero(result_31); \
result_32 = __svzero(result_32);
// ADD_RESULT_INTERNAL
#define ADD_RESULT_INTERNAL_A64FXf \
result_00 = svadd_x(pg1, result_00, Chimu_00); \
result_01 = svadd_x(pg1, result_01, Chimu_01); \
result_02 = svadd_x(pg1, result_02, Chimu_02); \
result_10 = svadd_x(pg1, result_10, Chimu_10); \
result_11 = svadd_x(pg1, result_11, Chimu_11); \
result_12 = svadd_x(pg1, result_12, Chimu_12); \
result_20 = svadd_x(pg1, result_20, Chimu_20); \
result_21 = svadd_x(pg1, result_21, Chimu_21); \
result_22 = svadd_x(pg1, result_22, Chimu_22); \
result_30 = svadd_x(pg1, result_30, Chimu_30); \
result_31 = svadd_x(pg1, result_31, Chimu_31); \
result_32 = svadd_x(pg1, result_32, Chimu_32);