Added the working results from the AMD Hackathon

This commit is contained in:
Srijit Paul
2023-05-16 09:47:38 +01:00
commit 6c79bf7b8e
25 changed files with 4443 additions and 0 deletions
+135
View File
@@ -0,0 +1,135 @@
{
"SU4": [
{
"GBps": 66.53081457635281,
"GFlops": 89.29614774427355,
"L": 8,
"size_MB": 1.5
},
{
"GBps": 555.556774884554,
"GFlops": 745.6556810001229,
"L": 16,
"size_MB": 24.0
},
{
"GBps": 742.2236948591938,
"GFlops": 996.1957799176627,
"L": 24,
"size_MB": 121.5
},
{
"GBps": 826.9383434771103,
"GFlops": 1109.8978565758136,
"L": 32,
"size_MB": 384.0
},
{
"GBps": 842.2222703204826,
"GFlops": 1130.41159593417,
"L": 40,
"size_MB": 937.5
},
{
"GBps": 850.748827016926,
"GFlops": 1141.8557466087684,
"L": 48,
"size_MB": 1944.0
}
],
"axpy": [
{
"GBps": 35.82987973103704,
"GFlops": 3.206003368008695,
"L": 8,
"size_MB": 0.75
},
{
"GBps": 382.8855274375769,
"GFlops": 34.26001705116882,
"L": 16,
"size_MB": 12.0
},
{
"GBps": 837.408777973824,
"GFlops": 74.93006905793541,
"L": 24,
"size_MB": 60.75
},
{
"GBps": 1044.6773688059338,
"GFlops": 93.47614862276701,
"L": 32,
"size_MB": 192.0
},
{
"GBps": 1094.1823360678761,
"GFlops": 97.90577810984188,
"L": 40,
"size_MB": 468.75
},
{
"GBps": 1118.5658428592028,
"GFlops": 100.08757736464482,
"L": 48,
"size_MB": 972.0
},
{
"GBps": 1117.1386444994318,
"GFlops": 99.9598738171423,
"L": 56,
"size_MB": 1800.75
},
{
"GBps": 1130.3969388850894,
"GFlops": 101.14620591687438,
"L": 64,
"size_MB": 3072.0
}
],
"flops": {
"comparison_point_Gflops": 1735.1326862618785,
"results": [
{
"Gflops_dwf4": 900.4946564885497,
"Gflops_staggered": 67.41176470588235,
"Gflops_wilson": 90.09269442262372,
"L": 8
},
{
"Gflops_dwf4": 1550.326392108142,
"Gflops_staggered": 315.7179146516448,
"Gflops_wilson": 409.4796356156333,
"L": 12
},
{
"Gflops_dwf4": 1699.1907401453284,
"Gflops_staggered": 499.0448649798001,
"Gflops_wilson": 727.2638879714644,
"L": 16
},
{
"Gflops_dwf4": 1799.952141959478,
"Gflops_staggered": 747.3489951882253,
"Gflops_wilson": 1205.3232994907394,
"L": 24
},
{
"Gflops_dwf4": 1670.313230564279,
"Gflops_staggered": 809.4875619745635,
"Gflops_wilson": 1172.5954144104264,
"L": 32
}
]
},
"geometry": {
"mpi": [
1,
1,
1,
1
],
"nodes": 1,
"ranks": 1
}
}
+247
View File
@@ -0,0 +1,247 @@
{
"SU4": [
{
"GBps": 132.89061835546906,
"GFlops": 178.36276868186152,
"L": 8,
"size_MB": 3.0
},
{
"GBps": 1115.8992219013821,
"GFlops": 1497.7345824057134,
"L": 16,
"size_MB": 48.0
},
{
"GBps": 1529.8814171914853,
"GFlops": 2053.372079248613,
"L": 24,
"size_MB": 243.0
},
{
"GBps": 1654.336076127236,
"GFlops": 2220.4122948623262,
"L": 32,
"size_MB": 768.0
},
{
"GBps": 1684.3951354059177,
"GFlops": 2260.756881284346,
"L": 40,
"size_MB": 1875.0
},
{
"GBps": 1701.2803710051417,
"GFlops": 2283.4198608730717,
"L": 48,
"size_MB": 3888.0
}
],
"axpy": [
{
"GBps": 72.90804934027936,
"GFlops": 6.523701823576131,
"L": 8,
"size_MB": 1.5
},
{
"GBps": 768.9192583517447,
"GFlops": 68.80173058094414,
"L": 16,
"size_MB": 24.0
},
{
"GBps": 1671.395675609412,
"GFlops": 149.5539534462135,
"L": 24,
"size_MB": 121.5
},
{
"GBps": 2005.7565212158895,
"GFlops": 179.47205546585369,
"L": 32,
"size_MB": 384.0
},
{
"GBps": 2189.105793295759,
"GFlops": 195.8778706185296,
"L": 40,
"size_MB": 937.5
},
{
"GBps": 2228.4889350615904,
"GFlops": 199.4018144914041,
"L": 48,
"size_MB": 1944.0
},
{
"GBps": 2235.692776103882,
"GFlops": 200.0464032764505,
"L": 56,
"size_MB": 3601.5
},
{
"GBps": 2259.686995689647,
"GFlops": 202.19336970174015,
"L": 64,
"size_MB": 6144.0
}
],
"comms": [
{
"L": 16,
"bytes": 4718592,
"dir": 3,
"rate_GBps": {
"error": 0.05625709399294237,
"max": 174.04084158415841,
"mean": 172.69858034091467
},
"time_usec": 101.785
},
{
"L": 16,
"bytes": 4718592,
"dir": 7,
"rate_GBps": {
"error": 0.08879742168931712,
"max": 174.04084158415841,
"mean": 172.56295096451186
},
"time_usec": 101.865
},
{
"L": 24,
"bytes": 15925248,
"dir": 3,
"rate_GBps": {
"error": 0.022661574665316316,
"max": 185.394287109375,
"mean": 184.92042851131475
},
"time_usec": 320.82
},
{
"L": 24,
"bytes": 15925248,
"dir": 7,
"rate_GBps": {
"error": 0.01801934346116572,
"max": 185.39428710937497,
"mean": 184.94637011924246
},
"time_usec": 320.775
},
{
"L": 32,
"bytes": 37748736,
"dir": 3,
"rate_GBps": {
"error": 0.00908164542769838,
"max": 188.5053619302949,
"mean": 188.15351989242637
},
"time_usec": 747.395
},
{
"L": 32,
"bytes": 37748736,
"dir": 7,
"rate_GBps": {
"error": 0.00977151592705336,
"max": 188.5053619302949,
"mean": 188.12834868460658
},
"time_usec": 747.495
},
{
"L": 40,
"bytes": 73728000,
"dir": 3,
"rate_GBps": {
"error": 0.004645137235265924,
"max": 189.41945043103448,
"mean": 189.36590558876455
},
"time_usec": 1450.41
},
{
"L": 40,
"bytes": 73728000,
"dir": 7,
"rate_GBps": {
"error": 6.57927354071246,
"max": 189.41945043103448,
"mean": 179.56563159907688
},
"time_usec": 1529.57
},
{
"L": 48,
"bytes": 127401984,
"dir": 3,
"rate_GBps": {
"error": 0.0025216686764181592,
"max": 189.99574659727782,
"mean": 189.91743796593076
},
"time_usec": 2499.03
},
{
"L": 48,
"bytes": 127401984,
"dir": 7,
"rate_GBps": {
"error": 0.0029097878818057873,
"max": 189.99574659727784,
"mean": 189.9109784863842
},
"time_usec": 2499.115
}
],
"flops": {
"comparison_point_Gflops": 2967.4156661909574,
"results": [
{
"Gflops_dwf4": 666.5081421393546,
"Gflops_staggered": 21.14174015655824,
"Gflops_wilson": 65.28883512417278,
"L": 8
},
{
"Gflops_dwf4": 1872.5305322568,
"Gflops_staggered": 103.94394142193528,
"Gflops_wilson": 322.3361554476059,
"L": 12
},
{
"Gflops_dwf4": 2378.065733107743,
"Gflops_staggered": 302.54207956687776,
"Gflops_wilson": 859.270520750005,
"L": 16
},
{
"Gflops_dwf4": 3017.7954712341593,
"Gflops_staggered": 805.6876803824425,
"Gflops_wilson": 1573.6522138072685,
"L": 24
},
{
"Gflops_dwf4": 2917.035861147756,
"Gflops_staggered": 1173.8616609748283,
"Gflops_wilson": 1952.2605662484034,
"L": 32
}
]
},
"geometry": {
"mpi": [
1,
1,
1,
2
],
"nodes": 1,
"ranks": 2
}
}
+247
View File
@@ -0,0 +1,247 @@
{
"SU4": [
{
"GBps": 258.80472857326896,
"GFlops": 347.3618266476084,
"L": 8,
"size_MB": 6.0
},
{
"GBps": 2205.1617118588697,
"GFlops": 2959.7179483828813,
"L": 16,
"size_MB": 96.0
},
{
"GBps": 3072.390061538835,
"GFlops": 4123.692135895226,
"L": 24,
"size_MB": 486.0
},
{
"GBps": 3306.845905879817,
"GFlops": 4438.373443332908,
"L": 32,
"size_MB": 1536.0
},
{
"GBps": 3368.350185489805,
"GFlops": 4520.923090048202,
"L": 40,
"size_MB": 3750.0
},
{
"GBps": 3401.890050913554,
"GFlops": 4565.939535394216,
"L": 48,
"size_MB": 7776.0
}
],
"axpy": [
{
"GBps": 144.92557588593007,
"GFlops": 12.96772101633408,
"L": 8,
"size_MB": 3.0
},
{
"GBps": 1525.0954455566346,
"GFlops": 136.46323045717278,
"L": 16,
"size_MB": 48.0
},
{
"GBps": 3349.410690756415,
"GFlops": 299.7001953681577,
"L": 24,
"size_MB": 243.0
},
{
"GBps": 4001.347120197133,
"GFlops": 358.03447960813475,
"L": 32,
"size_MB": 768.0
},
{
"GBps": 4373.946464587173,
"GFlops": 391.3741045803486,
"L": 40,
"size_MB": 1875.0
},
{
"GBps": 4472.851142619264,
"GFlops": 400.223945363041,
"L": 48,
"size_MB": 3888.0
},
{
"GBps": 4474.64035673346,
"GFlops": 400.38404153191635,
"L": 56,
"size_MB": 7203.0
},
{
"GBps": 4520.778628772496,
"GFlops": 404.51242422986655,
"L": 64,
"size_MB": 12288.0
}
],
"comms": [
{
"L": 16,
"bytes": 4718592,
"dir": 3,
"rate_GBps": {
"error": 0.07403621163429362,
"max": 251.11607142857142,
"mean": 246.10605530276513
},
"time_usec": 142.85
},
{
"L": 16,
"bytes": 4718592,
"dir": 7,
"rate_GBps": {
"error": 0.05078117997981672,
"max": 240.79623287671234,
"mean": 240.37639738812348
},
"time_usec": 146.255
},
{
"L": 24,
"bytes": 15925248,
"dir": 3,
"rate_GBps": {
"error": 0.05243158900728836,
"max": 261.34877477973566,
"mean": 259.53091506627584
},
"time_usec": 457.18
},
{
"L": 24,
"bytes": 15925248,
"dir": 7,
"rate_GBps": {
"error": 0.05276589391871642,
"max": 254.61876341201716,
"mean": 253.79909038406007
},
"time_usec": 467.505
},
{
"L": 32,
"bytes": 37748736,
"dir": 3,
"rate_GBps": {
"error": 0.034606553242195574,
"max": 264.08450704225345,
"mean": 263.0926600655743
},
"time_usec": 1069.015
},
{
"L": 32,
"bytes": 37748736,
"dir": 7,
"rate_GBps": {
"error": 0.1319150423789226,
"max": 258.0275229357798,
"mean": 256.99247982894576
},
"time_usec": 1094.39
},
{
"L": 40,
"bytes": 73728000,
"dir": 3,
"rate_GBps": {
"error": 0.015978094946140938,
"max": 266.5290665938864,
"mean": 264.2093825678961
},
"time_usec": 2079.095
},
{
"L": 40,
"bytes": 73728000,
"dir": 7,
"rate_GBps": {
"error": 0.009886226966989408,
"max": 259.35618803116154,
"mean": 258.3413626594304
},
"time_usec": 2126.32
},
{
"L": 48,
"bytes": 127401984,
"dir": 3,
"rate_GBps": {
"error": 0.009038617805051087,
"max": 265.14490223463685,
"mean": 264.87447870958556
},
"time_usec": 3583.655
},
{
"L": 48,
"bytes": 127401984,
"dir": 7,
"rate_GBps": {
"error": 0.011755047936156046,
"max": 259.2785441136302,
"mean": 259.0142943487046
},
"time_usec": 3664.735
}
],
"flops": {
"comparison_point_Gflops": 5739.959058528701,
"results": [
{
"Gflops_dwf4": 1303.838629455651,
"Gflops_staggered": 45.61681616310823,
"Gflops_wilson": 122.82790780695686,
"L": 8
},
{
"Gflops_dwf4": 3626.7536546219576,
"Gflops_staggered": 222.3231637149513,
"Gflops_wilson": 602.8115590932687,
"L": 12
},
{
"Gflops_dwf4": 4685.72914071401,
"Gflops_staggered": 635.7870784234051,
"Gflops_wilson": 1585.9766281938169,
"L": 16
},
{
"Gflops_dwf4": 5931.296254256527,
"Gflops_staggered": 1539.663555337785,
"Gflops_wilson": 3106.4995401978545,
"L": 24
},
{
"Gflops_dwf4": 5548.621862800875,
"Gflops_staggered": 2327.4564566018657,
"Gflops_wilson": 3871.899246657765,
"L": 32
}
]
},
"geometry": {
"mpi": [
1,
1,
1,
4
],
"nodes": 1,
"ranks": 4
}
}
+357
View File
@@ -0,0 +1,357 @@
{
"SU4": [
{
"GBps": 266.1848261241281,
"GFlops": 357.2672259045552,
"L": 8,
"size_MB": 6.0
},
{
"GBps": 2218.808483408791,
"GFlops": 2978.034335102536,
"L": 16,
"size_MB": 96.0
},
{
"GBps": 2916.4890368817046,
"GFlops": 3914.445322671706,
"L": 24,
"size_MB": 486.0
},
{
"GBps": 3244.2460692554414,
"GFlops": 4354.353364883959,
"L": 32,
"size_MB": 1536.0
},
{
"GBps": 3374.5218625301145,
"GFlops": 4529.206574751202,
"L": 40,
"size_MB": 3750.0
},
{
"GBps": 3394.431233013875,
"GFlops": 4555.928479473609,
"L": 48,
"size_MB": 7776.0
}
],
"axpy": [
{
"GBps": 141.29030070343728,
"GFlops": 12.64244209923477,
"L": 8,
"size_MB": 3.0
},
{
"GBps": 1521.8306608296007,
"GFlops": 136.1711024648584,
"L": 16,
"size_MB": 48.0
},
{
"GBps": 3348.7625319112517,
"GFlops": 299.64219909643714,
"L": 24,
"size_MB": 243.0
},
{
"GBps": 4183.327348851153,
"GFlops": 374.3177948287102,
"L": 32,
"size_MB": 768.0
},
{
"GBps": 4378.710152219234,
"GFlops": 391.8003521342666,
"L": 40,
"size_MB": 1875.0
},
{
"GBps": 4336.452251813764,
"GFlops": 388.01917921261816,
"L": 48,
"size_MB": 3888.0
},
{
"GBps": 4469.4883078108915,
"GFlops": 399.92304399796166,
"L": 56,
"size_MB": 7203.0
},
{
"GBps": 4517.408964798091,
"GFlops": 404.21091180135454,
"L": 64,
"size_MB": 12288.0
}
],
"comms": [
{
"L": 16,
"bytes": 4718592,
"dir": 2,
"rate_GBps": {
"error": 0.09195178515748137,
"max": 249.33510638297872,
"mean": 247.17886521830837
},
"time_usec": 142.23
},
{
"L": 16,
"bytes": 4718592,
"dir": 3,
"rate_GBps": {
"error": 0.10825452600087834,
"max": 348.08168316831683,
"mean": 344.93965855573
},
"time_usec": 101.92
},
{
"L": 16,
"bytes": 4718592,
"dir": 6,
"rate_GBps": {
"error": 0.10591381085828204,
"max": 249.33510638297872,
"mean": 247.29187915450356
},
"time_usec": 142.165
},
{
"L": 16,
"bytes": 4718592,
"dir": 7,
"rate_GBps": {
"error": 0.10943725965663166,
"max": 348.08168316831683,
"mean": 344.80433503334643
},
"time_usec": 101.96
},
{
"L": 24,
"bytes": 15925248,
"dir": 2,
"rate_GBps": {
"error": 1.3232642449040395,
"max": 260.7743818681319,
"mean": 257.60107630182046
},
"time_usec": 460.605
},
{
"L": 24,
"bytes": 15925248,
"dir": 3,
"rate_GBps": {
"error": 0.04258490698369876,
"max": 370.78857421875,
"mean": 369.7659402901351
},
"time_usec": 320.885
},
{
"L": 24,
"bytes": 15925248,
"dir": 6,
"rate_GBps": {
"error": 0.38957868571862236,
"max": 260.77438186813185,
"mean": 258.7696281554986
},
"time_usec": 458.525
},
{
"L": 24,
"bytes": 15925248,
"dir": 7,
"rate_GBps": {
"error": 0.04510765547263382,
"max": 370.78857421874994,
"mean": 369.7140926370236
},
"time_usec": 320.93
},
{
"L": 32,
"bytes": 37748736,
"dir": 2,
"rate_GBps": {
"error": 0.04128040141253437,
"max": 263.3426966292135,
"mean": 262.2622155911973
},
"time_usec": 1072.4
},
{
"L": 32,
"bytes": 37748736,
"dir": 3,
"rate_GBps": {
"error": 0.019030171173062077,
"max": 376.50602409638554,
"mean": 376.03786450603667
},
"time_usec": 747.93
},
{
"L": 32,
"bytes": 37748736,
"dir": 6,
"rate_GBps": {
"error": 14.333669622239718,
"max": 263.3426966292135,
"mean": 246.90111665145025
},
"time_usec": 1139.12
},
{
"L": 32,
"bytes": 37748736,
"dir": 7,
"rate_GBps": {
"error": 0.04165357676010385,
"max": 376.50602409638554,
"mean": 375.9272873086948
},
"time_usec": 748.15
},
{
"L": 40,
"bytes": 73728000,
"dir": 2,
"rate_GBps": {
"error": 0.008823799570124044,
"max": 263.8407330691643,
"mean": 263.62484432777194
},
"time_usec": 2083.705
},
{
"L": 40,
"bytes": 73728000,
"dir": 3,
"rate_GBps": {
"error": 0.008612844793861731,
"max": 378.83890086206895,
"mean": 378.61303860111036
},
"time_usec": 1450.865
},
{
"L": 40,
"bytes": 73728000,
"dir": 6,
"rate_GBps": {
"error": 0.016608523471027856,
"max": 263.9675186208553,
"mean": 263.6128257270371
},
"time_usec": 2083.8
},
{
"L": 40,
"bytes": 73728000,
"dir": 7,
"rate_GBps": {
"error": 0.007834602992146382,
"max": 378.83890086206895,
"mean": 378.6104290514345
},
"time_usec": 1450.875
},
{
"L": 48,
"bytes": 127401984,
"dir": 2,
"rate_GBps": {
"error": 0.0075998552608142755,
"max": 264.553720735786,
"mean": 264.31025769184185
},
"time_usec": 3591.305
},
{
"L": 48,
"bytes": 127401984,
"dir": 3,
"rate_GBps": {
"error": 0.00462628204154535,
"max": 379.99149319455563,
"mean": 379.81511787960756
},
"time_usec": 2499.16
},
{
"L": 48,
"bytes": 127401984,
"dir": 6,
"rate_GBps": {
"error": 0.007229146291832468,
"max": 264.6274742124338,
"mean": 264.31062567872357
},
"time_usec": 3591.3
},
{
"L": 48,
"bytes": 127401984,
"dir": 7,
"rate_GBps": {
"error": 0.005141655059361934,
"max": 379.9914931945557,
"mean": 379.81435799557056
},
"time_usec": 2499.165
}
],
"flops": {
"comparison_point_Gflops": 5098.280833532175,
"results": [
{
"Gflops_dwf4": 794.9684468940287,
"Gflops_staggered": 25.398732773127435,
"Gflops_wilson": 82.30088654337784,
"L": 8
},
{
"Gflops_dwf4": 2766.7442233712513,
"Gflops_staggered": 123.79572508426367,
"Gflops_wilson": 388.0367023572493,
"L": 12
},
{
"Gflops_dwf4": 3917.4114123908494,
"Gflops_staggered": 360.4491008480393,
"Gflops_wilson": 1119.746558014772,
"L": 16
},
{
"Gflops_dwf4": 5085.322691715904,
"Gflops_staggered": 1142.7657184076463,
"Gflops_wilson": 2488.4866397303394,
"L": 24
},
{
"Gflops_dwf4": 5111.238975348448,
"Gflops_staggered": 1963.1988001901661,
"Gflops_wilson": 3410.556163528253,
"L": 32
}
]
},
"geometry": {
"mpi": [
1,
1,
2,
2
],
"nodes": 1,
"ranks": 4
}
}
+247
View File
@@ -0,0 +1,247 @@
{
"SU4": [
{
"GBps": 357.4010431521607,
"GFlops": 479.69555996712967,
"L": 8,
"size_MB": 9.0
},
{
"GBps": 2964.027681822972,
"GFlops": 3978.2506118338624,
"L": 16,
"size_MB": 144.0
},
{
"GBps": 4300.052851129693,
"GFlops": 5771.433239585496,
"L": 24,
"size_MB": 729.0
},
{
"GBps": 4872.567325646307,
"GFlops": 6539.849159752834,
"L": 32,
"size_MB": 2304.0
},
{
"GBps": 5014.291917307407,
"GFlops": 6730.06868669764,
"L": 40,
"size_MB": 5625.0
},
{
"GBps": 5087.723804015017,
"GFlops": 6828.627296664129,
"L": 48,
"size_MB": 11664.0
}
],
"axpy": [
{
"GBps": 191.2791174636559,
"GFlops": 17.11536570654468,
"L": 8,
"size_MB": 4.5
},
{
"GBps": 2189.0619869385964,
"GFlops": 195.87395089204273,
"L": 16,
"size_MB": 72.0
},
{
"GBps": 4600.586453416044,
"GFlops": 411.6535074967195,
"L": 24,
"size_MB": 364.5
},
{
"GBps": 6066.590946624111,
"GFlops": 542.8293690408383,
"L": 32,
"size_MB": 1152.0
},
{
"GBps": 6435.680928548642,
"GFlops": 575.8549815751528,
"L": 40,
"size_MB": 2812.5
},
{
"GBps": 6423.987885321036,
"GFlops": 574.8087057782093,
"L": 48,
"size_MB": 5832.0
},
{
"GBps": 6651.207054039447,
"GFlops": 595.139932833832,
"L": 56,
"size_MB": 10804.5
},
{
"GBps": 6750.396585799414,
"GFlops": 604.0152618966363,
"L": 64,
"size_MB": 18432.0
}
],
"comms": [
{
"L": 16,
"bytes": 4718592,
"dir": 3,
"rate_GBps": {
"error": 0.23180937816140193,
"max": 301.3392857142857,
"mean": 284.1444851554502
},
"time_usec": 185.59
},
{
"L": 16,
"bytes": 4718592,
"dir": 7,
"rate_GBps": {
"error": 0.0720558271254274,
"max": 288.1659836065574,
"mean": 287.1226145427817
},
"time_usec": 183.665
},
{
"L": 24,
"bytes": 15925248,
"dir": 3,
"rate_GBps": {
"error": 0.08914672001181612,
"max": 294.1793646694215,
"mean": 291.0166629195111
},
"time_usec": 611.575
},
{
"L": 24,
"bytes": 15925248,
"dir": 7,
"rate_GBps": {
"error": 0.02033965870016051,
"max": 300.13240408937605,
"mean": 299.43809148264984
},
"time_usec": 594.375
},
{
"L": 32,
"bytes": 37748736,
"dir": 3,
"rate_GBps": {
"error": 0.11175876047399072,
"max": 297.3044397463002,
"mean": 293.6948288825151
},
"time_usec": 1436.44
},
{
"L": 32,
"bytes": 37748736,
"dir": 7,
"rate_GBps": {
"error": 0.01769364687591873,
"max": 302.8535534816941,
"mean": 302.39876137467337
},
"time_usec": 1395.095
},
{
"L": 40,
"bytes": 73728000,
"dir": 3,
"rate_GBps": {
"error": 4.399658341772243,
"max": 426.48789305124217,
"mean": 290.9026944191917
},
"time_usec": 2832.475
},
{
"L": 40,
"bytes": 73728000,
"dir": 7,
"rate_GBps": {
"error": 0.010262975788769115,
"max": 303.93751729066764,
"mean": 303.5115263803478
},
"time_usec": 2714.805
},
{
"L": 48,
"bytes": 127401984,
"dir": 3,
"rate_GBps": {
"error": 0.04733542501238632,
"max": 296.2605337078652,
"mean": 294.4266465533552
},
"time_usec": 4835.935
},
{
"L": 48,
"bytes": 127401984,
"dir": 7,
"rate_GBps": {
"error": 0.005712416455895626,
"max": 304.23677884615387,
"mean": 304.07369658580546
},
"time_usec": 4682.51
}
],
"flops": {
"comparison_point_Gflops": 8645.41832237255,
"results": [
{
"Gflops_dwf4": 1800.6620388878794,
"Gflops_staggered": 66.91525617022894,
"Gflops_wilson": 198.73254554643688,
"L": 8
},
{
"Gflops_dwf4": 5046.479674060661,
"Gflops_staggered": 310.38726587194475,
"Gflops_wilson": 888.2136619568682,
"L": 12
},
{
"Gflops_dwf4": 6946.311220935582,
"Gflops_staggered": 862.8639792893744,
"Gflops_wilson": 2194.256560154122,
"L": 16
},
{
"Gflops_dwf4": 8923.959284555593,
"Gflops_staggered": 2010.9408836077712,
"Gflops_wilson": 4488.6948258506145,
"L": 24
},
{
"Gflops_dwf4": 8366.877360189508,
"Gflops_staggered": 3236.252709292388,
"Gflops_wilson": 5745.6687676840165,
"L": 32
}
]
},
"geometry": {
"mpi": [
1,
1,
1,
6
],
"nodes": 1,
"ranks": 6
}
}
+357
View File
@@ -0,0 +1,357 @@
{
"SU4": [
{
"GBps": 358.7697863751814,
"GFlops": 481.5326560232221,
"L": 8,
"size_MB": 9.0
},
{
"GBps": 3277.0383987985815,
"GFlops": 4398.366484555036,
"L": 16,
"size_MB": 144.0
},
{
"GBps": 4641.25930543841,
"GFlops": 6229.3927903480135,
"L": 24,
"size_MB": 729.0
},
{
"GBps": 4957.208883318319,
"GFlops": 6653.453135404018,
"L": 32,
"size_MB": 2304.0
},
{
"GBps": 5052.323232718746,
"GFlops": 6781.113454171254,
"L": 40,
"size_MB": 5625.0
},
{
"GBps": 5111.066489576509,
"GFlops": 6859.957318878948,
"L": 48,
"size_MB": 11664.0
}
],
"axpy": [
{
"GBps": 212.79755008545004,
"GFlops": 19.040802464290206,
"L": 8,
"size_MB": 4.5
},
{
"GBps": 2239.4521802311115,
"GFlops": 200.38278906351087,
"L": 16,
"size_MB": 72.0
},
{
"GBps": 5003.072969176254,
"GFlops": 447.6673912940339,
"L": 24,
"size_MB": 364.5
},
{
"GBps": 6260.956674179814,
"GFlops": 560.220919943234,
"L": 32,
"size_MB": 1152.0
},
{
"GBps": 6339.748894764766,
"GFlops": 567.2711284972254,
"L": 40,
"size_MB": 2812.5
},
{
"GBps": 6712.262796318121,
"GFlops": 600.60310817383,
"L": 48,
"size_MB": 5832.0
},
{
"GBps": 6707.207092952936,
"GFlops": 600.1507314944187,
"L": 56,
"size_MB": 10804.5
},
{
"GBps": 6777.5805167152075,
"GFlops": 606.4476388603874,
"L": 64,
"size_MB": 18432.0
}
],
"comms": [
{
"L": 16,
"bytes": 4718592,
"dir": 2,
"rate_GBps": {
"error": 1.022994453232109,
"max": 349.23427152317885,
"mean": 321.77670317600763
},
"time_usec": 163.885
},
{
"L": 16,
"bytes": 4718592,
"dir": 3,
"rate_GBps": {
"error": 1.3634069536717788,
"max": 319.60227272727275,
"mean": 273.5965913512672
},
"time_usec": 192.745
},
{
"L": 16,
"bytes": 4718592,
"dir": 6,
"rate_GBps": {
"error": 1.033374997957526,
"max": 349.23427152317873,
"mean": 321.8159765660757
},
"time_usec": 163.865
},
{
"L": 16,
"bytes": 4718592,
"dir": 7,
"rate_GBps": {
"error": 0.8946713335979535,
"max": 277.5493421052632,
"mean": 261.8000049645038
},
"time_usec": 201.43
},
{
"L": 24,
"bytes": 15925248,
"dir": 2,
"rate_GBps": {
"error": 1.132373084736386,
"max": 364.7100730020492,
"mean": 336.4019839244705
},
"time_usec": 529.065
},
{
"L": 24,
"bytes": 15925248,
"dir": 3,
"rate_GBps": {
"error": 1.3763342157193355,
"max": 329.58984375000006,
"mean": 286.25184457704404
},
"time_usec": 621.755
},
{
"L": 24,
"bytes": 15925248,
"dir": 6,
"rate_GBps": {
"error": 1.1706725283255819,
"max": 366.96601159793806,
"mean": 335.8972476220133
},
"time_usec": 529.86
},
{
"L": 24,
"bytes": 15925248,
"dir": 7,
"rate_GBps": {
"error": 0.7245132697488086,
"max": 290.81456801470586,
"mean": 273.5395614001383
},
"time_usec": 650.65
},
{
"L": 32,
"bytes": 37748736,
"dir": 2,
"rate_GBps": {
"error": 1.1347849184170162,
"max": 369.41768826619966,
"mean": 340.002417794971
},
"time_usec": 1240.8
},
{
"L": 32,
"bytes": 37748736,
"dir": 3,
"rate_GBps": {
"error": 1.2934932973816882,
"max": 330.88235294117646,
"mean": 289.3945266277263
},
"time_usec": 1457.785
},
{
"L": 32,
"bytes": 37748736,
"dir": 6,
"rate_GBps": {
"error": 1.1400178079013707,
"max": 369.41768826619966,
"mean": 339.97775789957205
},
"time_usec": 1240.89
},
{
"L": 32,
"bytes": 37748736,
"dir": 7,
"rate_GBps": {
"error": 0.6655050724500384,
"max": 294.3998604326588,
"mean": 274.9840305570402
},
"time_usec": 1534.18
},
{
"L": 40,
"bytes": 73728000,
"dir": 2,
"rate_GBps": {
"error": 1.1580061344215817,
"max": 369.99308907723395,
"mean": 341.6741760070162
},
"time_usec": 2411.58
},
{
"L": 40,
"bytes": 73728000,
"dir": 3,
"rate_GBps": {
"error": 1.237808834778611,
"max": 326.19739088479815,
"mean": 289.9226295719639
},
"time_usec": 2842.05
},
{
"L": 40,
"bytes": 73728000,
"dir": 6,
"rate_GBps": {
"error": 1.1555010647779187,
"max": 370.49218047437046,
"mean": 341.65150860996624
},
"time_usec": 2411.74
},
{
"L": 40,
"bytes": 73728000,
"dir": 7,
"rate_GBps": {
"error": 0.21759524648814468,
"max": 284.915148469917,
"mean": 275.82309600044186
},
"time_usec": 2987.33
},
{
"L": 48,
"bytes": 127401984,
"dir": 2,
"rate_GBps": {
"error": 1.1492026962470794,
"max": 371.07847928068804,
"mean": 341.99590348018035
},
"time_usec": 4163.29
},
{
"L": 48,
"bytes": 127401984,
"dir": 3,
"rate_GBps": {
"error": 1.188423218598923,
"max": 324.26056137554093,
"mean": 291.58641440578003
},
"time_usec": 4883.04
},
{
"L": 48,
"bytes": 127401984,
"dir": 6,
"rate_GBps": {
"error": 1.1605366789704334,
"max": 370.88515889554566,
"mean": 342.08382185468326
},
"time_usec": 4162.22
},
{
"L": 48,
"bytes": 127401984,
"dir": 7,
"rate_GBps": {
"error": 4.979712514429859,
"max": 414.626710832848,
"mean": 266.3140556540118
},
"time_usec": 5346.425
}
],
"flops": {
"comparison_point_Gflops": 7494.348906712593,
"results": [
{
"Gflops_dwf4": 1062.0966590565613,
"Gflops_staggered": 37.17051075623598,
"Gflops_wilson": 116.09729212946039,
"L": 8
},
{
"Gflops_dwf4": 3672.4475061677776,
"Gflops_staggered": 171.9754714887417,
"Gflops_wilson": 543.8738534795675,
"L": 12
},
{
"Gflops_dwf4": 5567.914366086386,
"Gflops_staggered": 470.0381099405445,
"Gflops_wilson": 1539.9924935600807,
"L": 16
},
{
"Gflops_dwf4": 7427.264147643517,
"Gflops_staggered": 1388.1232831701386,
"Gflops_wilson": 3379.021526181515,
"L": 24
},
{
"Gflops_dwf4": 7561.433665781668,
"Gflops_staggered": 2602.387615970339,
"Gflops_wilson": 4936.722401653414,
"L": 32
}
]
},
"geometry": {
"mpi": [
1,
1,
2,
3
],
"nodes": 1,
"ranks": 6
}
}
+247
View File
@@ -0,0 +1,247 @@
{
"SU4": [
{
"GBps": 533.0451116736091,
"GFlops": 715.4410381033807,
"L": 8,
"size_MB": 12.0
},
{
"GBps": 4469.567213462115,
"GFlops": 5998.951565341761,
"L": 16,
"size_MB": 192.0
},
{
"GBps": 6185.220631656113,
"GFlops": 8301.662603596083,
"L": 24,
"size_MB": 972.0
},
{
"GBps": 6611.964226824657,
"GFlops": 8874.42816141682,
"L": 32,
"size_MB": 3072.0
},
{
"GBps": 6754.492030874098,
"GFlops": 9065.725741780272,
"L": 40,
"size_MB": 7500.0
},
{
"GBps": 6793.3111325316295,
"GFlops": 9117.827858055021,
"L": 48,
"size_MB": 15552.0
}
],
"axpy": [
{
"GBps": 285.8864982954018,
"GFlops": 25.580690844723133,
"L": 8,
"size_MB": 6.0
},
{
"GBps": 3045.4379339749057,
"GFlops": 272.5011735087506,
"L": 16,
"size_MB": 96.0
},
{
"GBps": 6694.684891454758,
"GFlops": 599.0302638713229,
"L": 24,
"size_MB": 486.0
},
{
"GBps": 8366.596364016663,
"GFlops": 748.6303700475851,
"L": 32,
"size_MB": 1536.0
},
{
"GBps": 8757.573729403726,
"GFlops": 783.6144325020367,
"L": 40,
"size_MB": 3750.0
},
{
"GBps": 8661.613670162333,
"GFlops": 775.0280717486198,
"L": 48,
"size_MB": 7776.0
},
{
"GBps": 8945.21348352366,
"GFlops": 800.4041534890073,
"L": 56,
"size_MB": 14406.0
},
{
"GBps": 9038.85010761881,
"GFlops": 808.7826167847682,
"L": 64,
"size_MB": 24576.0
}
],
"comms": [
{
"L": 16,
"bytes": 4718592,
"dir": 3,
"rate_GBps": {
"error": 0.34865458387881154,
"max": 431.36503067484665,
"mean": 375.751503006012
},
"time_usec": 187.125
},
{
"L": 16,
"bytes": 4718592,
"dir": 7,
"rate_GBps": {
"error": 1.7026849845201326,
"max": 502.23214285714283,
"mean": 486.6590531561462
},
"time_usec": 144.48
},
{
"L": 24,
"bytes": 15925248,
"dir": 3,
"rate_GBps": {
"error": 0.3305149138573203,
"max": 453.7374521988528,
"mean": 382.9904092897145
},
"time_usec": 619.61
},
{
"L": 24,
"bytes": 15925248,
"dir": 7,
"rate_GBps": {
"error": 0.07177810715329609,
"max": 519.2662746170679,
"mean": 516.7391150500289
},
"time_usec": 459.235
},
{
"L": 32,
"bytes": 37748736,
"dir": 3,
"rate_GBps": {
"error": 0.3505158151745815,
"max": 459.93458708094846,
"mean": 385.65037793737037
},
"time_usec": 1458.575
},
{
"L": 32,
"bytes": 37748736,
"dir": 7,
"rate_GBps": {
"error": 0.05229598091368635,
"max": 526.1927034611787,
"mean": 524.4192930361779
},
"time_usec": 1072.615
},
{
"L": 40,
"bytes": 73728000,
"dir": 3,
"rate_GBps": {
"error": 0.3292408794418583,
"max": 459.10272147931465,
"mean": 386.53290497928595
},
"time_usec": 2842.275
},
{
"L": 40,
"bytes": 73728000,
"dir": 7,
"rate_GBps": {
"error": 15.163663479257934,
"max": 549.0418853073463,
"mean": 505.73238896867923
},
"time_usec": 2172.36
},
{
"L": 48,
"bytes": 127401984,
"dir": 3,
"rate_GBps": {
"error": 0.5633684126271865,
"max": 461.79457552906837,
"mean": 388.1900261426014
},
"time_usec": 4890.485
},
{
"L": 48,
"bytes": 127401984,
"dir": 7,
"rate_GBps": {
"error": 0.028472934949022477,
"max": 529.5502092050209,
"mean": 528.4182003406927
},
"time_usec": 3592.68
}
],
"flops": {
"comparison_point_Gflops": 11493.800236285882,
"results": [
{
"Gflops_dwf4": 2246.9867854895488,
"Gflops_staggered": 84.85512852959255,
"Gflops_wilson": 237.77747062888736,
"L": 8
},
{
"Gflops_dwf4": 6598.469072470059,
"Gflops_staggered": 409.58058567021146,
"Gflops_wilson": 1130.4581186873809,
"L": 12
},
{
"Gflops_dwf4": 9063.438545342291,
"Gflops_staggered": 1129.8800379112695,
"Gflops_wilson": 2841.669376693767,
"L": 16
},
{
"Gflops_dwf4": 11850.835518731748,
"Gflops_staggered": 2651.024919468422,
"Gflops_wilson": 5889.787064860617,
"L": 24
},
{
"Gflops_dwf4": 11136.764953840015,
"Gflops_staggered": 4278.104069949589,
"Gflops_wilson": 7615.951579083893,
"L": 32
}
]
},
"geometry": {
"mpi": [
1,
1,
1,
8
],
"nodes": 1,
"ranks": 8
}
}
BIN
View File
Binary file not shown.
BIN
View File
Binary file not shown.
View File
BIN
View File
Binary file not shown.
BIN
View File
Binary file not shown.
Executable
+34
View File
@@ -0,0 +1,34 @@
#!/bin/bash
# Per-rank CPU/GPU binding wrapper.
#
# Usage: <mpi launcher> ... <this script> <command> [args...]
#
# Reads the rank layout from the OpenMPI environment (OMPI_COMM_WORLD_*),
# derives a contiguous range of CPU ids and a single GPU id for this rank,
# exports them as GOMP_CPU_AFFINITY and ROCR_VISIBLE_DEVICES, then runs the
# command given on the command line.
#
# Tunables (taken from the environment, defaults in parentheses):
#   NUM_CPUS        CPU ids available on the node                 (96)
#   RANK_STRIDE     CPU id distance between consecutive ranks     (NUM_CPUS / ranks_per_node)
#   OMP_STRIDE      CPU id distance between threads of one rank   (1)
#   NUM_GPUS        GPUs on the node                              (8)
#   GPU_START       first GPU index to hand out                   (0)
#   GPU_STRIDE      GPU index step per rank                       (1)
#   OMP_NUM_THREADS threads per rank, sizes the CPU range         (1 if unset)

# Rank layout from the launcher. The fallbacks (rank 0 of 1) keep the integer
# arithmetic below well-defined when the script is run outside of mpirun,
# where these variables are unset and a division by an empty value would fail.
export global_rank=${OMPI_COMM_WORLD_RANK:-0}
export local_rank=${OMPI_COMM_WORLD_LOCAL_RANK:-0}
export ranks_per_node=${OMPI_COMM_WORLD_LOCAL_SIZE:-1}

# Apply defaults only where the caller left a tunable unset or empty.
# Plain assignments are used instead of `let`: `let X=0` returns a non-zero
# status, which would abort the script if it were ever run under `set -e`.
: "${NUM_CPUS:=96}"
: "${RANK_STRIDE:=$(( NUM_CPUS / ranks_per_node ))}"
: "${OMP_STRIDE:=1}"
: "${NUM_GPUS:=8}"
: "${GPU_START:=0}"
: "${GPU_STRIDE:=1}"

# Thread count used only to size the CPU range; OMP_NUM_THREADS itself is
# left exactly as the caller set (or did not set) it.
omp_threads=${OMP_NUM_THREADS:-1}

# Logical-index -> CPU id table. It is the identity for ids 0..95; an index
# past the end of the table falls back to the index itself, so NUM_CPUS
# values above 96 still yield a valid CPU id instead of an empty string.
cpu_list=({0..95})
cpus_per_gpu=$(( NUM_CPUS / NUM_GPUS ))
cpu_start_index=$(( RANK_STRIDE * local_rank + GPU_START * cpus_per_gpu ))
cpu_start=${cpu_list[$cpu_start_index]:-$cpu_start_index}
cpu_stop=$(( cpu_start + omp_threads * OMP_STRIDE - 1 ))

# Logical-index -> GPU id table, with the same identity fallback as above.
gpu_list=(0 1 2 3 4 5 6 7)
# Ceiling division: how many ranks share one GPU when ranks outnumber GPUs.
ranks_per_gpu=$(( (ranks_per_node + NUM_GPUS - 1) / NUM_GPUS ))
my_gpu_index=$(( local_rank * GPU_STRIDE / ranks_per_gpu + GPU_START ))
my_gpu=${gpu_list[$my_gpu_index]:-$my_gpu_index}

# Affinity string has the form "<first>-<last>:<stride>".
export GOMP_CPU_AFFINITY="$cpu_start-$cpu_stop:$OMP_STRIDE"
export ROCR_VISIBLE_DEVICES="$my_gpu"

# Run the wrapped command; its exit status becomes the script's exit status.
"$@"
+12
View File
@@ -0,0 +1,12 @@
--------------------------------------------------------------------------
WARNING: There was an error initializing an OpenFabrics device.
Local host: ubb-r09-09
Local device: mlx5_0
--------------------------------------------------------------------------
--------------------------------------------------------------------------
WARNING: There was an error initializing an OpenFabrics device.
Local host: ubb-r09-09
Local device: mlx5_0
--------------------------------------------------------------------------
+899
View File
@@ -0,0 +1,899 @@
[1680706877.725400] [ubb-r09-09:4143038:0] parser.c:1908 UCX WARN unused env variable: UCX_HOME (set UCX_WARN_UNUSED_ENV_VARS=n to suppress this warning)
MPI 000 - OMP 001 - HWT 001 - Node ubb-r09-09 - RT_GPU_ID 0 - GPU_ID 0 - Bus_ID 29
MPI 000 - OMP 000 - HWT 000 - Node ubb-r09-09 - RT_GPU_ID 0 - GPU_ID 0 - Bus_ID 29
MPI 000 - OMP 002 - HWT 002 - Node ubb-r09-09 - RT_GPU_ID 0 - GPU_ID 0 - Bus_ID 29
MPI 000 - OMP 003 - HWT 003 - Node ubb-r09-09 - RT_GPU_ID 0 - GPU_ID 0 - Bus_ID 29
MPI 000 - OMP 004 - HWT 004 - Node ubb-r09-09 - RT_GPU_ID 0 - GPU_ID 0 - Bus_ID 29
MPI 000 - OMP 005 - HWT 005 - Node ubb-r09-09 - RT_GPU_ID 0 - GPU_ID 0 - Bus_ID 29
MPI 000 - OMP 007 - HWT 007 - Node ubb-r09-09 - RT_GPU_ID 0 - GPU_ID 0 - Bus_ID 29
MPI 000 - OMP 006 - HWT 006 - Node ubb-r09-09 - RT_GPU_ID 0 - GPU_ID 0 - Bus_ID 29
world_rank 0 has 1 devices
AcceleratorHipInit: ========================
AcceleratorHipInit: Device Number : 0
AcceleratorHipInit: ========================
AcceleratorHipInit: Device identifier: AMD Instinct MI250X/MI250
AcceleratorHipInit: totalGlobalMem: 68702699520
AcceleratorHipInit: isMultiGpuBoard: 0
AcceleratorHipInit: warpSize: 64
AcceleratorHipInit: using default device
AcceleratorHipInit: assume user or srun sets ROCR_VISIBLE_DEVICES and numa binding
AcceleratorHipInit: Configure options --enable-setdevice=no
local rank 0 device 0 bus id: 0000:29:00.0
AcceleratorHipInit: ================================================
[1680706879.831292] [ubb-r09-09:4143063:0] parser.c:1908 UCX WARN unused env variable: UCX_HOME (set UCX_WARN_UNUSED_ENV_VARS=n to suppress this warning)
SharedMemoryMpi: World communicator of size 1
SharedMemoryMpi: Node communicator of size 1
0SharedMemoryMpi: SharedMemoryMPI.cc acceleratorAllocDevice 2147483648bytes at 0x7fc341800000 - 7fc3c17fffff for comms buffers
Setting up IPC
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
__|_ | | | | | | | | | | | | _|__
__|_ _|__
__|_ GGGG RRRR III DDDD _|__
__|_ G R R I D D _|__
__|_ G R R I D D _|__
__|_ G GG RRRR I D D _|__
__|_ G G R R I D D _|__
__|_ GGGG R R III DDDD _|__
__|_ _|__
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
| | | | | | | | | | | | | |
Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli and other authors
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
Current Grid git commit hash=af64c1c6b6dd52109e4cc87e4977ad03f6426060: (HEAD -> develop, origin/develop, origin/HEAD) uncommited changes
Grid : Message : ================================================
Grid : Message : MPI is initialised and logging filters activated
Grid : Message : ================================================
Grid : Message : Requested 2147483648 byte stencil comms buffers
Grid : Message : MemoryManager Cache 54962159616 bytes
Grid : Message : MemoryManager::Init() setting up
Grid : Message : MemoryManager::Init() cache pool for recent host allocations: SMALL 8 LARGE 2 HUGE 0
Grid : Message : MemoryManager::Init() cache pool for recent device allocations: SMALL 16 LARGE 8 Huge 0
Grid : Message : MemoryManager::Init() cache pool for recent shared allocations: SMALL 16 LARGE 8 Huge 0
Grid : Message : MemoryManager::Init() Non unified: Caching accelerator data in dedicated memory
Grid : Message : MemoryManager::Init() Using hipMalloc
Grid : Message : 0.537200 s : ===============================================================================
Grid : Message : 0.538200 s : Grid Default Decomposition patterns
Grid : Message : 0.538400 s : ------------------------------------------
Grid : Message : 0.538700 s : * OpenMP threads : 1
Grid : Message : 0.538800 s : * MPI tasks : 1 1 1 1
Grid : Message : 0.539600 s : * vReal : 512bits ; 1 2 2 2
Grid : Message : 0.540000 s : * vRealF : 512bits ; 2 2 2 2
Grid : Message : 0.540300 s : * vRealD : 512bits ; 1 2 2 2
Grid : Message : 0.540600 s : * vComplex : 512bits ; 1 1 2 2
Grid : Message : 0.540800 s : * vComplexF : 512bits ; 1 2 2 2
Grid : Message : 0.541100 s : * vComplexD : 512bits ; 1 1 2 2
Grid : Message : 0.541300 s : * ranks : 1
Grid : Message : 0.541600 s : * nodes : 1
Grid : Message : 0.541700 s : * ranks/node : 1
Grid : Message : 0.542700 s : ===============================================================================
Grid : Message : 0.542900 s : Memory benchmark
Grid : Message : 0.543000 s : ===============================================================================
Grid : Message : 0.543300 s : Benchmarking a*x + y bandwidth
Grid : Message : 0.543400 s : ------------------------------------------
Grid : Message : 0.543800 s : L size (MB/node) time (usec) GB/s/node Gflop/s/node
Grid : Message : 2.104895 s : 8 0.75 20.44 35.83 3.21
Grid : Message : 2.501739 s : 16 12.00 30.61 382.89 34.26
Grid : Message : 2.790328 s : 24 60.75 70.84 837.41 74.93
Grid : Message : 3.134382 s : 32 192.00 179.48 1044.68 93.48
Grid : Message : 3.615671 s : 40 468.75 418.36 1094.18 97.91
Grid : Message : 4.312240 s : 48 972.00 848.60 1118.57 100.09
Grid : Message : 5.311103 s : 56 1800.75 1574.15 1117.14 99.96
Grid : Message : 6.743824 s : 64 3072.00 2653.93 1130.40 101.15
Grid : Message : 6.746637 s : ===============================================================================
Grid : Message : 6.746646 s : SU(4) benchmark
Grid : Message : 6.746647 s : ===============================================================================
Grid : Message : 6.746648 s : Benchmarking z = y*x SU(4) bandwidth
Grid : Message : 6.746649 s : ------------------------------------------
Grid : Message : 6.746651 s : L size (MB/node) time (usec) GB/s/node Gflop/s/node
Grid : Message : 7.700260 s : 8 1.50 22.02 66.53 89.30
Grid : Message : 7.939874 s : 16 24.00 42.19 555.56 745.66
Grid : Message : 8.237210 s : 24 121.50 159.86 742.22 996.20
Grid : Message : 8.662510 s : 32 384.00 453.48 826.94 1109.90
Grid : Message : 9.326550 s : 40 937.50 1087.04 842.22 1130.41
Grid : Message : 10.371306 s : 48 1944.00 2231.49 850.75 1141.86
Grid : Message : 10.373198 s : ===============================================================================
Grid : Message : 10.373205 s : Communications benchmark
Grid : Message : 10.373206 s : ===============================================================================
Grid : Message : 10.373209 s : Benchmarking threaded STENCIL halo exchange in 0 dimensions
Grid : Message : 10.373210 s : ------------------------------------------
Grid : Message : 10.373212 s : L dir payload (B) time (usec) rate (GB/s/node) std dev max
Grid : Message : 10.376833 s : ===============================================================================
Grid : Message : 10.376840 s : Wilson dslash 4D vectorised
Grid : Message : 10.376886 s : ===============================================================================
Grid : Message : 10.376889 s : Benchmark DWF on 8^4 local volume
Grid : Message : 10.376890 s : * Nc : 3
Grid : Message : 10.376891 s : * Global volume : 8 8 8 8
Grid : Message : 10.376902 s : * Ls : 1
Grid : Message : 10.376903 s : * ranks : 1
Grid : Message : 10.376906 s : * nodes : 1
Grid : Message : 10.376908 s : * ranks/node : 1
Grid : Message : 10.376909 s : * ranks geom : 1 1 1 1
Grid : Message : 10.376912 s : * Using 1 threads
Grid : Message : 10.376913 s : ===============================================================================
Grid : Message : 10.378509 s : Initialised RNGs
Grid : Message : 10.491571 s : ------------------------------------------
Grid : Message : 10.491577 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 10.491578 s : * Using sequential Comms/Compute
Grid : Message : 10.491579 s : * SINGLE precision
Grid : Message : 10.491582 s : ------------------------------------------
Grid : Message : 10.517414 s : Deo FlopsPerSite is 1344
Grid : Message : 10.517445 s : Deo Gflop/s = 89.3 (0.1) 58.6-94.9
Grid : Message : 10.517450 s : Deo Gflop/s per rank 89.3
Grid : Message : 10.517453 s : Deo Gflop/s per node 89.3
Grid : Message : 10.517455 s : ------------------------------------------
Grid : Message : 10.517457 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 10.517458 s : * Using Overlapped Comms/Compute
Grid : Message : 10.517461 s : * SINGLE precision
Grid : Message : 10.517463 s : ------------------------------------------
Grid : Message : 10.536046 s : Deo FlopsPerSite is 1344.0
Grid : Message : 10.536050 s : Deo Gflop/s = 75.9 (0.1) 56.2-78.6
Grid : Message : 10.536052 s : Deo Gflop/s per rank 75.9
Grid : Message : 10.536053 s : Deo Gflop/s per node 75.9
Grid : Message : 10.536054 s : ------------------------------------------
Grid : Message : 10.536055 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 10.536056 s : * Using sequential Comms/Compute
Grid : Message : 10.536057 s : * SINGLE precision
Grid : Message : 10.536057 s : ------------------------------------------
Grid : Message : 10.551674 s : Deo FlopsPerSite is 1344.0
Grid : Message : 10.551678 s : Deo Gflop/s = 90.1 (0.1) 81.0-94.9
Grid : Message : 10.551680 s : Deo Gflop/s per rank 90.1
Grid : Message : 10.551681 s : Deo Gflop/s per node 90.1
Grid : Message : 10.551682 s : ------------------------------------------
Grid : Message : 10.551683 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 10.551684 s : * Using Overlapped Comms/Compute
Grid : Message : 10.551685 s : * SINGLE precision
Grid : Message : 10.551686 s : ------------------------------------------
Grid : Message : 10.570207 s : Deo FlopsPerSite is 1344.0
Grid : Message : 10.570211 s : Deo Gflop/s = 75.9 (0.1) 72.4-78.6
Grid : Message : 10.570213 s : Deo Gflop/s per rank 75.9
Grid : Message : 10.570214 s : Deo Gflop/s per node 75.9
Grid : Message : 10.570215 s : ------------------------------------------
Grid : Message : 10.570216 s : 8^4 x 1 Deo Best Gflop/s = 90.1 ; 90.1 per node
Grid : Message : 10.570218 s : 8^4 x 1 Deo Worst Gflop/s = 75.9 ; 75.9 per node
Grid : Message : 10.570219 s : G/S/C ; G/O/C ; G/S/S ; G/O/S
Grid : Message : 10.570220 s : 89.3 ; 75.9 ; 90.1 ; 75.9 ;
Grid : Message : 10.571167 s : ===============================================================================
Grid : Message : 10.571172 s : Benchmark DWF on 12^4 local volume
Grid : Message : 10.571173 s : * Nc : 3
Grid : Message : 10.571174 s : * Global volume : 12 12 12 12
Grid : Message : 10.571178 s : * Ls : 1
Grid : Message : 10.571179 s : * ranks : 1
Grid : Message : 10.571180 s : * nodes : 1
Grid : Message : 10.571181 s : * ranks/node : 1
Grid : Message : 10.571182 s : * ranks geom : 1 1 1 1
Grid : Message : 10.571183 s : * Using 1 threads
Grid : Message : 10.571184 s : ===============================================================================
Grid : Message : 10.576804 s : Initialised RNGs
Grid : Message : 10.843984 s : ------------------------------------------
Grid : Message : 10.843998 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 10.843999 s : * Using sequential Comms/Compute
Grid : Message : 10.844000 s : * SINGLE precision
Grid : Message : 10.844001 s : ------------------------------------------
Grid : Message : 10.862118 s : Deo FlopsPerSite is 1344.0
Grid : Message : 10.862131 s : Deo Gflop/s = 408.9 (0.5) 296.5-422.3
Grid : Message : 10.862134 s : Deo Gflop/s per rank 408.9
Grid : Message : 10.862135 s : Deo Gflop/s per node 408.9
Grid : Message : 10.862136 s : ------------------------------------------
Grid : Message : 10.862137 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 10.862138 s : * Using Overlapped Comms/Compute
Grid : Message : 10.862139 s : * SINGLE precision
Grid : Message : 10.862140 s : ------------------------------------------
Grid : Message : 10.884288 s : Deo FlopsPerSite is 1344.0
Grid : Message : 10.884292 s : Deo Gflop/s = 321.3 (0.3) 290.3-331.8
Grid : Message : 10.884294 s : Deo Gflop/s per rank 321.3
Grid : Message : 10.884295 s : Deo Gflop/s per node 321.3
Grid : Message : 10.884296 s : ------------------------------------------
Grid : Message : 10.884297 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 10.884298 s : * Using sequential Comms/Compute
Grid : Message : 10.884299 s : * SINGLE precision
Grid : Message : 10.884299 s : ------------------------------------------
Grid : Message : 10.901666 s : Deo FlopsPerSite is 1344.0
Grid : Message : 10.901669 s : Deo Gflop/s = 409.5 (0.4) 366.7-422.3
Grid : Message : 10.901671 s : Deo Gflop/s per rank 409.5
Grid : Message : 10.901672 s : Deo Gflop/s per node 409.5
Grid : Message : 10.901673 s : ------------------------------------------
Grid : Message : 10.901674 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 10.901675 s : * Using Overlapped Comms/Compute
Grid : Message : 10.901675 s : * SINGLE precision
Grid : Message : 10.901675 s : ------------------------------------------
Grid : Message : 10.923814 s : Deo FlopsPerSite is 1344.0
Grid : Message : 10.923817 s : Deo Gflop/s = 321.4 (0.3) 290.3-331.8
Grid : Message : 10.923819 s : Deo Gflop/s per rank 321.4
Grid : Message : 10.923820 s : Deo Gflop/s per node 321.4
Grid : Message : 10.923821 s : ------------------------------------------
Grid : Message : 10.923822 s : 12^4 x 1 Deo Best Gflop/s = 409.5 ; 409.5 per node
Grid : Message : 10.923824 s : 12^4 x 1 Deo Worst Gflop/s = 321.3 ; 321.3 per node
Grid : Message : 10.923826 s : G/S/C ; G/O/C ; G/S/S ; G/O/S
Grid : Message : 10.923827 s : 408.9 ; 321.3 ; 409.5 ; 321.4 ;
Grid : Message : 10.926507 s : ===============================================================================
Grid : Message : 10.926512 s : Benchmark DWF on 16^4 local volume
Grid : Message : 10.926513 s : * Nc : 3
Grid : Message : 10.926514 s : * Global volume : 16 16 16 16
Grid : Message : 10.926522 s : * Ls : 1
Grid : Message : 10.926523 s : * ranks : 1
Grid : Message : 10.926524 s : * nodes : 1
Grid : Message : 10.926525 s : * ranks/node : 1
Grid : Message : 10.926526 s : * ranks geom : 1 1 1 1
Grid : Message : 10.926527 s : * Using 1 threads
Grid : Message : 10.926528 s : ===============================================================================
Grid : Message : 10.942650 s : Initialised RNGs
Grid : Message : 11.759317 s : ------------------------------------------
Grid : Message : 11.759335 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 11.759336 s : * Using sequential Comms/Compute
Grid : Message : 11.759337 s : * SINGLE precision
Grid : Message : 11.759338 s : ------------------------------------------
Grid : Message : 11.792221 s : Deo FlopsPerSite is 1344.0
Grid : Message : 11.792236 s : Deo Gflop/s = 725.8 (0.9) 478.7-746.4
Grid : Message : 11.792239 s : Deo Gflop/s per rank 725.8
Grid : Message : 11.792240 s : Deo Gflop/s per node 725.8
Grid : Message : 11.792241 s : ------------------------------------------
Grid : Message : 11.792242 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 11.792243 s : * Using Overlapped Comms/Compute
Grid : Message : 11.792244 s : * SINGLE precision
Grid : Message : 11.792245 s : ------------------------------------------
Grid : Message : 11.823738 s : Deo FlopsPerSite is 1344.0
Grid : Message : 11.823742 s : Deo Gflop/s = 713.9 (0.5) 667.3-734.0
Grid : Message : 11.823744 s : Deo Gflop/s per rank 713.9
Grid : Message : 11.823745 s : Deo Gflop/s per node 713.9
Grid : Message : 11.823746 s : ------------------------------------------
Grid : Message : 11.823747 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 11.823748 s : * Using sequential Comms/Compute
Grid : Message : 11.823749 s : * SINGLE precision
Grid : Message : 11.823750 s : ------------------------------------------
Grid : Message : 11.854663 s : Deo FlopsPerSite is 1344.0
Grid : Message : 11.854666 s : Deo Gflop/s = 727.3 (0.5) 677.5-746.4
Grid : Message : 11.854668 s : Deo Gflop/s per rank 727.3
Grid : Message : 11.854669 s : Deo Gflop/s per node 727.3
Grid : Message : 11.854670 s : ------------------------------------------
Grid : Message : 11.854671 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 11.854672 s : * Using Overlapped Comms/Compute
Grid : Message : 11.854673 s : * SINGLE precision
Grid : Message : 11.854674 s : ------------------------------------------
Grid : Message : 11.886128 s : Deo FlopsPerSite is 1344.0
Grid : Message : 11.886131 s : Deo Gflop/s = 714.5 (0.5) 667.3-746.4
Grid : Message : 11.886133 s : Deo Gflop/s per rank 714.5
Grid : Message : 11.886134 s : Deo Gflop/s per node 714.5
Grid : Message : 11.886135 s : ------------------------------------------
Grid : Message : 11.886136 s : 16^4 x 1 Deo Best Gflop/s = 727.3 ; 727.3 per node
Grid : Message : 11.886138 s : 16^4 x 1 Deo Worst Gflop/s = 713.9 ; 713.9 per node
Grid : Message : 11.886140 s : G/S/C ; G/O/C ; G/S/S ; G/O/S
Grid : Message : 11.886141 s : 725.8 ; 713.9 ; 727.3 ; 714.5 ;
Grid : Message : 11.892130 s : ===============================================================================
Grid : Message : 11.892137 s : Benchmark DWF on 24^4 local volume
Grid : Message : 11.892138 s : * Nc : 3
Grid : Message : 11.892139 s : * Global volume : 24 24 24 24
Grid : Message : 11.892147 s : * Ls : 1
Grid : Message : 11.892148 s : * ranks : 1
Grid : Message : 11.892149 s : * nodes : 1
Grid : Message : 11.892150 s : * ranks/node : 1
Grid : Message : 11.892151 s : * ranks geom : 1 1 1 1
Grid : Message : 11.892152 s : * Using 1 threads
Grid : Message : 11.892153 s : ===============================================================================
Grid : Message : 11.978452 s : Initialised RNGs
Grid : Message : 16.753360 s : ------------------------------------------
Grid : Message : 16.753560 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 16.753570 s : * Using sequential Comms/Compute
Grid : Message : 16.753580 s : * SINGLE precision
Grid : Message : 16.753590 s : ------------------------------------------
Grid : Message : 16.178475 s : Deo FlopsPerSite is 1344.0
Grid : Message : 16.178489 s : Deo Gflop/s = 1204.9 (0.4) 1161.2-1225.0
Grid : Message : 16.178492 s : Deo Gflop/s per rank 1204.9
Grid : Message : 16.178493 s : Deo Gflop/s per node 1204.9
Grid : Message : 16.178494 s : ------------------------------------------
Grid : Message : 16.178495 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 16.178496 s : * Using Overlapped Comms/Compute
Grid : Message : 16.178497 s : * SINGLE precision
Grid : Message : 16.178498 s : ------------------------------------------
Grid : Message : 16.313480 s : Deo FlopsPerSite is 1344.0
Grid : Message : 16.313484 s : Deo Gflop/s = 842.5 (0.3) 816.7-864.2
Grid : Message : 16.313486 s : Deo Gflop/s per rank 842.5
Grid : Message : 16.313487 s : Deo Gflop/s per node 842.5
Grid : Message : 16.313488 s : ------------------------------------------
Grid : Message : 16.313489 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 16.313490 s : * Using sequential Comms/Compute
Grid : Message : 16.313491 s : * SINGLE precision
Grid : Message : 16.313491 s : ------------------------------------------
Grid : Message : 16.407847 s : Deo FlopsPerSite is 1344.0
Grid : Message : 16.407850 s : Deo Gflop/s = 1205.3 (0.4) 1167.3-1225.0
Grid : Message : 16.407852 s : Deo Gflop/s per rank 1205.3
Grid : Message : 16.407853 s : Deo Gflop/s per node 1205.3
Grid : Message : 16.407854 s : ------------------------------------------
Grid : Message : 16.407855 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 16.407856 s : * Using Overlapped Comms/Compute
Grid : Message : 16.407857 s : * SINGLE precision
Grid : Message : 16.407858 s : ------------------------------------------
Grid : Message : 16.543002 s : Deo FlopsPerSite is 1344.0
Grid : Message : 16.543005 s : Deo Gflop/s = 841.5 (0.3) 825.8-860.8
Grid : Message : 16.543007 s : Deo Gflop/s per rank 841.5
Grid : Message : 16.543008 s : Deo Gflop/s per node 841.5
Grid : Message : 16.543009 s : ------------------------------------------
Grid : Message : 16.543010 s : 24^4 x 1 Deo Best Gflop/s = 1205.3 ; 1205.3 per node
Grid : Message : 16.543012 s : 24^4 x 1 Deo Worst Gflop/s = 841.5 ; 841.5 per node
Grid : Message : 16.543013 s : G/S/C ; G/O/C ; G/S/S ; G/O/S
Grid : Message : 16.543014 s : 1204.9 ; 842.5 ; 1205.3 ; 841.5 ;
Grid : Message : 16.575736 s : ===============================================================================
Grid : Message : 16.575752 s : Benchmark DWF on 32^4 local volume
Grid : Message : 16.575753 s : * Nc : 3
Grid : Message : 16.575754 s : * Global volume : 32 32 32 32
Grid : Message : 16.575767 s : * Ls : 1
Grid : Message : 16.575768 s : * ranks : 1
Grid : Message : 16.575769 s : * nodes : 1
Grid : Message : 16.575770 s : * ranks/node : 1
Grid : Message : 16.575771 s : * ranks geom : 1 1 1 1
Grid : Message : 16.575773 s : * Using 1 threads
Grid : Message : 16.575774 s : ===============================================================================
Grid : Message : 16.839329 s : Initialised RNGs
Grid : Message : 29.647706 s : ------------------------------------------
Grid : Message : 29.647727 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 29.647728 s : * Using sequential Comms/Compute
Grid : Message : 29.647729 s : * SINGLE precision
Grid : Message : 29.647730 s : ------------------------------------------
Grid : Message : 29.981303 s : Deo FlopsPerSite is 1344.0
Grid : Message : 29.981317 s : Deo Gflop/s = 1171.5 (0.4) 1142.0-1198.4
Grid : Message : 29.981320 s : Deo Gflop/s per rank 1171.5
Grid : Message : 29.981321 s : Deo Gflop/s per node 1171.5
Grid : Message : 29.981322 s : ------------------------------------------
Grid : Message : 29.981323 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 29.981324 s : * Using Overlapped Comms/Compute
Grid : Message : 29.981325 s : * SINGLE precision
Grid : Message : 29.981325 s : ------------------------------------------
Grid : Message : 30.405186 s : Deo FlopsPerSite is 1344.0
Grid : Message : 30.405194 s : Deo Gflop/s = 847.8 (0.2) 833.9-861.4
Grid : Message : 30.405196 s : Deo Gflop/s per rank 847.8
Grid : Message : 30.405197 s : Deo Gflop/s per node 847.8
Grid : Message : 30.405198 s : ------------------------------------------
Grid : Message : 30.405199 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 30.405200 s : * Using sequential Comms/Compute
Grid : Message : 30.405201 s : * SINGLE precision
Grid : Message : 30.405202 s : ------------------------------------------
Grid : Message : 30.711705 s : Deo FlopsPerSite is 1344.0
Grid : Message : 30.711710 s : Deo Gflop/s = 1172.6 (0.4) 1143.9-1200.4
Grid : Message : 30.711712 s : Deo Gflop/s per rank 1172.6
Grid : Message : 30.711713 s : Deo Gflop/s per node 1172.6
Grid : Message : 30.711714 s : ------------------------------------------
Grid : Message : 30.711715 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 30.711716 s : * Using Overlapped Comms/Compute
Grid : Message : 30.711717 s : * SINGLE precision
Grid : Message : 30.711717 s : ------------------------------------------
Grid : Message : 31.144787 s : Deo FlopsPerSite is 1344.0
Grid : Message : 31.144798 s : Deo Gflop/s = 829.5 (17.9) 72.9-862.5
Grid : Message : 31.144801 s : Deo Gflop/s per rank 829.5
Grid : Message : 31.144802 s : Deo Gflop/s per node 829.5
Grid : Message : 31.144803 s : ------------------------------------------
Grid : Message : 31.144804 s : 32^4 x 1 Deo Best Gflop/s = 1172.6 ; 1172.6 per node
Grid : Message : 31.144806 s : 32^4 x 1 Deo Worst Gflop/s = 829.5 ; 829.5 per node
Grid : Message : 31.144808 s : G/S/C ; G/O/C ; G/S/S ; G/O/S
Grid : Message : 31.144809 s : 1171.5 ; 847.8 ; 1172.6 ; 829.5 ;
Grid : Message : 31.236007 s : ===============================================================================
Grid : Message : 31.236024 s : Domain wall dslash 4D vectorised
Grid : Message : 31.236205 s : ===============================================================================
Grid : Message : 31.236208 s : Benchmark DWF on 8^4 local volume
Grid : Message : 31.236210 s : * Nc : 3
Grid : Message : 31.236211 s : * Global volume : 8 8 8 8
Grid : Message : 31.236221 s : * Ls : 12
Grid : Message : 31.236222 s : * ranks : 1
Grid : Message : 31.236223 s : * nodes : 1
Grid : Message : 31.236224 s : * ranks/node : 1
Grid : Message : 31.236225 s : * ranks geom : 1 1 1 1
Grid : Message : 31.236226 s : * Using 1 threads
Grid : Message : 31.236227 s : ===============================================================================
Grid : Message : 31.242721 s : Initialised RNGs
Grid : Message : 31.459446 s : ------------------------------------------
Grid : Message : 31.459454 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 31.459455 s : * Using sequential Comms/Compute
Grid : Message : 31.459456 s : * SINGLE precision
Grid : Message : 31.459457 s : ------------------------------------------
Grid : Message : 31.479412 s : Deo FlopsPerSite is 1344.0
Grid : Message : 31.479425 s : Deo Gflop/s = 898.8 (0.9) 786.4-917.5
Grid : Message : 31.479428 s : Deo Gflop/s per rank 898.8
Grid : Message : 31.479429 s : Deo Gflop/s per node 898.8
Grid : Message : 31.479430 s : ------------------------------------------
Grid : Message : 31.479431 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 31.479432 s : * Using Overlapped Comms/Compute
Grid : Message : 31.479433 s : * SINGLE precision
Grid : Message : 31.479433 s : ------------------------------------------
Grid : Message : 31.503228 s : Deo FlopsPerSite is 1344.0
Grid : Message : 31.503233 s : Deo Gflop/s = 708.8 (0.6) 647.6-734.0
Grid : Message : 31.503235 s : Deo Gflop/s per rank 708.8
Grid : Message : 31.503236 s : Deo Gflop/s per node 708.8
Grid : Message : 31.503237 s : ------------------------------------------
Grid : Message : 31.503238 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 31.503238 s : * Using sequential Comms/Compute
Grid : Message : 31.503239 s : * SINGLE precision
Grid : Message : 31.503239 s : ------------------------------------------
Grid : Message : 31.521974 s : Deo FlopsPerSite is 1344.0
Grid : Message : 31.521977 s : Deo Gflop/s = 900.5 (0.9) 805.6-917.5
Grid : Message : 31.521979 s : Deo Gflop/s per rank 900.5
Grid : Message : 31.521980 s : Deo Gflop/s per node 900.5
Grid : Message : 31.521981 s : ------------------------------------------
Grid : Message : 31.521982 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 31.521983 s : * Using Overlapped Comms/Compute
Grid : Message : 31.521984 s : * SINGLE precision
Grid : Message : 31.521985 s : ------------------------------------------
Grid : Message : 31.545784 s : Deo FlopsPerSite is 1344.0
Grid : Message : 31.545787 s : Deo Gflop/s = 708.6 (0.6) 647.6-734.0
Grid : Message : 31.545789 s : Deo Gflop/s per rank 708.6
Grid : Message : 31.545790 s : Deo Gflop/s per node 708.6
Grid : Message : 31.545791 s : ------------------------------------------
Grid : Message : 31.545792 s : 8^4 x 12 Deo Best Gflop/s = 900.5 ; 900.5 per node
Grid : Message : 31.545794 s : 8^4 x 12 Deo Worst Gflop/s = 708.6 ; 708.6 per node
Grid : Message : 31.545796 s : G/S/C ; G/O/C ; G/S/S ; G/O/S
Grid : Message : 31.545797 s : 898.8 ; 708.8 ; 900.5 ; 708.6 ;
Grid : Message : 31.548767 s : ===============================================================================
Grid : Message : 31.548772 s : Benchmark DWF on 12^4 local volume
Grid : Message : 31.548773 s : * Nc : 3
Grid : Message : 31.548774 s : * Global volume : 12 12 12 12
Grid : Message : 31.548780 s : * Ls : 12
Grid : Message : 31.548781 s : * ranks : 1
Grid : Message : 31.548782 s : * nodes : 1
Grid : Message : 31.548783 s : * ranks/node : 1
Grid : Message : 31.548784 s : * ranks geom : 1 1 1 1
Grid : Message : 31.548785 s : * Using 1 threads
Grid : Message : 31.548786 s : ===============================================================================
Grid : Message : 31.581443 s : Initialised RNGs
Grid : Message : 32.627696 s : ------------------------------------------
Grid : Message : 32.627712 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 32.627713 s : * Using sequential Comms/Compute
Grid : Message : 32.627714 s : * SINGLE precision
Grid : Message : 32.627715 s : ------------------------------------------
Grid : Message : 32.684713 s : Deo FlopsPerSite is 1344.0
Grid : Message : 32.684728 s : Deo Gflop/s = 1549.8 (0.6) 1479.8-1577.5
Grid : Message : 32.684731 s : Deo Gflop/s per rank 1549.8
Grid : Message : 32.684732 s : Deo Gflop/s per node 1549.8
Grid : Message : 32.684733 s : ------------------------------------------
Grid : Message : 32.684734 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 32.684735 s : * Using Overlapped Comms/Compute
Grid : Message : 32.684736 s : * SINGLE precision
Grid : Message : 32.684736 s : ------------------------------------------
Grid : Message : 32.741454 s : Deo FlopsPerSite is 1344.0
Grid : Message : 32.741459 s : Deo Gflop/s = 1504.6 (0.8) 1429.2-1534.1
Grid : Message : 32.741461 s : Deo Gflop/s per rank 1504.6
Grid : Message : 32.741462 s : Deo Gflop/s per node 1504.6
Grid : Message : 32.741463 s : ------------------------------------------
Grid : Message : 32.741464 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 32.741465 s : * Using sequential Comms/Compute
Grid : Message : 32.741466 s : * SINGLE precision
Grid : Message : 32.741467 s : ------------------------------------------
Grid : Message : 32.796496 s : Deo FlopsPerSite is 1344.0
Grid : Message : 32.796501 s : Deo Gflop/s = 1550.3 (0.6) 1466.8-1577.5
Grid : Message : 32.796503 s : Deo Gflop/s per rank 1550.3
Grid : Message : 32.796504 s : Deo Gflop/s per node 1550.3
Grid : Message : 32.796505 s : ------------------------------------------
Grid : Message : 32.796506 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 32.796507 s : * Using Overlapped Comms/Compute
Grid : Message : 32.796508 s : * SINGLE precision
Grid : Message : 32.796508 s : ------------------------------------------
Grid : Message : 32.853215 s : Deo FlopsPerSite is 1344.0
Grid : Message : 32.853220 s : Deo Gflop/s = 1504.4 (0.8) 1429.2-1548.3
Grid : Message : 32.853222 s : Deo Gflop/s per rank 1504.4
Grid : Message : 32.853223 s : Deo Gflop/s per node 1504.4
Grid : Message : 32.853224 s : ------------------------------------------
Grid : Message : 32.853225 s : 12^4 x 12 Deo Best Gflop/s = 1550.3 ; 1550.3 per node
Grid : Message : 32.853227 s : 12^4 x 12 Deo Worst Gflop/s = 1504.4 ; 1504.4 per node
Grid : Message : 32.853228 s : G/S/C ; G/O/C ; G/S/S ; G/O/S
Grid : Message : 32.853229 s : 1549.8 ; 1504.6 ; 1550.3 ; 1504.4 ;
Grid : Message : 32.864215 s : ===============================================================================
Grid : Message : 32.864221 s : Benchmark DWF on 16^4 local volume
Grid : Message : 32.864222 s : * Nc : 3
Grid : Message : 32.864223 s : * Global volume : 16 16 16 16
Grid : Message : 32.864230 s : * Ls : 12
Grid : Message : 32.864231 s : * ranks : 1
Grid : Message : 32.864232 s : * nodes : 1
Grid : Message : 32.864233 s : * ranks/node : 1
Grid : Message : 32.864234 s : * ranks geom : 1 1 1 1
Grid : Message : 32.864235 s : * Using 1 threads
Grid : Message : 32.864236 s : ===============================================================================
Grid : Message : 32.970228 s : Initialised RNGs
Grid : Message : 36.263248 s : ------------------------------------------
Grid : Message : 36.263263 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 36.263264 s : * Using sequential Comms/Compute
Grid : Message : 36.263265 s : * SINGLE precision
Grid : Message : 36.263266 s : ------------------------------------------
Grid : Message : 36.426972 s : Deo FlopsPerSite is 1344.0
Grid : Message : 36.426985 s : Deo Gflop/s = 1699.0 (0.5) 1661.9-1727.1
Grid : Message : 36.426988 s : Deo Gflop/s per rank 1699.0
Grid : Message : 36.426989 s : Deo Gflop/s per node 1699.0
Grid : Message : 36.426990 s : ------------------------------------------
Grid : Message : 36.426991 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 36.426992 s : * Using Overlapped Comms/Compute
Grid : Message : 36.426993 s : * SINGLE precision
Grid : Message : 36.426994 s : ------------------------------------------
Grid : Message : 36.609302 s : Deo FlopsPerSite is 1344.0
Grid : Message : 36.609306 s : Deo Gflop/s = 1478.6 (0.5) 1440.0-1514.3
Grid : Message : 36.609308 s : Deo Gflop/s per rank 1478.6
Grid : Message : 36.609309 s : Deo Gflop/s per node 1478.6
Grid : Message : 36.609310 s : ------------------------------------------
Grid : Message : 36.609311 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 36.609312 s : * Using sequential Comms/Compute
Grid : Message : 36.609313 s : * SINGLE precision
Grid : Message : 36.609314 s : ------------------------------------------
Grid : Message : 36.767966 s : Deo FlopsPerSite is 1344.0
Grid : Message : 36.767972 s : Deo Gflop/s = 1699.2 (0.5) 1656.7-1732.7
Grid : Message : 36.767974 s : Deo Gflop/s per rank 1699.2
Grid : Message : 36.767975 s : Deo Gflop/s per node 1699.2
Grid : Message : 36.767976 s : ------------------------------------------
Grid : Message : 36.767977 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 36.767978 s : * Using Overlapped Comms/Compute
Grid : Message : 36.767978 s : * SINGLE precision
Grid : Message : 36.767978 s : ------------------------------------------
Grid : Message : 36.950363 s : Deo FlopsPerSite is 1344.0
Grid : Message : 36.950367 s : Deo Gflop/s = 1477.9 (0.5) 1443.9-1509.9
Grid : Message : 36.950369 s : Deo Gflop/s per rank 1477.9
Grid : Message : 36.950370 s : Deo Gflop/s per node 1477.9
Grid : Message : 36.950371 s : ------------------------------------------
Grid : Message : 36.950372 s : 16^4 x 12 Deo Best Gflop/s = 1699.2 ; 1699.2 per node
Grid : Message : 36.950374 s : 16^4 x 12 Deo Worst Gflop/s = 1477.9 ; 1477.9 per node
Grid : Message : 36.950376 s : G/S/C ; G/O/C ; G/S/S ; G/O/S
Grid : Message : 36.950377 s : 1699.0 ; 1478.6 ; 1699.2 ; 1477.9 ;
Grid : Message : 36.988167 s : ===============================================================================
Grid : Message : 36.988172 s : Benchmark DWF on 24^4 local volume
Grid : Message : 36.988173 s : * Nc : 3
Grid : Message : 36.988174 s : * Global volume : 24 24 24 24
Grid : Message : 36.988182 s : * Ls : 12
Grid : Message : 36.988183 s : * ranks : 1
Grid : Message : 36.988184 s : * nodes : 1
Grid : Message : 36.988185 s : * ranks/node : 1
Grid : Message : 36.988188 s : * ranks geom : 1 1 1 1
Grid : Message : 36.988190 s : * Using 1 threads
Grid : Message : 36.988191 s : ===============================================================================
Grid : Message : 37.567321 s : Initialised RNGs
Grid : Message : 54.225331 s : ------------------------------------------
Grid : Message : 54.225352 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 54.225353 s : * Using sequential Comms/Compute
Grid : Message : 54.225354 s : * SINGLE precision
Grid : Message : 54.225355 s : ------------------------------------------
Grid : Message : 55.805700 s : Deo FlopsPerSite is 1344.0
Grid : Message : 55.807400 s : Deo Gflop/s = 1800.0 (0.2) 1788.4-1809.0
Grid : Message : 55.807700 s : Deo Gflop/s per rank 1800.0
Grid : Message : 55.807800 s : Deo Gflop/s per node 1800.0
Grid : Message : 55.807900 s : ------------------------------------------
Grid : Message : 55.808000 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 55.808100 s : * Using Overlapped Comms/Compute
Grid : Message : 55.808200 s : * SINGLE precision
Grid : Message : 55.808300 s : ------------------------------------------
Grid : Message : 55.989838 s : Deo FlopsPerSite is 1344.0
Grid : Message : 55.989846 s : Deo Gflop/s = 1389.9 (0.2) 1376.3-1403.0
Grid : Message : 55.989849 s : Deo Gflop/s per rank 1389.9
Grid : Message : 55.989850 s : Deo Gflop/s per node 1389.9
Grid : Message : 55.989851 s : ------------------------------------------
Grid : Message : 55.989852 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 55.989853 s : * Using sequential Comms/Compute
Grid : Message : 55.989854 s : * SINGLE precision
Grid : Message : 55.989854 s : ------------------------------------------
Grid : Message : 56.748218 s : Deo FlopsPerSite is 1344.0
Grid : Message : 56.748226 s : Deo Gflop/s = 1799.3 (0.2) 1786.0-1809.0
Grid : Message : 56.748228 s : Deo Gflop/s per rank 1799.3
Grid : Message : 56.748229 s : Deo Gflop/s per node 1799.3
Grid : Message : 56.748230 s : ------------------------------------------
Grid : Message : 56.748231 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 56.748232 s : * Using Overlapped Comms/Compute
Grid : Message : 56.748233 s : * SINGLE precision
Grid : Message : 56.748234 s : ------------------------------------------
Grid : Message : 57.729778 s : Deo FlopsPerSite is 1344.0
Grid : Message : 57.729790 s : Deo Gflop/s = 1390.2 (0.2) 1375.5-1403.7
Grid : Message : 57.729793 s : Deo Gflop/s per rank 1390.2
Grid : Message : 57.729794 s : Deo Gflop/s per node 1390.2
Grid : Message : 57.729795 s : ------------------------------------------
Grid : Message : 57.729796 s : 24^4 x 12 Deo Best Gflop/s = 1800.0 ; 1800.0 per node
Grid : Message : 57.729798 s : 24^4 x 12 Deo Worst Gflop/s = 1389.9 ; 1389.9 per node
Grid : Message : 57.729801 s : G/S/C ; G/O/C ; G/S/S ; G/O/S
Grid : Message : 57.729802 s : 1800.0 ; 1389.9 ; 1799.3 ; 1390.2 ;
Grid : Message : 57.919179 s : ===============================================================================
Grid : Message : 57.919195 s : Benchmark DWF on 32^4 local volume
Grid : Message : 57.919197 s : * Nc : 3
Grid : Message : 57.919198 s : * Global volume : 32 32 32 32
Grid : Message : 57.919207 s : * Ls : 12
Grid : Message : 57.919208 s : * ranks : 1
Grid : Message : 57.919209 s : * nodes : 1
Grid : Message : 57.919210 s : * ranks/node : 1
Grid : Message : 57.919211 s : * ranks geom : 1 1 1 1
Grid : Message : 57.919212 s : * Using 1 threads
Grid : Message : 57.919213 s : ===============================================================================
Grid : Message : 59.798510 s : Initialised RNGs
Grid : Message : 112.360179 s : ------------------------------------------
Grid : Message : 112.360198 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 112.360199 s : * Using sequential Comms/Compute
Grid : Message : 112.360200 s : * SINGLE precision
Grid : Message : 112.360201 s : ------------------------------------------
Grid : Message : 115.191800 s : Deo FlopsPerSite is 1344.0
Grid : Message : 115.192040 s : Deo Gflop/s = 1670.1 (0.3) 1650.5-1685.1
Grid : Message : 115.192080 s : Deo Gflop/s per rank 1670.1
Grid : Message : 115.192090 s : Deo Gflop/s per node 1670.1
Grid : Message : 115.192100 s : ------------------------------------------
Grid : Message : 115.192110 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 115.192120 s : * Using Overlapped Comms/Compute
Grid : Message : 115.192130 s : * SINGLE precision
Grid : Message : 115.192140 s : ------------------------------------------
Grid : Message : 117.996882 s : Deo FlopsPerSite is 1344.0
Grid : Message : 117.996897 s : Deo Gflop/s = 1448.3 (0.1) 1440.3-1455.6
Grid : Message : 117.996900 s : Deo Gflop/s per rank 1448.3
Grid : Message : 117.996901 s : Deo Gflop/s per node 1448.3
Grid : Message : 117.996902 s : ------------------------------------------
Grid : Message : 117.996903 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 117.996904 s : * Using sequential Comms/Compute
Grid : Message : 117.996905 s : * SINGLE precision
Grid : Message : 117.996906 s : ------------------------------------------
Grid : Message : 120.578643 s : Deo FlopsPerSite is 1344.0
Grid : Message : 120.578657 s : Deo Gflop/s = 1670.3 (0.3) 1635.5-1685.7
Grid : Message : 120.578660 s : Deo Gflop/s per rank 1670.3
Grid : Message : 120.578661 s : Deo Gflop/s per node 1670.3
Grid : Message : 120.578663 s : ------------------------------------------
Grid : Message : 120.578664 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 120.578665 s : * Using Overlapped Comms/Compute
Grid : Message : 120.578666 s : * SINGLE precision
Grid : Message : 120.578667 s : ------------------------------------------
Grid : Message : 123.556314 s : Deo FlopsPerSite is 1344.0
Grid : Message : 123.556328 s : Deo Gflop/s = 1448.3 (0.1) 1440.7-1455.6
Grid : Message : 123.556331 s : Deo Gflop/s per rank 1448.3
Grid : Message : 123.556332 s : Deo Gflop/s per node 1448.3
Grid : Message : 123.556333 s : ------------------------------------------
Grid : Message : 123.556334 s : 32^4 x 12 Deo Best Gflop/s = 1670.3 ; 1670.3 per node
Grid : Message : 123.556336 s : 32^4 x 12 Deo Worst Gflop/s = 1448.3 ; 1448.3 per node
Grid : Message : 123.556338 s : G/S/C ; G/O/C ; G/S/S ; G/O/S
Grid : Message : 123.556339 s : 1670.1 ; 1448.3 ; 1670.3 ; 1448.3 ;
Grid : Message : 124.246882 s : ===============================================================================
Grid : Message : 124.246903 s : Improved Staggered dslash 4D vectorised
Grid : Message : 124.247016 s : ===============================================================================
Grid : Message : 124.247019 s : Benchmark ImprovedStaggered on 8^4 local volume
Grid : Message : 124.247020 s : * Global volume : 8 8 8 8
Grid : Message : 124.247033 s : * ranks : 1
Grid : Message : 124.247034 s : * nodes : 1
Grid : Message : 124.247035 s : * ranks/node : 1
Grid : Message : 124.247039 s : * ranks geom : 1 1 1 1
Grid : Message : 124.247043 s : * Using 1 threads
Grid : Message : 124.247044 s : ===============================================================================
Grid : Message : 124.247555 s : Initialised RNGs
Grid : Message : 124.319977 s : ------------------------------------------
Grid : Message : 124.319984 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 124.319985 s : * Using sequential Comms/Compute
Grid : Message : 124.319986 s : * SINGLE precision
Grid : Message : 124.319987 s : ------------------------------------------
Grid : Message : 124.344334 s : Deo Gflop/s = 67.3 (0.1) 51.0-71.1
Grid : Message : 124.344347 s : Deo Gflop/s per rank 67.3
Grid : Message : 124.344349 s : Deo Gflop/s per node 67.3
Grid : Message : 124.344350 s : ------------------------------------------
Grid : Message : 124.344351 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 124.344352 s : * Using Overlapped Comms/Compute
Grid : Message : 124.344353 s : * SINGLE precision
Grid : Message : 124.344354 s : ------------------------------------------
Grid : Message : 124.375542 s : Deo Gflop/s = 38.5 (0.0) 33.5-39.8
Grid : Message : 124.375547 s : Deo Gflop/s per rank 38.5
Grid : Message : 124.375548 s : Deo Gflop/s per node 38.5
Grid : Message : 124.375549 s : ------------------------------------------
Grid : Message : 124.375550 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 124.375551 s : * Using sequential Comms/Compute
Grid : Message : 124.375552 s : * SINGLE precision
Grid : Message : 124.375552 s : ------------------------------------------
Grid : Message : 124.393356 s : Deo Gflop/s = 67.4 (0.1) 63.4-71.1
Grid : Message : 124.393361 s : Deo Gflop/s per rank 67.4
Grid : Message : 124.393362 s : Deo Gflop/s per node 67.4
Grid : Message : 124.393363 s : ------------------------------------------
Grid : Message : 124.393364 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 124.393365 s : * Using Overlapped Comms/Compute
Grid : Message : 124.393366 s : * SINGLE precision
Grid : Message : 124.393366 s : ------------------------------------------
Grid : Message : 124.424488 s : Deo Gflop/s = 38.5 (0.0) 36.1-39.8
Grid : Message : 124.424493 s : Deo Gflop/s per rank 38.5
Grid : Message : 124.424494 s : Deo Gflop/s per node 38.5
Grid : Message : 124.424495 s : ------------------------------------------
Grid : Message : 124.424496 s : 8^4 Deo Best Gflop/s = 67.4 ; 67.4 per node
Grid : Message : 124.424498 s : 8^4 Deo Worst Gflop/s = 38.5 ; 38.5 per node
Grid : Message : 124.424500 s : G/S/C ; G/O/C ; G/S/S ; G/O/S
Grid : Message : 124.424501 s : 67.3 ; 38.5 ; 67.4 ; 38.5 ;
Grid : Message : 124.425032 s : ===============================================================================
Grid : Message : 124.425036 s : Benchmark ImprovedStaggered on 12^4 local volume
Grid : Message : 124.425037 s : * Global volume : 12 12 12 12
Grid : Message : 124.425040 s : * ranks : 1
Grid : Message : 124.425041 s : * nodes : 1
Grid : Message : 124.425042 s : * ranks/node : 1
Grid : Message : 124.425043 s : * ranks geom : 1 1 1 1
Grid : Message : 124.425044 s : * Using 1 threads
Grid : Message : 124.425045 s : ===============================================================================
Grid : Message : 124.427370 s : Initialised RNGs
Grid : Message : 124.662237 s : ------------------------------------------
Grid : Message : 124.662245 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 124.662246 s : * Using sequential Comms/Compute
Grid : Message : 124.662247 s : * SINGLE precision
Grid : Message : 124.662248 s : ------------------------------------------
Grid : Message : 124.682724 s : Deo Gflop/s = 315.4 (0.3) 289.8-321.1
Grid : Message : 124.682733 s : Deo Gflop/s per rank 315.4
Grid : Message : 124.682734 s : Deo Gflop/s per node 315.4
Grid : Message : 124.682735 s : ------------------------------------------
Grid : Message : 124.682736 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 124.682737 s : * Using Overlapped Comms/Compute
Grid : Message : 124.682738 s : * SINGLE precision
Grid : Message : 124.682738 s : ------------------------------------------
Grid : Message : 124.717283 s : Deo Gflop/s = 175.7 (0.1) 156.3-180.0
Grid : Message : 124.717288 s : Deo Gflop/s per rank 175.7
Grid : Message : 124.717289 s : Deo Gflop/s per node 175.7
Grid : Message : 124.717290 s : ------------------------------------------
Grid : Message : 124.717291 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 124.717292 s : * Using sequential Comms/Compute
Grid : Message : 124.717293 s : * SINGLE precision
Grid : Message : 124.717293 s : ------------------------------------------
Grid : Message : 124.736518 s : Deo Gflop/s = 315.7 (0.4) 289.8-330.0
Grid : Message : 124.736522 s : Deo Gflop/s per rank 315.7
Grid : Message : 124.736523 s : Deo Gflop/s per node 315.7
Grid : Message : 124.736524 s : ------------------------------------------
Grid : Message : 124.736525 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 124.736526 s : * Using Overlapped Comms/Compute
Grid : Message : 124.736527 s : * SINGLE precision
Grid : Message : 124.736527 s : ------------------------------------------
Grid : Message : 124.771049 s : Deo Gflop/s = 175.7 (0.1) 160.6-180.0
Grid : Message : 124.771055 s : Deo Gflop/s per rank 175.7
Grid : Message : 124.771056 s : Deo Gflop/s per node 175.7
Grid : Message : 124.771057 s : ------------------------------------------
Grid : Message : 124.771058 s : 12^4 Deo Best Gflop/s = 315.7 ; 315.7 per node
Grid : Message : 124.771060 s : 12^4 Deo Worst Gflop/s = 175.7 ; 175.7 per node
Grid : Message : 124.771061 s : G/S/C ; G/O/C ; G/S/S ; G/O/S
Grid : Message : 124.771062 s : 315.4 ; 175.7 ; 315.7 ; 175.7 ;
Grid : Message : 124.772087 s : ===============================================================================
Grid : Message : 124.772091 s : Benchmark ImprovedStaggered on 16^4 local volume
Grid : Message : 124.772092 s : * Global volume : 16 16 16 16
Grid : Message : 124.772097 s : * ranks : 1
Grid : Message : 124.772098 s : * nodes : 1
Grid : Message : 124.772099 s : * ranks/node : 1
Grid : Message : 124.772100 s : * ranks geom : 1 1 1 1
Grid : Message : 124.772101 s : * Using 1 threads
Grid : Message : 124.772102 s : ===============================================================================
Grid : Message : 124.779417 s : Initialised RNGs
Grid : Message : 125.477612 s : ------------------------------------------
Grid : Message : 125.477627 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 125.477628 s : * Using sequential Comms/Compute
Grid : Message : 125.477629 s : * SINGLE precision
Grid : Message : 125.477630 s : ------------------------------------------
Grid : Message : 125.519431 s : Deo Gflop/s = 498.8 (0.3) 469.4-507.5
Grid : Message : 125.519447 s : Deo Gflop/s per rank 498.8
Grid : Message : 125.519448 s : Deo Gflop/s per node 498.8
Grid : Message : 125.519449 s : ------------------------------------------
Grid : Message : 125.519450 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 125.519451 s : * Using Overlapped Comms/Compute
Grid : Message : 125.519452 s : * SINGLE precision
Grid : Message : 125.519453 s : ------------------------------------------
Grid : Message : 125.563771 s : Deo Gflop/s = 432.5 (0.4) 399.5-441.8
Grid : Message : 125.563776 s : Deo Gflop/s per rank 432.5
Grid : Message : 125.563777 s : Deo Gflop/s per node 432.5
Grid : Message : 125.563778 s : ------------------------------------------
Grid : Message : 125.563779 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 125.563780 s : * Using sequential Comms/Compute
Grid : Message : 125.563781 s : * SINGLE precision
Grid : Message : 125.563781 s : ------------------------------------------
Grid : Message : 125.602189 s : Deo Gflop/s = 499.0 (0.3) 475.3-507.5
Grid : Message : 125.602193 s : Deo Gflop/s per rank 499.0
Grid : Message : 125.602194 s : Deo Gflop/s per node 499.0
Grid : Message : 125.602195 s : ------------------------------------------
Grid : Message : 125.602196 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 125.602197 s : * Using Overlapped Comms/Compute
Grid : Message : 125.602198 s : * SINGLE precision
Grid : Message : 125.602198 s : ------------------------------------------
Grid : Message : 125.646629 s : Deo Gflop/s = 431.3 (0.3) 399.5-441.8
Grid : Message : 125.646634 s : Deo Gflop/s per rank 431.3
Grid : Message : 125.646635 s : Deo Gflop/s per node 431.3
Grid : Message : 125.646636 s : ------------------------------------------
Grid : Message : 125.646637 s : 16^4 Deo Best Gflop/s = 499.0 ; 499.0 per node
Grid : Message : 125.646639 s : 16^4 Deo Worst Gflop/s = 431.3 ; 431.3 per node
Grid : Message : 125.646640 s : G/S/C ; G/O/C ; G/S/S ; G/O/S
Grid : Message : 125.646641 s : 498.8 ; 432.5 ; 499.0 ; 431.3 ;
Grid : Message : 125.649458 s : ===============================================================================
Grid : Message : 125.649463 s : Benchmark ImprovedStaggered on 24^4 local volume
Grid : Message : 125.649464 s : * Global volume : 24 24 24 24
Grid : Message : 125.649471 s : * ranks : 1
Grid : Message : 125.649472 s : * nodes : 1
Grid : Message : 125.649473 s : * ranks/node : 1
Grid : Message : 125.649474 s : * ranks geom : 1 1 1 1
Grid : Message : 125.649475 s : * Using 1 threads
Grid : Message : 125.649478 s : ===============================================================================
Grid : Message : 125.686019 s : Initialised RNGs
Grid : Message : 129.152249 s : ------------------------------------------
Grid : Message : 129.152265 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 129.152266 s : * Using sequential Comms/Compute
Grid : Message : 129.152267 s : * SINGLE precision
Grid : Message : 129.152268 s : ------------------------------------------
Grid : Message : 129.300262 s : Deo Gflop/s = 747.3 (0.2) 731.2-763.5
Grid : Message : 129.300278 s : Deo Gflop/s per rank 747.3
Grid : Message : 129.300279 s : Deo Gflop/s per node 747.3
Grid : Message : 129.300280 s : ------------------------------------------
Grid : Message : 129.300281 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 129.300282 s : * Using Overlapped Comms/Compute
Grid : Message : 129.300283 s : * SINGLE precision
Grid : Message : 129.300284 s : ------------------------------------------
Grid : Message : 129.447275 s : Deo Gflop/s = 659.7 (0.2) 646.6-671.8
Grid : Message : 129.447281 s : Deo Gflop/s per rank 659.7
Grid : Message : 129.447282 s : Deo Gflop/s per node 659.7
Grid : Message : 129.447283 s : ------------------------------------------
Grid : Message : 129.447284 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 129.447285 s : * Using sequential Comms/Compute
Grid : Message : 129.447286 s : * SINGLE precision
Grid : Message : 129.447287 s : ------------------------------------------
Grid : Message : 129.577059 s : Deo Gflop/s = 747.3 (0.2) 731.2-763.5
Grid : Message : 129.577065 s : Deo Gflop/s per rank 747.3
Grid : Message : 129.577066 s : Deo Gflop/s per node 747.3
Grid : Message : 129.577067 s : ------------------------------------------
Grid : Message : 129.577068 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 129.577069 s : * Using Overlapped Comms/Compute
Grid : Message : 129.577070 s : * SINGLE precision
Grid : Message : 129.577070 s : ------------------------------------------
Grid : Message : 129.724027 s : Deo Gflop/s = 659.9 (0.2) 644.4-674.1
Grid : Message : 129.724032 s : Deo Gflop/s per rank 659.9
Grid : Message : 129.724033 s : Deo Gflop/s per node 659.9
Grid : Message : 129.724034 s : ------------------------------------------
Grid : Message : 129.724035 s : 24^4 Deo Best Gflop/s = 747.3 ; 747.3 per node
Grid : Message : 129.724037 s : 24^4 Deo Worst Gflop/s = 659.7 ; 659.7 per node
Grid : Message : 129.724038 s : G/S/C ; G/O/C ; G/S/S ; G/O/S
Grid : Message : 129.724039 s : 747.3 ; 659.7 ; 747.3 ; 659.9 ;
Grid : Message : 129.737401 s : ===============================================================================
Grid : Message : 129.737409 s : Benchmark ImprovedStaggered on 32^4 local volume
Grid : Message : 129.737410 s : * Global volume : 32 32 32 32
Grid : Message : 129.737418 s : * ranks : 1
Grid : Message : 129.737419 s : * nodes : 1
Grid : Message : 129.737420 s : * ranks/node : 1
Grid : Message : 129.737421 s : * ranks geom : 1 1 1 1
Grid : Message : 129.737422 s : * Using 1 threads
Grid : Message : 129.737423 s : ===============================================================================
Grid : Message : 129.853540 s : Initialised RNGs
Grid : Message : 140.878355 s : ------------------------------------------
Grid : Message : 140.878368 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 140.878369 s : * Using sequential Comms/Compute
Grid : Message : 140.878370 s : * SINGLE precision
Grid : Message : 140.878371 s : ------------------------------------------
Grid : Message : 141.304260 s : Deo Gflop/s = 809.5 (0.2) 797.9-823.1
Grid : Message : 141.304277 s : Deo Gflop/s per rank 809.5
Grid : Message : 141.304278 s : Deo Gflop/s per node 809.5
Grid : Message : 141.304279 s : ------------------------------------------
Grid : Message : 141.304280 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 141.304281 s : * Using Overlapped Comms/Compute
Grid : Message : 141.304282 s : * SINGLE precision
Grid : Message : 141.304283 s : ------------------------------------------
Grid : Message : 141.724304 s : Deo Gflop/s = 729.6 (0.1) 721.3-733.6
Grid : Message : 141.724313 s : Deo Gflop/s per rank 729.6
Grid : Message : 141.724314 s : Deo Gflop/s per node 729.6
Grid : Message : 141.724315 s : ------------------------------------------
Grid : Message : 141.724316 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 141.724317 s : * Using sequential Comms/Compute
Grid : Message : 141.724318 s : * SINGLE precision
Grid : Message : 141.724319 s : ------------------------------------------
Grid : Message : 142.103287 s : Deo Gflop/s = 808.6 (0.2) 796.9-819.7
Grid : Message : 142.103294 s : Deo Gflop/s per rank 808.6
Grid : Message : 142.103295 s : Deo Gflop/s per node 808.6
Grid : Message : 142.103296 s : ------------------------------------------
Grid : Message : 142.103297 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 142.103298 s : * Using Overlapped Comms/Compute
Grid : Message : 142.103299 s : * SINGLE precision
Grid : Message : 142.103300 s : ------------------------------------------
Grid : Message : 142.523689 s : Deo Gflop/s = 729.0 (0.1) 721.3-733.6
Grid : Message : 142.523695 s : Deo Gflop/s per rank 729.0
Grid : Message : 142.523696 s : Deo Gflop/s per node 729.0
Grid : Message : 142.523697 s : ------------------------------------------
Grid : Message : 142.523698 s : 32^4 Deo Best Gflop/s = 809.5 ; 809.5 per node
Grid : Message : 142.523700 s : 32^4 Deo Worst Gflop/s = 729.0 ; 729.0 per node
Grid : Message : 142.523702 s : G/S/C ; G/O/C ; G/S/S ; G/O/S
Grid : Message : 142.523703 s : 809.5 ; 729.6 ; 808.6 ; 729.0 ;
Grid : Message : 142.574389 s : ===============================================================================
Grid : Message : 142.574401 s : Gflop/s/node Summary table Ls=12
Grid : Message : 142.574402 s : ===============================================================================
Grid : Message : 142.574406 s : L Wilson DWF Staggered
Grid : Message : 142.574411 s : 8 90.09 900.49 67.41
Grid : Message : 142.574422 s : 12 409.48 1550.33 315.72
Grid : Message : 142.574425 s : 16 727.26 1699.19 499.04
Grid : Message : 142.574434 s : 24 1205.32 1799.95 747.35
Grid : Message : 142.574438 s : 32 1172.60 1670.31 809.49
Grid : Message : 142.574440 s : ===============================================================================
Grid : Message : 142.574442 s : Comparison point result: 1735.1 Gflop/s per node
Grid : Message : 142.574448 s : Comparison point is 0.5*(1670.3+1800.0)
Grid : Message : 142.574450 s : ===============================================================================
Grid : Message : 142.574458 s : writing benchmark results to 1GPU_1_1_1_1_test_05_04_23.json
Executable
+18
View File
@@ -0,0 +1,18 @@
#!/bin/bash -l
# Slurm batch script: one node, one task, one GPU; runs ./hello_jobstep and
# then Benchmark_Grid (wrapped in omnitrace) through ./helper.sh.
#
# --- Scheduler request (directives must precede the first command) ---------
#SBATCH --partition=1CN96C8G1H_MI250_Ubuntu22
# Alternative partition, intentionally disabled by the doubled '#':
##SBATCH -p 1CN_MI250_Hackathon_Ubuntu22
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=12
#SBATCH --gres=gpu:1
#SBATCH --time=00:10:00
#SBATCH --output=job.out
#SBATCH --error=job.err

# --- Software environment ---------------------------------------------------
module load rocm@5.4.3
spack load gmp mpfr openmpi fftw hdf5 c-lime

# Both launches below run with the same OpenMP thread count. Exported only
# after the environment is loaded so module/spack see the same environment
# as before.
export OMP_NUM_THREADS=8

# --- Launch 1: ./hello_jobstep via helper.sh (single rank, --bind-to none) --
mpirun -np 1 --bind-to none ./helper.sh ./hello_jobstep

# --- Launch 2: Benchmark_Grid under omnitrace --------------------------------
# Results are written to the file passed to --json-out.
mpirun -np 1 --bind-to none ./helper.sh omnitrace ./Benchmark_Grid \
    --accelerator-threads 8 \
    --mpi 1.1.1.1 \
    --shm 2048 \
    --comms-overlap \
    --comms-concurrent \
    --shm-mpi 0 \
    --json-out 1GPU_1_1_1_1_test_05_04_23.json
+1
View File
@@ -0,0 +1 @@
/var/spool/slurm-llnl/job36500/slurm_script: line 11: mpirun: command not found
+247
View File
@@ -0,0 +1,247 @@
{
"SU4": [
{
"GBps": 72.95795546516696,
"GFlops": 97.92251022059891,
"L": 8,
"size_MB": 3.0
},
{
"GBps": 582.8988697806803,
"GFlops": 782.3536195573076,
"L": 16,
"size_MB": 48.0
},
{
"GBps": 831.1497694944638,
"GFlops": 1115.5503368927066,
"L": 24,
"size_MB": 243.0
},
{
"GBps": 871.4805098225537,
"GFlops": 1169.6813402466482,
"L": 32,
"size_MB": 768.0
},
{
"GBps": 856.24886270296,
"GFlops": 1149.2377695457521,
"L": 40,
"size_MB": 1875.0
},
{
"GBps": 848.7965671453275,
"GFlops": 1139.235467764453,
"L": 48,
"size_MB": 3888.0
}
],
"axpy": [
{
"GBps": 36.664104253158314,
"GFlops": 3.2806485146760305,
"L": 8,
"size_MB": 1.5
},
{
"GBps": 475.5541394610069,
"GFlops": 42.55186409296766,
"L": 16,
"size_MB": 24.0
},
{
"GBps": 1062.0541626046584,
"GFlops": 95.03099781182652,
"L": 24,
"size_MB": 121.5
},
{
"GBps": 1091.2166155916666,
"GFlops": 97.6404099337086,
"L": 32,
"size_MB": 384.0
},
{
"GBps": 1083.0714951643492,
"GFlops": 96.91159689501464,
"L": 40,
"size_MB": 937.5
},
{
"GBps": 1091.7986736212977,
"GFlops": 97.69249160457608,
"L": 48,
"size_MB": 1944.0
},
{
"GBps": 1116.5673848828242,
"GFlops": 99.90875837191614,
"L": 56,
"size_MB": 3601.5
},
{
"GBps": 1102.0528489443711,
"GFlops": 98.61001968082712,
"L": 64,
"size_MB": 6144.0
}
],
"comms": [
{
"L": 16,
"bytes": 4718592,
"dir": 3,
"rate_GBps": {
"error": 0.5819955845789178,
"max": 319.60227272727275,
"mean": 299.4569846678024
},
"time_usec": 58.7
},
{
"L": 16,
"bytes": 4718592,
"dir": 7,
"rate_GBps": {
"error": 0.6045177667005093,
"max": 313.8950892857143,
"mean": 300.7120862201694
},
"time_usec": 58.455
},
{
"L": 24,
"bytes": 15925248,
"dir": 3,
"rate_GBps": {
"error": 0.3219677699052459,
"max": 624.4860197368421,
"mean": 617.4985362997658
},
"time_usec": 96.075
},
{
"L": 24,
"bytes": 15925248,
"dir": 7,
"rate_GBps": {
"error": 0.3217013534900953,
"max": 624.4860197368422,
"mean": 616.3117792956576
},
"time_usec": 96.26
},
{
"L": 32,
"bytes": 37748736,
"dir": 3,
"rate_GBps": {
"error": 0.16708382399125116,
"max": 803.5714285714286,
"mean": 800.4610655737705
},
"time_usec": 175.68
},
{
"L": 32,
"bytes": 37748736,
"dir": 7,
"rate_GBps": {
"error": 0.16094436467098314,
"max": 803.5714285714286,
"mean": 800.7801378053641
},
"time_usec": 175.61
},
{
"L": 40,
"bytes": 73728000,
"dir": 3,
"rate_GBps": {
"error": 3.6932378948517726,
"max": 1060.4563827220077,
"mean": 999.66588944495
},
"time_usec": 274.75
},
{
"L": 40,
"bytes": 73728000,
"dir": 7,
"rate_GBps": {
"error": 0.7732919743918085,
"max": 1060.456382722008,
"mean": 1033.1905246675572
},
"time_usec": 265.835
},
{
"L": 48,
"bytes": 127401984,
"dir": 3,
"rate_GBps": {
"error": 3.4290882620704632,
"max": 1103.7427325581398,
"mean": 1019.9634122754235
},
"time_usec": 465.32
},
{
"L": 48,
"bytes": 127401984,
"dir": 7,
"rate_GBps": {
"error": 1.7805341746498673,
"max": 1103.7427325581393,
"mean": 1052.9793335256138
},
"time_usec": 450.73
}
],
"flops": {
"comparison_point_Gflops": 1611.577549655192,
"results": [
{
"Gflops_dwf4": 323.4349167180752,
"Gflops_staggered": 12.066444909437706,
"Gflops_wilson": 31.980666449783893,
"L": 8
},
{
"Gflops_dwf4": 865.5026086956523,
"Gflops_staggered": 56.31257464596485,
"Gflops_wilson": 154.20341946550104,
"L": 12
},
{
"Gflops_dwf4": 1274.4032988497431,
"Gflops_staggered": 151.36310980156634,
"Gflops_wilson": 406.90922194195747,
"L": 16
},
{
"Gflops_dwf4": 1635.941519395089,
"Gflops_staggered": 410.01962234851345,
"Gflops_wilson": 832.8886796893387,
"L": 24
},
{
"Gflops_dwf4": 1587.2135799152945,
"Gflops_staggered": 601.6161489936918,
"Gflops_wilson": 1019.3722000081013,
"L": 32
}
]
},
"geometry": {
"mpi": [
1,
1,
1,
2
],
"nodes": 1,
"ranks": 2
}
}
+247
View File
@@ -0,0 +1,247 @@
{
"SU4": [
{
"GBps": 82.16994221079263,
"GFlops": 110.28662953423883,
"L": 8,
"size_MB": 6.0
},
{
"GBps": 583.033802716333,
"GFlops": 782.5347234778643,
"L": 16,
"size_MB": 96.0
},
{
"GBps": 686.9200472555763,
"GFlops": 921.9684806029609,
"L": 24,
"size_MB": 486.0
},
{
"GBps": 721.9096917712991,
"GFlops": 968.9307865072406,
"L": 32,
"size_MB": 1536.0
},
{
"GBps": 708.3069986388066,
"GFlops": 950.6735608379971,
"L": 40,
"size_MB": 3750.0
},
{
"GBps": 725.7398604285772,
"GFlops": 974.0715518576075,
"L": 48,
"size_MB": 7776.0
}
],
"axpy": [
{
"GBps": 41.59376781166453,
"GFlops": 3.7217473430940964,
"L": 8,
"size_MB": 3.0
},
{
"GBps": 413.54098604712715,
"GFlops": 37.00302105475007,
"L": 16,
"size_MB": 48.0
},
{
"GBps": 736.025296235133,
"GFlops": 65.85842867413767,
"L": 24,
"size_MB": 243.0
},
{
"GBps": 845.301987657182,
"GFlops": 75.63634150482068,
"L": 32,
"size_MB": 768.0
},
{
"GBps": 915.650883948628,
"GFlops": 81.93105418985103,
"L": 40,
"size_MB": 1875.0
},
{
"GBps": 918.5294036587682,
"GFlops": 82.18861977351649,
"L": 48,
"size_MB": 3888.0
},
{
"GBps": 915.8908049272835,
"GFlops": 81.9525219556208,
"L": 56,
"size_MB": 7203.0
},
{
"GBps": 933.3426149071149,
"GFlops": 83.51408347894127,
"L": 64,
"size_MB": 12288.0
}
],
"comms": [
{
"L": 16,
"bytes": 4718592,
"dir": 3,
"rate_GBps": {
"error": 4.118706014626012,
"max": 434.0277777777777,
"mean": 284.4817122511733
},
"time_usec": 123.58
},
{
"L": 16,
"bytes": 4718592,
"dir": 7,
"rate_GBps": {
"error": 4.1521797501551525,
"max": 434.02777777777777,
"mean": 284.9197665937272
},
"time_usec": 123.39
},
{
"L": 24,
"bytes": 15925248,
"dir": 3,
"rate_GBps": {
"error": 5.296617827547019,
"max": 885.4652518656717,
"mean": 650.613279322257
},
"time_usec": 182.37
},
{
"L": 24,
"bytes": 15925248,
"dir": 7,
"rate_GBps": {
"error": 5.353297810533095,
"max": 885.4652518656717,
"mean": 652.2943581638262
},
"time_usec": 181.9
},
{
"L": 32,
"bytes": 37748736,
"dir": 3,
"rate_GBps": {
"error": 4.726418547916897,
"max": 1019.021739130435,
"mean": 843.5180325410513
},
"time_usec": 333.425
},
{
"L": 32,
"bytes": 37748736,
"dir": 7,
"rate_GBps": {
"error": 6.106965199705783,
"max": 1041.6666666666667,
"mean": 865.5044544629257
},
"time_usec": 324.955
},
{
"L": 40,
"bytes": 73728000,
"dir": 3,
"rate_GBps": {
"error": 3.5355426867815396,
"max": 1081.3315083661416,
"mean": 974.9938432388778
},
"time_usec": 563.405
},
{
"L": 40,
"bytes": 73728000,
"dir": 7,
"rate_GBps": {
"error": 3.2512421053107325,
"max": 1083.4643121301774,
"mean": 989.6255573571138
},
"time_usec": 555.075
},
{
"L": 48,
"bytes": 127401984,
"dir": 3,
"rate_GBps": {
"error": 2.5924773984958884,
"max": 1084.8214285714287,
"mean": 1003.6093592230957
},
"time_usec": 945.805
},
{
"L": 48,
"bytes": 127401984,
"dir": 7,
"rate_GBps": {
"error": 2.2239838373071463,
"max": 1102.4608013937282,
"mean": 1023.0965520214704
},
"time_usec": 927.79
}
],
"flops": {
"comparison_point_Gflops": 870.0277887227762,
"results": [
{
"Gflops_dwf4": 314.95374405232997,
"Gflops_staggered": 3.4646403436580835,
"Gflops_wilson": 30.77048282338211,
"L": 8
},
{
"Gflops_dwf4": 627.6042246860386,
"Gflops_staggered": 47.91260849841221,
"Gflops_wilson": 150.31760175186892,
"L": 12
},
{
"Gflops_dwf4": 790.2716382423531,
"Gflops_staggered": 133.10150763030714,
"Gflops_wilson": 339.1803331715996,
"L": 16
},
{
"Gflops_dwf4": 867.4337937916907,
"Gflops_staggered": 348.15413029524154,
"Gflops_wilson": 618.5334424546753,
"L": 24
},
{
"Gflops_dwf4": 872.6217836538618,
"Gflops_staggered": 471.57989384559534,
"Gflops_wilson": 675.8167829404576,
"L": 32
}
]
},
"geometry": {
"mpi": [
1,
1,
1,
4
],
"nodes": 1,
"ranks": 4
}
}
+16
View File
@@ -0,0 +1,16 @@
#! /usr/bin/env bash
# Per-rank launcher wrapper for MPI jobs.
#
# Usage: <mpi launcher> <this script> <command> [args...]
#
# Rank 0 runs the given command under `omniperf profile -n testbench`; every
# other rank (or a process whose rank cannot be determined) runs the command
# directly. The detected rank is exported as MPI_RANK so the wrapped command
# can read it as well.

# Determine this process's rank from the first launcher variable that is set.
# `${VAR+z}` expands to "z" when VAR is set (even if empty), so the tests
# check for "set" rather than "non-empty".
if [[ -n ${OMPI_COMM_WORLD_RANK+z} ]]; then
  # Open MPI (mpirun / mpiexec)
  export MPI_RANK=${OMPI_COMM_WORLD_RANK}
elif [[ -n ${MV2_COMM_WORLD_RANK+z} ]]; then
  # MVAPICH2
  export MPI_RANK=${MV2_COMM_WORLD_RANK}
elif [[ -n ${SLURM_PROCID+z} ]]; then
  # any MPI launched through Slurm's srun
  export MPI_RANK=${SLURM_PROCID}
fi

# "$@" forwards every argument as its own word, exactly as received. The
# previous "$*" joined them into a single word, so a command with arguments
# was looked up as one (non-existent) program name, and `eval` re-parsed the
# arguments through the shell, mangling spaces, globs and quotes.
# `exec` replaces this wrapper shell, so signals and the exit status go
# straight to/from the real process.
if [[ ${MPI_RANK} == "0" ]]; then
  exec omniperf profile -n testbench -- "$@"
else
  exec "$@"
fi