Added the working results from the AMD Hackathon

This commit is contained in:
Srijit Paul
2023-05-16 09:47:38 +01:00
commit 6c79bf7b8e
25 changed files with 4443 additions and 0 deletions

View File

@ -0,0 +1,135 @@
{
"SU4": [
{
"GBps": 66.53081457635281,
"GFlops": 89.29614774427355,
"L": 8,
"size_MB": 1.5
},
{
"GBps": 555.556774884554,
"GFlops": 745.6556810001229,
"L": 16,
"size_MB": 24.0
},
{
"GBps": 742.2236948591938,
"GFlops": 996.1957799176627,
"L": 24,
"size_MB": 121.5
},
{
"GBps": 826.9383434771103,
"GFlops": 1109.8978565758136,
"L": 32,
"size_MB": 384.0
},
{
"GBps": 842.2222703204826,
"GFlops": 1130.41159593417,
"L": 40,
"size_MB": 937.5
},
{
"GBps": 850.748827016926,
"GFlops": 1141.8557466087684,
"L": 48,
"size_MB": 1944.0
}
],
"axpy": [
{
"GBps": 35.82987973103704,
"GFlops": 3.206003368008695,
"L": 8,
"size_MB": 0.75
},
{
"GBps": 382.8855274375769,
"GFlops": 34.26001705116882,
"L": 16,
"size_MB": 12.0
},
{
"GBps": 837.408777973824,
"GFlops": 74.93006905793541,
"L": 24,
"size_MB": 60.75
},
{
"GBps": 1044.6773688059338,
"GFlops": 93.47614862276701,
"L": 32,
"size_MB": 192.0
},
{
"GBps": 1094.1823360678761,
"GFlops": 97.90577810984188,
"L": 40,
"size_MB": 468.75
},
{
"GBps": 1118.5658428592028,
"GFlops": 100.08757736464482,
"L": 48,
"size_MB": 972.0
},
{
"GBps": 1117.1386444994318,
"GFlops": 99.9598738171423,
"L": 56,
"size_MB": 1800.75
},
{
"GBps": 1130.3969388850894,
"GFlops": 101.14620591687438,
"L": 64,
"size_MB": 3072.0
}
],
"flops": {
"comparison_point_Gflops": 1735.1326862618785,
"results": [
{
"Gflops_dwf4": 900.4946564885497,
"Gflops_staggered": 67.41176470588235,
"Gflops_wilson": 90.09269442262372,
"L": 8
},
{
"Gflops_dwf4": 1550.326392108142,
"Gflops_staggered": 315.7179146516448,
"Gflops_wilson": 409.4796356156333,
"L": 12
},
{
"Gflops_dwf4": 1699.1907401453284,
"Gflops_staggered": 499.0448649798001,
"Gflops_wilson": 727.2638879714644,
"L": 16
},
{
"Gflops_dwf4": 1799.952141959478,
"Gflops_staggered": 747.3489951882253,
"Gflops_wilson": 1205.3232994907394,
"L": 24
},
{
"Gflops_dwf4": 1670.313230564279,
"Gflops_staggered": 809.4875619745635,
"Gflops_wilson": 1172.5954144104264,
"L": 32
}
]
},
"geometry": {
"mpi": [
1,
1,
1,
1
],
"nodes": 1,
"ranks": 1
}
}

View File

@ -0,0 +1,247 @@
{
"SU4": [
{
"GBps": 132.89061835546906,
"GFlops": 178.36276868186152,
"L": 8,
"size_MB": 3.0
},
{
"GBps": 1115.8992219013821,
"GFlops": 1497.7345824057134,
"L": 16,
"size_MB": 48.0
},
{
"GBps": 1529.8814171914853,
"GFlops": 2053.372079248613,
"L": 24,
"size_MB": 243.0
},
{
"GBps": 1654.336076127236,
"GFlops": 2220.4122948623262,
"L": 32,
"size_MB": 768.0
},
{
"GBps": 1684.3951354059177,
"GFlops": 2260.756881284346,
"L": 40,
"size_MB": 1875.0
},
{
"GBps": 1701.2803710051417,
"GFlops": 2283.4198608730717,
"L": 48,
"size_MB": 3888.0
}
],
"axpy": [
{
"GBps": 72.90804934027936,
"GFlops": 6.523701823576131,
"L": 8,
"size_MB": 1.5
},
{
"GBps": 768.9192583517447,
"GFlops": 68.80173058094414,
"L": 16,
"size_MB": 24.0
},
{
"GBps": 1671.395675609412,
"GFlops": 149.5539534462135,
"L": 24,
"size_MB": 121.5
},
{
"GBps": 2005.7565212158895,
"GFlops": 179.47205546585369,
"L": 32,
"size_MB": 384.0
},
{
"GBps": 2189.105793295759,
"GFlops": 195.8778706185296,
"L": 40,
"size_MB": 937.5
},
{
"GBps": 2228.4889350615904,
"GFlops": 199.4018144914041,
"L": 48,
"size_MB": 1944.0
},
{
"GBps": 2235.692776103882,
"GFlops": 200.0464032764505,
"L": 56,
"size_MB": 3601.5
},
{
"GBps": 2259.686995689647,
"GFlops": 202.19336970174015,
"L": 64,
"size_MB": 6144.0
}
],
"comms": [
{
"L": 16,
"bytes": 4718592,
"dir": 3,
"rate_GBps": {
"error": 0.05625709399294237,
"max": 174.04084158415841,
"mean": 172.69858034091467
},
"time_usec": 101.785
},
{
"L": 16,
"bytes": 4718592,
"dir": 7,
"rate_GBps": {
"error": 0.08879742168931712,
"max": 174.04084158415841,
"mean": 172.56295096451186
},
"time_usec": 101.865
},
{
"L": 24,
"bytes": 15925248,
"dir": 3,
"rate_GBps": {
"error": 0.022661574665316316,
"max": 185.394287109375,
"mean": 184.92042851131475
},
"time_usec": 320.82
},
{
"L": 24,
"bytes": 15925248,
"dir": 7,
"rate_GBps": {
"error": 0.01801934346116572,
"max": 185.39428710937497,
"mean": 184.94637011924246
},
"time_usec": 320.775
},
{
"L": 32,
"bytes": 37748736,
"dir": 3,
"rate_GBps": {
"error": 0.00908164542769838,
"max": 188.5053619302949,
"mean": 188.15351989242637
},
"time_usec": 747.395
},
{
"L": 32,
"bytes": 37748736,
"dir": 7,
"rate_GBps": {
"error": 0.00977151592705336,
"max": 188.5053619302949,
"mean": 188.12834868460658
},
"time_usec": 747.495
},
{
"L": 40,
"bytes": 73728000,
"dir": 3,
"rate_GBps": {
"error": 0.004645137235265924,
"max": 189.41945043103448,
"mean": 189.36590558876455
},
"time_usec": 1450.41
},
{
"L": 40,
"bytes": 73728000,
"dir": 7,
"rate_GBps": {
"error": 6.57927354071246,
"max": 189.41945043103448,
"mean": 179.56563159907688
},
"time_usec": 1529.57
},
{
"L": 48,
"bytes": 127401984,
"dir": 3,
"rate_GBps": {
"error": 0.0025216686764181592,
"max": 189.99574659727782,
"mean": 189.91743796593076
},
"time_usec": 2499.03
},
{
"L": 48,
"bytes": 127401984,
"dir": 7,
"rate_GBps": {
"error": 0.0029097878818057873,
"max": 189.99574659727784,
"mean": 189.9109784863842
},
"time_usec": 2499.115
}
],
"flops": {
"comparison_point_Gflops": 2967.4156661909574,
"results": [
{
"Gflops_dwf4": 666.5081421393546,
"Gflops_staggered": 21.14174015655824,
"Gflops_wilson": 65.28883512417278,
"L": 8
},
{
"Gflops_dwf4": 1872.5305322568,
"Gflops_staggered": 103.94394142193528,
"Gflops_wilson": 322.3361554476059,
"L": 12
},
{
"Gflops_dwf4": 2378.065733107743,
"Gflops_staggered": 302.54207956687776,
"Gflops_wilson": 859.270520750005,
"L": 16
},
{
"Gflops_dwf4": 3017.7954712341593,
"Gflops_staggered": 805.6876803824425,
"Gflops_wilson": 1573.6522138072685,
"L": 24
},
{
"Gflops_dwf4": 2917.035861147756,
"Gflops_staggered": 1173.8616609748283,
"Gflops_wilson": 1952.2605662484034,
"L": 32
}
]
},
"geometry": {
"mpi": [
1,
1,
1,
2
],
"nodes": 1,
"ranks": 2
}
}

View File

@ -0,0 +1,247 @@
{
"SU4": [
{
"GBps": 258.80472857326896,
"GFlops": 347.3618266476084,
"L": 8,
"size_MB": 6.0
},
{
"GBps": 2205.1617118588697,
"GFlops": 2959.7179483828813,
"L": 16,
"size_MB": 96.0
},
{
"GBps": 3072.390061538835,
"GFlops": 4123.692135895226,
"L": 24,
"size_MB": 486.0
},
{
"GBps": 3306.845905879817,
"GFlops": 4438.373443332908,
"L": 32,
"size_MB": 1536.0
},
{
"GBps": 3368.350185489805,
"GFlops": 4520.923090048202,
"L": 40,
"size_MB": 3750.0
},
{
"GBps": 3401.890050913554,
"GFlops": 4565.939535394216,
"L": 48,
"size_MB": 7776.0
}
],
"axpy": [
{
"GBps": 144.92557588593007,
"GFlops": 12.96772101633408,
"L": 8,
"size_MB": 3.0
},
{
"GBps": 1525.0954455566346,
"GFlops": 136.46323045717278,
"L": 16,
"size_MB": 48.0
},
{
"GBps": 3349.410690756415,
"GFlops": 299.7001953681577,
"L": 24,
"size_MB": 243.0
},
{
"GBps": 4001.347120197133,
"GFlops": 358.03447960813475,
"L": 32,
"size_MB": 768.0
},
{
"GBps": 4373.946464587173,
"GFlops": 391.3741045803486,
"L": 40,
"size_MB": 1875.0
},
{
"GBps": 4472.851142619264,
"GFlops": 400.223945363041,
"L": 48,
"size_MB": 3888.0
},
{
"GBps": 4474.64035673346,
"GFlops": 400.38404153191635,
"L": 56,
"size_MB": 7203.0
},
{
"GBps": 4520.778628772496,
"GFlops": 404.51242422986655,
"L": 64,
"size_MB": 12288.0
}
],
"comms": [
{
"L": 16,
"bytes": 4718592,
"dir": 3,
"rate_GBps": {
"error": 0.07403621163429362,
"max": 251.11607142857142,
"mean": 246.10605530276513
},
"time_usec": 142.85
},
{
"L": 16,
"bytes": 4718592,
"dir": 7,
"rate_GBps": {
"error": 0.05078117997981672,
"max": 240.79623287671234,
"mean": 240.37639738812348
},
"time_usec": 146.255
},
{
"L": 24,
"bytes": 15925248,
"dir": 3,
"rate_GBps": {
"error": 0.05243158900728836,
"max": 261.34877477973566,
"mean": 259.53091506627584
},
"time_usec": 457.18
},
{
"L": 24,
"bytes": 15925248,
"dir": 7,
"rate_GBps": {
"error": 0.05276589391871642,
"max": 254.61876341201716,
"mean": 253.79909038406007
},
"time_usec": 467.505
},
{
"L": 32,
"bytes": 37748736,
"dir": 3,
"rate_GBps": {
"error": 0.034606553242195574,
"max": 264.08450704225345,
"mean": 263.0926600655743
},
"time_usec": 1069.015
},
{
"L": 32,
"bytes": 37748736,
"dir": 7,
"rate_GBps": {
"error": 0.1319150423789226,
"max": 258.0275229357798,
"mean": 256.99247982894576
},
"time_usec": 1094.39
},
{
"L": 40,
"bytes": 73728000,
"dir": 3,
"rate_GBps": {
"error": 0.015978094946140938,
"max": 266.5290665938864,
"mean": 264.2093825678961
},
"time_usec": 2079.095
},
{
"L": 40,
"bytes": 73728000,
"dir": 7,
"rate_GBps": {
"error": 0.009886226966989408,
"max": 259.35618803116154,
"mean": 258.3413626594304
},
"time_usec": 2126.32
},
{
"L": 48,
"bytes": 127401984,
"dir": 3,
"rate_GBps": {
"error": 0.009038617805051087,
"max": 265.14490223463685,
"mean": 264.87447870958556
},
"time_usec": 3583.655
},
{
"L": 48,
"bytes": 127401984,
"dir": 7,
"rate_GBps": {
"error": 0.011755047936156046,
"max": 259.2785441136302,
"mean": 259.0142943487046
},
"time_usec": 3664.735
}
],
"flops": {
"comparison_point_Gflops": 5739.959058528701,
"results": [
{
"Gflops_dwf4": 1303.838629455651,
"Gflops_staggered": 45.61681616310823,
"Gflops_wilson": 122.82790780695686,
"L": 8
},
{
"Gflops_dwf4": 3626.7536546219576,
"Gflops_staggered": 222.3231637149513,
"Gflops_wilson": 602.8115590932687,
"L": 12
},
{
"Gflops_dwf4": 4685.72914071401,
"Gflops_staggered": 635.7870784234051,
"Gflops_wilson": 1585.9766281938169,
"L": 16
},
{
"Gflops_dwf4": 5931.296254256527,
"Gflops_staggered": 1539.663555337785,
"Gflops_wilson": 3106.4995401978545,
"L": 24
},
{
"Gflops_dwf4": 5548.621862800875,
"Gflops_staggered": 2327.4564566018657,
"Gflops_wilson": 3871.899246657765,
"L": 32
}
]
},
"geometry": {
"mpi": [
1,
1,
1,
4
],
"nodes": 1,
"ranks": 4
}
}

View File

@ -0,0 +1,357 @@
{
"SU4": [
{
"GBps": 266.1848261241281,
"GFlops": 357.2672259045552,
"L": 8,
"size_MB": 6.0
},
{
"GBps": 2218.808483408791,
"GFlops": 2978.034335102536,
"L": 16,
"size_MB": 96.0
},
{
"GBps": 2916.4890368817046,
"GFlops": 3914.445322671706,
"L": 24,
"size_MB": 486.0
},
{
"GBps": 3244.2460692554414,
"GFlops": 4354.353364883959,
"L": 32,
"size_MB": 1536.0
},
{
"GBps": 3374.5218625301145,
"GFlops": 4529.206574751202,
"L": 40,
"size_MB": 3750.0
},
{
"GBps": 3394.431233013875,
"GFlops": 4555.928479473609,
"L": 48,
"size_MB": 7776.0
}
],
"axpy": [
{
"GBps": 141.29030070343728,
"GFlops": 12.64244209923477,
"L": 8,
"size_MB": 3.0
},
{
"GBps": 1521.8306608296007,
"GFlops": 136.1711024648584,
"L": 16,
"size_MB": 48.0
},
{
"GBps": 3348.7625319112517,
"GFlops": 299.64219909643714,
"L": 24,
"size_MB": 243.0
},
{
"GBps": 4183.327348851153,
"GFlops": 374.3177948287102,
"L": 32,
"size_MB": 768.0
},
{
"GBps": 4378.710152219234,
"GFlops": 391.8003521342666,
"L": 40,
"size_MB": 1875.0
},
{
"GBps": 4336.452251813764,
"GFlops": 388.01917921261816,
"L": 48,
"size_MB": 3888.0
},
{
"GBps": 4469.4883078108915,
"GFlops": 399.92304399796166,
"L": 56,
"size_MB": 7203.0
},
{
"GBps": 4517.408964798091,
"GFlops": 404.21091180135454,
"L": 64,
"size_MB": 12288.0
}
],
"comms": [
{
"L": 16,
"bytes": 4718592,
"dir": 2,
"rate_GBps": {
"error": 0.09195178515748137,
"max": 249.33510638297872,
"mean": 247.17886521830837
},
"time_usec": 142.23
},
{
"L": 16,
"bytes": 4718592,
"dir": 3,
"rate_GBps": {
"error": 0.10825452600087834,
"max": 348.08168316831683,
"mean": 344.93965855573
},
"time_usec": 101.92
},
{
"L": 16,
"bytes": 4718592,
"dir": 6,
"rate_GBps": {
"error": 0.10591381085828204,
"max": 249.33510638297872,
"mean": 247.29187915450356
},
"time_usec": 142.165
},
{
"L": 16,
"bytes": 4718592,
"dir": 7,
"rate_GBps": {
"error": 0.10943725965663166,
"max": 348.08168316831683,
"mean": 344.80433503334643
},
"time_usec": 101.96
},
{
"L": 24,
"bytes": 15925248,
"dir": 2,
"rate_GBps": {
"error": 1.3232642449040395,
"max": 260.7743818681319,
"mean": 257.60107630182046
},
"time_usec": 460.605
},
{
"L": 24,
"bytes": 15925248,
"dir": 3,
"rate_GBps": {
"error": 0.04258490698369876,
"max": 370.78857421875,
"mean": 369.7659402901351
},
"time_usec": 320.885
},
{
"L": 24,
"bytes": 15925248,
"dir": 6,
"rate_GBps": {
"error": 0.38957868571862236,
"max": 260.77438186813185,
"mean": 258.7696281554986
},
"time_usec": 458.525
},
{
"L": 24,
"bytes": 15925248,
"dir": 7,
"rate_GBps": {
"error": 0.04510765547263382,
"max": 370.78857421874994,
"mean": 369.7140926370236
},
"time_usec": 320.93
},
{
"L": 32,
"bytes": 37748736,
"dir": 2,
"rate_GBps": {
"error": 0.04128040141253437,
"max": 263.3426966292135,
"mean": 262.2622155911973
},
"time_usec": 1072.4
},
{
"L": 32,
"bytes": 37748736,
"dir": 3,
"rate_GBps": {
"error": 0.019030171173062077,
"max": 376.50602409638554,
"mean": 376.03786450603667
},
"time_usec": 747.93
},
{
"L": 32,
"bytes": 37748736,
"dir": 6,
"rate_GBps": {
"error": 14.333669622239718,
"max": 263.3426966292135,
"mean": 246.90111665145025
},
"time_usec": 1139.12
},
{
"L": 32,
"bytes": 37748736,
"dir": 7,
"rate_GBps": {
"error": 0.04165357676010385,
"max": 376.50602409638554,
"mean": 375.9272873086948
},
"time_usec": 748.15
},
{
"L": 40,
"bytes": 73728000,
"dir": 2,
"rate_GBps": {
"error": 0.008823799570124044,
"max": 263.8407330691643,
"mean": 263.62484432777194
},
"time_usec": 2083.705
},
{
"L": 40,
"bytes": 73728000,
"dir": 3,
"rate_GBps": {
"error": 0.008612844793861731,
"max": 378.83890086206895,
"mean": 378.61303860111036
},
"time_usec": 1450.865
},
{
"L": 40,
"bytes": 73728000,
"dir": 6,
"rate_GBps": {
"error": 0.016608523471027856,
"max": 263.9675186208553,
"mean": 263.6128257270371
},
"time_usec": 2083.8
},
{
"L": 40,
"bytes": 73728000,
"dir": 7,
"rate_GBps": {
"error": 0.007834602992146382,
"max": 378.83890086206895,
"mean": 378.6104290514345
},
"time_usec": 1450.875
},
{
"L": 48,
"bytes": 127401984,
"dir": 2,
"rate_GBps": {
"error": 0.0075998552608142755,
"max": 264.553720735786,
"mean": 264.31025769184185
},
"time_usec": 3591.305
},
{
"L": 48,
"bytes": 127401984,
"dir": 3,
"rate_GBps": {
"error": 0.00462628204154535,
"max": 379.99149319455563,
"mean": 379.81511787960756
},
"time_usec": 2499.16
},
{
"L": 48,
"bytes": 127401984,
"dir": 6,
"rate_GBps": {
"error": 0.007229146291832468,
"max": 264.6274742124338,
"mean": 264.31062567872357
},
"time_usec": 3591.3
},
{
"L": 48,
"bytes": 127401984,
"dir": 7,
"rate_GBps": {
"error": 0.005141655059361934,
"max": 379.9914931945557,
"mean": 379.81435799557056
},
"time_usec": 2499.165
}
],
"flops": {
"comparison_point_Gflops": 5098.280833532175,
"results": [
{
"Gflops_dwf4": 794.9684468940287,
"Gflops_staggered": 25.398732773127435,
"Gflops_wilson": 82.30088654337784,
"L": 8
},
{
"Gflops_dwf4": 2766.7442233712513,
"Gflops_staggered": 123.79572508426367,
"Gflops_wilson": 388.0367023572493,
"L": 12
},
{
"Gflops_dwf4": 3917.4114123908494,
"Gflops_staggered": 360.4491008480393,
"Gflops_wilson": 1119.746558014772,
"L": 16
},
{
"Gflops_dwf4": 5085.322691715904,
"Gflops_staggered": 1142.7657184076463,
"Gflops_wilson": 2488.4866397303394,
"L": 24
},
{
"Gflops_dwf4": 5111.238975348448,
"Gflops_staggered": 1963.1988001901661,
"Gflops_wilson": 3410.556163528253,
"L": 32
}
]
},
"geometry": {
"mpi": [
1,
1,
2,
2
],
"nodes": 1,
"ranks": 4
}
}

View File

@ -0,0 +1,247 @@
{
"SU4": [
{
"GBps": 357.4010431521607,
"GFlops": 479.69555996712967,
"L": 8,
"size_MB": 9.0
},
{
"GBps": 2964.027681822972,
"GFlops": 3978.2506118338624,
"L": 16,
"size_MB": 144.0
},
{
"GBps": 4300.052851129693,
"GFlops": 5771.433239585496,
"L": 24,
"size_MB": 729.0
},
{
"GBps": 4872.567325646307,
"GFlops": 6539.849159752834,
"L": 32,
"size_MB": 2304.0
},
{
"GBps": 5014.291917307407,
"GFlops": 6730.06868669764,
"L": 40,
"size_MB": 5625.0
},
{
"GBps": 5087.723804015017,
"GFlops": 6828.627296664129,
"L": 48,
"size_MB": 11664.0
}
],
"axpy": [
{
"GBps": 191.2791174636559,
"GFlops": 17.11536570654468,
"L": 8,
"size_MB": 4.5
},
{
"GBps": 2189.0619869385964,
"GFlops": 195.87395089204273,
"L": 16,
"size_MB": 72.0
},
{
"GBps": 4600.586453416044,
"GFlops": 411.6535074967195,
"L": 24,
"size_MB": 364.5
},
{
"GBps": 6066.590946624111,
"GFlops": 542.8293690408383,
"L": 32,
"size_MB": 1152.0
},
{
"GBps": 6435.680928548642,
"GFlops": 575.8549815751528,
"L": 40,
"size_MB": 2812.5
},
{
"GBps": 6423.987885321036,
"GFlops": 574.8087057782093,
"L": 48,
"size_MB": 5832.0
},
{
"GBps": 6651.207054039447,
"GFlops": 595.139932833832,
"L": 56,
"size_MB": 10804.5
},
{
"GBps": 6750.396585799414,
"GFlops": 604.0152618966363,
"L": 64,
"size_MB": 18432.0
}
],
"comms": [
{
"L": 16,
"bytes": 4718592,
"dir": 3,
"rate_GBps": {
"error": 0.23180937816140193,
"max": 301.3392857142857,
"mean": 284.1444851554502
},
"time_usec": 185.59
},
{
"L": 16,
"bytes": 4718592,
"dir": 7,
"rate_GBps": {
"error": 0.0720558271254274,
"max": 288.1659836065574,
"mean": 287.1226145427817
},
"time_usec": 183.665
},
{
"L": 24,
"bytes": 15925248,
"dir": 3,
"rate_GBps": {
"error": 0.08914672001181612,
"max": 294.1793646694215,
"mean": 291.0166629195111
},
"time_usec": 611.575
},
{
"L": 24,
"bytes": 15925248,
"dir": 7,
"rate_GBps": {
"error": 0.02033965870016051,
"max": 300.13240408937605,
"mean": 299.43809148264984
},
"time_usec": 594.375
},
{
"L": 32,
"bytes": 37748736,
"dir": 3,
"rate_GBps": {
"error": 0.11175876047399072,
"max": 297.3044397463002,
"mean": 293.6948288825151
},
"time_usec": 1436.44
},
{
"L": 32,
"bytes": 37748736,
"dir": 7,
"rate_GBps": {
"error": 0.01769364687591873,
"max": 302.8535534816941,
"mean": 302.39876137467337
},
"time_usec": 1395.095
},
{
"L": 40,
"bytes": 73728000,
"dir": 3,
"rate_GBps": {
"error": 4.399658341772243,
"max": 426.48789305124217,
"mean": 290.9026944191917
},
"time_usec": 2832.475
},
{
"L": 40,
"bytes": 73728000,
"dir": 7,
"rate_GBps": {
"error": 0.010262975788769115,
"max": 303.93751729066764,
"mean": 303.5115263803478
},
"time_usec": 2714.805
},
{
"L": 48,
"bytes": 127401984,
"dir": 3,
"rate_GBps": {
"error": 0.04733542501238632,
"max": 296.2605337078652,
"mean": 294.4266465533552
},
"time_usec": 4835.935
},
{
"L": 48,
"bytes": 127401984,
"dir": 7,
"rate_GBps": {
"error": 0.005712416455895626,
"max": 304.23677884615387,
"mean": 304.07369658580546
},
"time_usec": 4682.51
}
],
"flops": {
"comparison_point_Gflops": 8645.41832237255,
"results": [
{
"Gflops_dwf4": 1800.6620388878794,
"Gflops_staggered": 66.91525617022894,
"Gflops_wilson": 198.73254554643688,
"L": 8
},
{
"Gflops_dwf4": 5046.479674060661,
"Gflops_staggered": 310.38726587194475,
"Gflops_wilson": 888.2136619568682,
"L": 12
},
{
"Gflops_dwf4": 6946.311220935582,
"Gflops_staggered": 862.8639792893744,
"Gflops_wilson": 2194.256560154122,
"L": 16
},
{
"Gflops_dwf4": 8923.959284555593,
"Gflops_staggered": 2010.9408836077712,
"Gflops_wilson": 4488.6948258506145,
"L": 24
},
{
"Gflops_dwf4": 8366.877360189508,
"Gflops_staggered": 3236.252709292388,
"Gflops_wilson": 5745.6687676840165,
"L": 32
}
]
},
"geometry": {
"mpi": [
1,
1,
1,
6
],
"nodes": 1,
"ranks": 6
}
}

View File

@ -0,0 +1,357 @@
{
"SU4": [
{
"GBps": 358.7697863751814,
"GFlops": 481.5326560232221,
"L": 8,
"size_MB": 9.0
},
{
"GBps": 3277.0383987985815,
"GFlops": 4398.366484555036,
"L": 16,
"size_MB": 144.0
},
{
"GBps": 4641.25930543841,
"GFlops": 6229.3927903480135,
"L": 24,
"size_MB": 729.0
},
{
"GBps": 4957.208883318319,
"GFlops": 6653.453135404018,
"L": 32,
"size_MB": 2304.0
},
{
"GBps": 5052.323232718746,
"GFlops": 6781.113454171254,
"L": 40,
"size_MB": 5625.0
},
{
"GBps": 5111.066489576509,
"GFlops": 6859.957318878948,
"L": 48,
"size_MB": 11664.0
}
],
"axpy": [
{
"GBps": 212.79755008545004,
"GFlops": 19.040802464290206,
"L": 8,
"size_MB": 4.5
},
{
"GBps": 2239.4521802311115,
"GFlops": 200.38278906351087,
"L": 16,
"size_MB": 72.0
},
{
"GBps": 5003.072969176254,
"GFlops": 447.6673912940339,
"L": 24,
"size_MB": 364.5
},
{
"GBps": 6260.956674179814,
"GFlops": 560.220919943234,
"L": 32,
"size_MB": 1152.0
},
{
"GBps": 6339.748894764766,
"GFlops": 567.2711284972254,
"L": 40,
"size_MB": 2812.5
},
{
"GBps": 6712.262796318121,
"GFlops": 600.60310817383,
"L": 48,
"size_MB": 5832.0
},
{
"GBps": 6707.207092952936,
"GFlops": 600.1507314944187,
"L": 56,
"size_MB": 10804.5
},
{
"GBps": 6777.5805167152075,
"GFlops": 606.4476388603874,
"L": 64,
"size_MB": 18432.0
}
],
"comms": [
{
"L": 16,
"bytes": 4718592,
"dir": 2,
"rate_GBps": {
"error": 1.022994453232109,
"max": 349.23427152317885,
"mean": 321.77670317600763
},
"time_usec": 163.885
},
{
"L": 16,
"bytes": 4718592,
"dir": 3,
"rate_GBps": {
"error": 1.3634069536717788,
"max": 319.60227272727275,
"mean": 273.5965913512672
},
"time_usec": 192.745
},
{
"L": 16,
"bytes": 4718592,
"dir": 6,
"rate_GBps": {
"error": 1.033374997957526,
"max": 349.23427152317873,
"mean": 321.8159765660757
},
"time_usec": 163.865
},
{
"L": 16,
"bytes": 4718592,
"dir": 7,
"rate_GBps": {
"error": 0.8946713335979535,
"max": 277.5493421052632,
"mean": 261.8000049645038
},
"time_usec": 201.43
},
{
"L": 24,
"bytes": 15925248,
"dir": 2,
"rate_GBps": {
"error": 1.132373084736386,
"max": 364.7100730020492,
"mean": 336.4019839244705
},
"time_usec": 529.065
},
{
"L": 24,
"bytes": 15925248,
"dir": 3,
"rate_GBps": {
"error": 1.3763342157193355,
"max": 329.58984375000006,
"mean": 286.25184457704404
},
"time_usec": 621.755
},
{
"L": 24,
"bytes": 15925248,
"dir": 6,
"rate_GBps": {
"error": 1.1706725283255819,
"max": 366.96601159793806,
"mean": 335.8972476220133
},
"time_usec": 529.86
},
{
"L": 24,
"bytes": 15925248,
"dir": 7,
"rate_GBps": {
"error": 0.7245132697488086,
"max": 290.81456801470586,
"mean": 273.5395614001383
},
"time_usec": 650.65
},
{
"L": 32,
"bytes": 37748736,
"dir": 2,
"rate_GBps": {
"error": 1.1347849184170162,
"max": 369.41768826619966,
"mean": 340.002417794971
},
"time_usec": 1240.8
},
{
"L": 32,
"bytes": 37748736,
"dir": 3,
"rate_GBps": {
"error": 1.2934932973816882,
"max": 330.88235294117646,
"mean": 289.3945266277263
},
"time_usec": 1457.785
},
{
"L": 32,
"bytes": 37748736,
"dir": 6,
"rate_GBps": {
"error": 1.1400178079013707,
"max": 369.41768826619966,
"mean": 339.97775789957205
},
"time_usec": 1240.89
},
{
"L": 32,
"bytes": 37748736,
"dir": 7,
"rate_GBps": {
"error": 0.6655050724500384,
"max": 294.3998604326588,
"mean": 274.9840305570402
},
"time_usec": 1534.18
},
{
"L": 40,
"bytes": 73728000,
"dir": 2,
"rate_GBps": {
"error": 1.1580061344215817,
"max": 369.99308907723395,
"mean": 341.6741760070162
},
"time_usec": 2411.58
},
{
"L": 40,
"bytes": 73728000,
"dir": 3,
"rate_GBps": {
"error": 1.237808834778611,
"max": 326.19739088479815,
"mean": 289.9226295719639
},
"time_usec": 2842.05
},
{
"L": 40,
"bytes": 73728000,
"dir": 6,
"rate_GBps": {
"error": 1.1555010647779187,
"max": 370.49218047437046,
"mean": 341.65150860996624
},
"time_usec": 2411.74
},
{
"L": 40,
"bytes": 73728000,
"dir": 7,
"rate_GBps": {
"error": 0.21759524648814468,
"max": 284.915148469917,
"mean": 275.82309600044186
},
"time_usec": 2987.33
},
{
"L": 48,
"bytes": 127401984,
"dir": 2,
"rate_GBps": {
"error": 1.1492026962470794,
"max": 371.07847928068804,
"mean": 341.99590348018035
},
"time_usec": 4163.29
},
{
"L": 48,
"bytes": 127401984,
"dir": 3,
"rate_GBps": {
"error": 1.188423218598923,
"max": 324.26056137554093,
"mean": 291.58641440578003
},
"time_usec": 4883.04
},
{
"L": 48,
"bytes": 127401984,
"dir": 6,
"rate_GBps": {
"error": 1.1605366789704334,
"max": 370.88515889554566,
"mean": 342.08382185468326
},
"time_usec": 4162.22
},
{
"L": 48,
"bytes": 127401984,
"dir": 7,
"rate_GBps": {
"error": 4.979712514429859,
"max": 414.626710832848,
"mean": 266.3140556540118
},
"time_usec": 5346.425
}
],
"flops": {
"comparison_point_Gflops": 7494.348906712593,
"results": [
{
"Gflops_dwf4": 1062.0966590565613,
"Gflops_staggered": 37.17051075623598,
"Gflops_wilson": 116.09729212946039,
"L": 8
},
{
"Gflops_dwf4": 3672.4475061677776,
"Gflops_staggered": 171.9754714887417,
"Gflops_wilson": 543.8738534795675,
"L": 12
},
{
"Gflops_dwf4": 5567.914366086386,
"Gflops_staggered": 470.0381099405445,
"Gflops_wilson": 1539.9924935600807,
"L": 16
},
{
"Gflops_dwf4": 7427.264147643517,
"Gflops_staggered": 1388.1232831701386,
"Gflops_wilson": 3379.021526181515,
"L": 24
},
{
"Gflops_dwf4": 7561.433665781668,
"Gflops_staggered": 2602.387615970339,
"Gflops_wilson": 4936.722401653414,
"L": 32
}
]
},
"geometry": {
"mpi": [
1,
1,
2,
3
],
"nodes": 1,
"ranks": 6
}
}

View File

@ -0,0 +1,247 @@
{
"SU4": [
{
"GBps": 533.0451116736091,
"GFlops": 715.4410381033807,
"L": 8,
"size_MB": 12.0
},
{
"GBps": 4469.567213462115,
"GFlops": 5998.951565341761,
"L": 16,
"size_MB": 192.0
},
{
"GBps": 6185.220631656113,
"GFlops": 8301.662603596083,
"L": 24,
"size_MB": 972.0
},
{
"GBps": 6611.964226824657,
"GFlops": 8874.42816141682,
"L": 32,
"size_MB": 3072.0
},
{
"GBps": 6754.492030874098,
"GFlops": 9065.725741780272,
"L": 40,
"size_MB": 7500.0
},
{
"GBps": 6793.3111325316295,
"GFlops": 9117.827858055021,
"L": 48,
"size_MB": 15552.0
}
],
"axpy": [
{
"GBps": 285.8864982954018,
"GFlops": 25.580690844723133,
"L": 8,
"size_MB": 6.0
},
{
"GBps": 3045.4379339749057,
"GFlops": 272.5011735087506,
"L": 16,
"size_MB": 96.0
},
{
"GBps": 6694.684891454758,
"GFlops": 599.0302638713229,
"L": 24,
"size_MB": 486.0
},
{
"GBps": 8366.596364016663,
"GFlops": 748.6303700475851,
"L": 32,
"size_MB": 1536.0
},
{
"GBps": 8757.573729403726,
"GFlops": 783.6144325020367,
"L": 40,
"size_MB": 3750.0
},
{
"GBps": 8661.613670162333,
"GFlops": 775.0280717486198,
"L": 48,
"size_MB": 7776.0
},
{
"GBps": 8945.21348352366,
"GFlops": 800.4041534890073,
"L": 56,
"size_MB": 14406.0
},
{
"GBps": 9038.85010761881,
"GFlops": 808.7826167847682,
"L": 64,
"size_MB": 24576.0
}
],
"comms": [
{
"L": 16,
"bytes": 4718592,
"dir": 3,
"rate_GBps": {
"error": 0.34865458387881154,
"max": 431.36503067484665,
"mean": 375.751503006012
},
"time_usec": 187.125
},
{
"L": 16,
"bytes": 4718592,
"dir": 7,
"rate_GBps": {
"error": 1.7026849845201326,
"max": 502.23214285714283,
"mean": 486.6590531561462
},
"time_usec": 144.48
},
{
"L": 24,
"bytes": 15925248,
"dir": 3,
"rate_GBps": {
"error": 0.3305149138573203,
"max": 453.7374521988528,
"mean": 382.9904092897145
},
"time_usec": 619.61
},
{
"L": 24,
"bytes": 15925248,
"dir": 7,
"rate_GBps": {
"error": 0.07177810715329609,
"max": 519.2662746170679,
"mean": 516.7391150500289
},
"time_usec": 459.235
},
{
"L": 32,
"bytes": 37748736,
"dir": 3,
"rate_GBps": {
"error": 0.3505158151745815,
"max": 459.93458708094846,
"mean": 385.65037793737037
},
"time_usec": 1458.575
},
{
"L": 32,
"bytes": 37748736,
"dir": 7,
"rate_GBps": {
"error": 0.05229598091368635,
"max": 526.1927034611787,
"mean": 524.4192930361779
},
"time_usec": 1072.615
},
{
"L": 40,
"bytes": 73728000,
"dir": 3,
"rate_GBps": {
"error": 0.3292408794418583,
"max": 459.10272147931465,
"mean": 386.53290497928595
},
"time_usec": 2842.275
},
{
"L": 40,
"bytes": 73728000,
"dir": 7,
"rate_GBps": {
"error": 15.163663479257934,
"max": 549.0418853073463,
"mean": 505.73238896867923
},
"time_usec": 2172.36
},
{
"L": 48,
"bytes": 127401984,
"dir": 3,
"rate_GBps": {
"error": 0.5633684126271865,
"max": 461.79457552906837,
"mean": 388.1900261426014
},
"time_usec": 4890.485
},
{
"L": 48,
"bytes": 127401984,
"dir": 7,
"rate_GBps": {
"error": 0.028472934949022477,
"max": 529.5502092050209,
"mean": 528.4182003406927
},
"time_usec": 3592.68
}
],
"flops": {
"comparison_point_Gflops": 11493.800236285882,
"results": [
{
"Gflops_dwf4": 2246.9867854895488,
"Gflops_staggered": 84.85512852959255,
"Gflops_wilson": 237.77747062888736,
"L": 8
},
{
"Gflops_dwf4": 6598.469072470059,
"Gflops_staggered": 409.58058567021146,
"Gflops_wilson": 1130.4581186873809,
"L": 12
},
{
"Gflops_dwf4": 9063.438545342291,
"Gflops_staggered": 1129.8800379112695,
"Gflops_wilson": 2841.669376693767,
"L": 16
},
{
"Gflops_dwf4": 11850.835518731748,
"Gflops_staggered": 2651.024919468422,
"Gflops_wilson": 5889.787064860617,
"L": 24
},
{
"Gflops_dwf4": 11136.764953840015,
"Gflops_staggered": 4278.104069949589,
"Gflops_wilson": 7615.951579083893,
"L": 32
}
]
},
"geometry": {
"mpi": [
1,
1,
1,
8
],
"nodes": 1,
"ranks": 8
}
}

BIN
bin/Benchmark_Grid Executable file

Binary file not shown.

BIN
bin/Benchmark_IO Executable file

Binary file not shown.

0
bin/Icon Normal file
View File

BIN
bin/hello_jobstep Executable file

Binary file not shown.

BIN
bin/hello_mpi_omp Executable file

Binary file not shown.

34
bin/helper.sh Executable file
View File

@ -0,0 +1,34 @@
#!/bin/bash
#
# Per-rank launch wrapper: derives a CPU range and a GPU for the calling MPI
# rank, exports them as GOMP_CPU_AFFINITY and ROCR_VISIBLE_DEVICES, and then
# runs the command given on the command line.
#
# Usage:
#   mpirun -np <N> ./helper.sh <command> [args...]
#
# Inputs read from the environment (defaults in parentheses):
#   OMPI_COMM_WORLD_RANK        global rank           (0)
#   OMPI_COMM_WORLD_LOCAL_RANK  rank within the node  (0)
#   OMPI_COMM_WORLD_LOCAL_SIZE  ranks on this node    (1)
#   OMP_NUM_THREADS             threads per rank      (1)
#   NUM_CPUS                    CPUs to spread over   (96)
#   RANK_STRIDE                 CPU offset per rank   (NUM_CPUS / ranks_per_node)
#   OMP_STRIDE                  CPU step per thread   (1)
#   NUM_GPUS                    GPUs to spread over   (8)
#   GPU_START                   first GPU slot used   (0)
#   GPU_STRIDE                  GPU step per rank     (1)
#
# Exports: global_rank, local_rank, ranks_per_node, OMP_NUM_THREADS,
#          GOMP_CPU_AFFINITY, ROCR_VISIBLE_DEVICES.

# Rank layout as published by Open MPI. The single-rank fallbacks keep the
# arithmetic below well-formed when the wrapper is started without mpirun.
export global_rank=${OMPI_COMM_WORLD_RANK:-0}
export local_rank=${OMPI_COMM_WORLD_LOCAL_RANK:-0}
export ranks_per_node=${OMPI_COMM_WORLD_LOCAL_SIZE:-1}

# Tunables: keep a caller-supplied value, otherwise apply the default.
: "${NUM_CPUS:=96}"
: "${RANK_STRIDE:=$(( NUM_CPUS / ranks_per_node ))}"
: "${OMP_STRIDE:=1}"
: "${NUM_GPUS:=8}"
: "${GPU_START:=0}"
: "${GPU_STRIDE:=1}"

# OMP_NUM_THREADS sizes the CPU range. Without a value the range cannot be
# computed, so default to one thread and export it so that the thread count
# the runtime sees matches the affinity list built below.
export OMP_NUM_THREADS=${OMP_NUM_THREADS:-1}

# Logical-slot -> CPU id table, sized from NUM_CPUS so that overriding
# NUM_CPUS does not leave slots without an entry. Edit this table if the
# node needs a non-identity mapping.
cpu_list=($(seq 0 $(( NUM_CPUS - 1 ))))

cpus_per_gpu=$(( NUM_CPUS / NUM_GPUS ))
cpu_start_index=$(( RANK_STRIDE * local_rank + GPU_START * cpus_per_gpu ))
if (( cpu_start_index < 0 || cpu_start_index >= ${#cpu_list[@]} )); then
    echo "helper.sh: CPU slot ${cpu_start_index} for local rank ${local_rank} is outside 0..$(( ${#cpu_list[@]} - 1 ))" >&2
    exit 1
fi
cpu_start=${cpu_list[cpu_start_index]}
cpu_stop=$(( cpu_start + OMP_NUM_THREADS * OMP_STRIDE - 1 ))

# Logical-slot -> GPU id table.
gpu_list=(0 1 2 3 4 5 6 7)

# Ceiling division: how many ranks share one GPU when ranks outnumber GPUs.
ranks_per_gpu=$(( (ranks_per_node + NUM_GPUS - 1) / NUM_GPUS ))
my_gpu_index=$(( local_rank * GPU_STRIDE / ranks_per_gpu + GPU_START ))
if (( my_gpu_index < 0 || my_gpu_index >= ${#gpu_list[@]} )); then
    echo "helper.sh: GPU slot ${my_gpu_index} for local rank ${local_rank} is outside 0..$(( ${#gpu_list[@]} - 1 ))" >&2
    exit 1
fi
my_gpu=${gpu_list[my_gpu_index]}

# Affinity list in first-last:stride form.
export GOMP_CPU_AFFINITY=$cpu_start-$cpu_stop:$OMP_STRIDE
export ROCR_VISIBLE_DEVICES=$my_gpu

# Replace this shell with the wrapped command so that signals from the
# launcher and the command's exit status are not mediated by an extra process.
exec "$@"

12
bin/job.err Normal file
View File

@ -0,0 +1,12 @@
--------------------------------------------------------------------------
WARNING: There was an error initializing an OpenFabrics device.
Local host: ubb-r09-09
Local device: mlx5_0
--------------------------------------------------------------------------
--------------------------------------------------------------------------
WARNING: There was an error initializing an OpenFabrics device.
Local host: ubb-r09-09
Local device: mlx5_0
--------------------------------------------------------------------------

899
bin/job.out Normal file
View File

@ -0,0 +1,899 @@
[1680706877.725400] [ubb-r09-09:4143038:0] parser.c:1908 UCX WARN unused env variable: UCX_HOME (set UCX_WARN_UNUSED_ENV_VARS=n to suppress this warning)
MPI 000 - OMP 001 - HWT 001 - Node ubb-r09-09 - RT_GPU_ID 0 - GPU_ID 0 - Bus_ID 29
MPI 000 - OMP 000 - HWT 000 - Node ubb-r09-09 - RT_GPU_ID 0 - GPU_ID 0 - Bus_ID 29
MPI 000 - OMP 002 - HWT 002 - Node ubb-r09-09 - RT_GPU_ID 0 - GPU_ID 0 - Bus_ID 29
MPI 000 - OMP 003 - HWT 003 - Node ubb-r09-09 - RT_GPU_ID 0 - GPU_ID 0 - Bus_ID 29
MPI 000 - OMP 004 - HWT 004 - Node ubb-r09-09 - RT_GPU_ID 0 - GPU_ID 0 - Bus_ID 29
MPI 000 - OMP 005 - HWT 005 - Node ubb-r09-09 - RT_GPU_ID 0 - GPU_ID 0 - Bus_ID 29
MPI 000 - OMP 007 - HWT 007 - Node ubb-r09-09 - RT_GPU_ID 0 - GPU_ID 0 - Bus_ID 29
MPI 000 - OMP 006 - HWT 006 - Node ubb-r09-09 - RT_GPU_ID 0 - GPU_ID 0 - Bus_ID 29
world_rank 0 has 1 devices
AcceleratorHipInit: ========================
AcceleratorHipInit: Device Number : 0
AcceleratorHipInit: ========================
AcceleratorHipInit: Device identifier: AMD Instinct MI250X/MI250
AcceleratorHipInit: totalGlobalMem: 68702699520
AcceleratorHipInit: isMultiGpuBoard: 0
AcceleratorHipInit: warpSize: 64
AcceleratorHipInit: using default device
AcceleratorHipInit: assume user or srun sets ROCR_VISIBLE_DEVICES and numa binding
AcceleratorHipInit: Configure options --enable-setdevice=no
local rank 0 device 0 bus id: 0000:29:00.0
AcceleratorHipInit: ================================================
[1680706879.831292] [ubb-r09-09:4143063:0] parser.c:1908 UCX WARN unused env variable: UCX_HOME (set UCX_WARN_UNUSED_ENV_VARS=n to suppress this warning)
SharedMemoryMpi: World communicator of size 1
SharedMemoryMpi: Node communicator of size 1
0SharedMemoryMpi: SharedMemoryMPI.cc acceleratorAllocDevice 2147483648bytes at 0x7fc341800000 - 7fc3c17fffff for comms buffers
Setting up IPC
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
__|_ | | | | | | | | | | | | _|__
__|_ _|__
__|_ GGGG RRRR III DDDD _|__
__|_ G R R I D D _|__
__|_ G R R I D D _|__
__|_ G GG RRRR I D D _|__
__|_ G G R R I D D _|__
__|_ GGGG R R III DDDD _|__
__|_ _|__
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
__|__|__|__|__|__|__|__|__|__|__|__|__|__|__
| | | | | | | | | | | | | |
Copyright (C) 2015 Peter Boyle, Azusa Yamaguchi, Guido Cossu, Antonin Portelli and other authors
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
Current Grid git commit hash=af64c1c6b6dd52109e4cc87e4977ad03f6426060: (HEAD -> develop, origin/develop, origin/HEAD) uncommited changes
Grid : Message : ================================================
Grid : Message : MPI is initialised and logging filters activated
Grid : Message : ================================================
Grid : Message : Requested 2147483648 byte stencil comms buffers
Grid : Message : MemoryManager Cache 54962159616 bytes
Grid : Message : MemoryManager::Init() setting up
Grid : Message : MemoryManager::Init() cache pool for recent host allocations: SMALL 8 LARGE 2 HUGE 0
Grid : Message : MemoryManager::Init() cache pool for recent device allocations: SMALL 16 LARGE 8 Huge 0
Grid : Message : MemoryManager::Init() cache pool for recent shared allocations: SMALL 16 LARGE 8 Huge 0
Grid : Message : MemoryManager::Init() Non unified: Caching accelerator data in dedicated memory
Grid : Message : MemoryManager::Init() Using hipMalloc
Grid : Message : 0.537200 s : ===============================================================================
Grid : Message : 0.538200 s : Grid Default Decomposition patterns
Grid : Message : 0.538400 s : ------------------------------------------
Grid : Message : 0.538700 s : * OpenMP threads : 1
Grid : Message : 0.538800 s : * MPI tasks : 1 1 1 1
Grid : Message : 0.539600 s : * vReal : 512bits ; 1 2 2 2
Grid : Message : 0.540000 s : * vRealF : 512bits ; 2 2 2 2
Grid : Message : 0.540300 s : * vRealD : 512bits ; 1 2 2 2
Grid : Message : 0.540600 s : * vComplex : 512bits ; 1 1 2 2
Grid : Message : 0.540800 s : * vComplexF : 512bits ; 1 2 2 2
Grid : Message : 0.541100 s : * vComplexD : 512bits ; 1 1 2 2
Grid : Message : 0.541300 s : * ranks : 1
Grid : Message : 0.541600 s : * nodes : 1
Grid : Message : 0.541700 s : * ranks/node : 1
Grid : Message : 0.542700 s : ===============================================================================
Grid : Message : 0.542900 s : Memory benchmark
Grid : Message : 0.543000 s : ===============================================================================
Grid : Message : 0.543300 s : Benchmarking a*x + y bandwidth
Grid : Message : 0.543400 s : ------------------------------------------
Grid : Message : 0.543800 s : L size (MB/node) time (usec) GB/s/node Gflop/s/node
Grid : Message : 2.104895 s : 8 0.75 20.44 35.83 3.21
Grid : Message : 2.501739 s : 16 12.00 30.61 382.89 34.26
Grid : Message : 2.790328 s : 24 60.75 70.84 837.41 74.93
Grid : Message : 3.134382 s : 32 192.00 179.48 1044.68 93.48
Grid : Message : 3.615671 s : 40 468.75 418.36 1094.18 97.91
Grid : Message : 4.312240 s : 48 972.00 848.60 1118.57 100.09
Grid : Message : 5.311103 s : 56 1800.75 1574.15 1117.14 99.96
Grid : Message : 6.743824 s : 64 3072.00 2653.93 1130.40 101.15
Grid : Message : 6.746637 s : ===============================================================================
Grid : Message : 6.746646 s : SU(4) benchmark
Grid : Message : 6.746647 s : ===============================================================================
Grid : Message : 6.746648 s : Benchmarking z = y*x SU(4) bandwidth
Grid : Message : 6.746649 s : ------------------------------------------
Grid : Message : 6.746651 s : L size (MB/node) time (usec) GB/s/node Gflop/s/node
Grid : Message : 7.700260 s : 8 1.50 22.02 66.53 89.30
Grid : Message : 7.939874 s : 16 24.00 42.19 555.56 745.66
Grid : Message : 8.237210 s : 24 121.50 159.86 742.22 996.20
Grid : Message : 8.662510 s : 32 384.00 453.48 826.94 1109.90
Grid : Message : 9.326550 s : 40 937.50 1087.04 842.22 1130.41
Grid : Message : 10.371306 s : 48 1944.00 2231.49 850.75 1141.86
Grid : Message : 10.373198 s : ===============================================================================
Grid : Message : 10.373205 s : Communications benchmark
Grid : Message : 10.373206 s : ===============================================================================
Grid : Message : 10.373209 s : Benchmarking threaded STENCIL halo exchange in 0 dimensions
Grid : Message : 10.373210 s : ------------------------------------------
Grid : Message : 10.373212 s : L dir payload (B) time (usec) rate (GB/s/node) std dev max
Grid : Message : 10.376833 s : ===============================================================================
Grid : Message : 10.376840 s : Wilson dslash 4D vectorised
Grid : Message : 10.376886 s : ===============================================================================
Grid : Message : 10.376889 s : Benchmark DWF on 8^4 local volume
Grid : Message : 10.376890 s : * Nc : 3
Grid : Message : 10.376891 s : * Global volume : 8 8 8 8
Grid : Message : 10.376902 s : * Ls : 1
Grid : Message : 10.376903 s : * ranks : 1
Grid : Message : 10.376906 s : * nodes : 1
Grid : Message : 10.376908 s : * ranks/node : 1
Grid : Message : 10.376909 s : * ranks geom : 1 1 1 1
Grid : Message : 10.376912 s : * Using 1 threads
Grid : Message : 10.376913 s : ===============================================================================
Grid : Message : 10.378509 s : Initialised RNGs
Grid : Message : 10.491571 s : ------------------------------------------
Grid : Message : 10.491577 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 10.491578 s : * Using sequential Comms/Compute
Grid : Message : 10.491579 s : * SINGLE precision
Grid : Message : 10.491582 s : ------------------------------------------
Grid : Message : 10.517414 s : Deo FlopsPerSite is 1344
Grid : Message : 10.517445 s : Deo Gflop/s = 89.3 (0.1) 58.6-94.9
Grid : Message : 10.517450 s : Deo Gflop/s per rank 89.3
Grid : Message : 10.517453 s : Deo Gflop/s per node 89.3
Grid : Message : 10.517455 s : ------------------------------------------
Grid : Message : 10.517457 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 10.517458 s : * Using Overlapped Comms/Compute
Grid : Message : 10.517461 s : * SINGLE precision
Grid : Message : 10.517463 s : ------------------------------------------
Grid : Message : 10.536046 s : Deo FlopsPerSite is 1344.0
Grid : Message : 10.536050 s : Deo Gflop/s = 75.9 (0.1) 56.2-78.6
Grid : Message : 10.536052 s : Deo Gflop/s per rank 75.9
Grid : Message : 10.536053 s : Deo Gflop/s per node 75.9
Grid : Message : 10.536054 s : ------------------------------------------
Grid : Message : 10.536055 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 10.536056 s : * Using sequential Comms/Compute
Grid : Message : 10.536057 s : * SINGLE precision
Grid : Message : 10.536057 s : ------------------------------------------
Grid : Message : 10.551674 s : Deo FlopsPerSite is 1344.0
Grid : Message : 10.551678 s : Deo Gflop/s = 90.1 (0.1) 81.0-94.9
Grid : Message : 10.551680 s : Deo Gflop/s per rank 90.1
Grid : Message : 10.551681 s : Deo Gflop/s per node 90.1
Grid : Message : 10.551682 s : ------------------------------------------
Grid : Message : 10.551683 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 10.551684 s : * Using Overlapped Comms/Compute
Grid : Message : 10.551685 s : * SINGLE precision
Grid : Message : 10.551686 s : ------------------------------------------
Grid : Message : 10.570207 s : Deo FlopsPerSite is 1344.0
Grid : Message : 10.570211 s : Deo Gflop/s = 75.9 (0.1) 72.4-78.6
Grid : Message : 10.570213 s : Deo Gflop/s per rank 75.9
Grid : Message : 10.570214 s : Deo Gflop/s per node 75.9
Grid : Message : 10.570215 s : ------------------------------------------
Grid : Message : 10.570216 s : 8^4 x 1 Deo Best Gflop/s = 90.1 ; 90.1 per node
Grid : Message : 10.570218 s : 8^4 x 1 Deo Worst Gflop/s = 75.9 ; 75.9 per node
Grid : Message : 10.570219 s : G/S/C ; G/O/C ; G/S/S ; G/O/S
Grid : Message : 10.570220 s : 89.3 ; 75.9 ; 90.1 ; 75.9 ;
Grid : Message : 10.571167 s : ===============================================================================
Grid : Message : 10.571172 s : Benchmark DWF on 12^4 local volume
Grid : Message : 10.571173 s : * Nc : 3
Grid : Message : 10.571174 s : * Global volume : 12 12 12 12
Grid : Message : 10.571178 s : * Ls : 1
Grid : Message : 10.571179 s : * ranks : 1
Grid : Message : 10.571180 s : * nodes : 1
Grid : Message : 10.571181 s : * ranks/node : 1
Grid : Message : 10.571182 s : * ranks geom : 1 1 1 1
Grid : Message : 10.571183 s : * Using 1 threads
Grid : Message : 10.571184 s : ===============================================================================
Grid : Message : 10.576804 s : Initialised RNGs
Grid : Message : 10.843984 s : ------------------------------------------
Grid : Message : 10.843998 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 10.843999 s : * Using sequential Comms/Compute
Grid : Message : 10.844000 s : * SINGLE precision
Grid : Message : 10.844001 s : ------------------------------------------
Grid : Message : 10.862118 s : Deo FlopsPerSite is 1344.0
Grid : Message : 10.862131 s : Deo Gflop/s = 408.9 (0.5) 296.5-422.3
Grid : Message : 10.862134 s : Deo Gflop/s per rank 408.9
Grid : Message : 10.862135 s : Deo Gflop/s per node 408.9
Grid : Message : 10.862136 s : ------------------------------------------
Grid : Message : 10.862137 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 10.862138 s : * Using Overlapped Comms/Compute
Grid : Message : 10.862139 s : * SINGLE precision
Grid : Message : 10.862140 s : ------------------------------------------
Grid : Message : 10.884288 s : Deo FlopsPerSite is 1344.0
Grid : Message : 10.884292 s : Deo Gflop/s = 321.3 (0.3) 290.3-331.8
Grid : Message : 10.884294 s : Deo Gflop/s per rank 321.3
Grid : Message : 10.884295 s : Deo Gflop/s per node 321.3
Grid : Message : 10.884296 s : ------------------------------------------
Grid : Message : 10.884297 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 10.884298 s : * Using sequential Comms/Compute
Grid : Message : 10.884299 s : * SINGLE precision
Grid : Message : 10.884299 s : ------------------------------------------
Grid : Message : 10.901666 s : Deo FlopsPerSite is 1344.0
Grid : Message : 10.901669 s : Deo Gflop/s = 409.5 (0.4) 366.7-422.3
Grid : Message : 10.901671 s : Deo Gflop/s per rank 409.5
Grid : Message : 10.901672 s : Deo Gflop/s per node 409.5
Grid : Message : 10.901673 s : ------------------------------------------
Grid : Message : 10.901674 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 10.901675 s : * Using Overlapped Comms/Compute
Grid : Message : 10.901675 s : * SINGLE precision
Grid : Message : 10.901675 s : ------------------------------------------
Grid : Message : 10.923814 s : Deo FlopsPerSite is 1344.0
Grid : Message : 10.923817 s : Deo Gflop/s = 321.4 (0.3) 290.3-331.8
Grid : Message : 10.923819 s : Deo Gflop/s per rank 321.4
Grid : Message : 10.923820 s : Deo Gflop/s per node 321.4
Grid : Message : 10.923821 s : ------------------------------------------
Grid : Message : 10.923822 s : 12^4 x 1 Deo Best Gflop/s = 409.5 ; 409.5 per node
Grid : Message : 10.923824 s : 12^4 x 1 Deo Worst Gflop/s = 321.3 ; 321.3 per node
Grid : Message : 10.923826 s : G/S/C ; G/O/C ; G/S/S ; G/O/S
Grid : Message : 10.923827 s : 408.9 ; 321.3 ; 409.5 ; 321.4 ;
Grid : Message : 10.926507 s : ===============================================================================
Grid : Message : 10.926512 s : Benchmark DWF on 16^4 local volume
Grid : Message : 10.926513 s : * Nc : 3
Grid : Message : 10.926514 s : * Global volume : 16 16 16 16
Grid : Message : 10.926522 s : * Ls : 1
Grid : Message : 10.926523 s : * ranks : 1
Grid : Message : 10.926524 s : * nodes : 1
Grid : Message : 10.926525 s : * ranks/node : 1
Grid : Message : 10.926526 s : * ranks geom : 1 1 1 1
Grid : Message : 10.926527 s : * Using 1 threads
Grid : Message : 10.926528 s : ===============================================================================
Grid : Message : 10.942650 s : Initialised RNGs
Grid : Message : 11.759317 s : ------------------------------------------
Grid : Message : 11.759335 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 11.759336 s : * Using sequential Comms/Compute
Grid : Message : 11.759337 s : * SINGLE precision
Grid : Message : 11.759338 s : ------------------------------------------
Grid : Message : 11.792221 s : Deo FlopsPerSite is 1344.0
Grid : Message : 11.792236 s : Deo Gflop/s = 725.8 (0.9) 478.7-746.4
Grid : Message : 11.792239 s : Deo Gflop/s per rank 725.8
Grid : Message : 11.792240 s : Deo Gflop/s per node 725.8
Grid : Message : 11.792241 s : ------------------------------------------
Grid : Message : 11.792242 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 11.792243 s : * Using Overlapped Comms/Compute
Grid : Message : 11.792244 s : * SINGLE precision
Grid : Message : 11.792245 s : ------------------------------------------
Grid : Message : 11.823738 s : Deo FlopsPerSite is 1344.0
Grid : Message : 11.823742 s : Deo Gflop/s = 713.9 (0.5) 667.3-734.0
Grid : Message : 11.823744 s : Deo Gflop/s per rank 713.9
Grid : Message : 11.823745 s : Deo Gflop/s per node 713.9
Grid : Message : 11.823746 s : ------------------------------------------
Grid : Message : 11.823747 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 11.823748 s : * Using sequential Comms/Compute
Grid : Message : 11.823749 s : * SINGLE precision
Grid : Message : 11.823750 s : ------------------------------------------
Grid : Message : 11.854663 s : Deo FlopsPerSite is 1344.0
Grid : Message : 11.854666 s : Deo Gflop/s = 727.3 (0.5) 677.5-746.4
Grid : Message : 11.854668 s : Deo Gflop/s per rank 727.3
Grid : Message : 11.854669 s : Deo Gflop/s per node 727.3
Grid : Message : 11.854670 s : ------------------------------------------
Grid : Message : 11.854671 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 11.854672 s : * Using Overlapped Comms/Compute
Grid : Message : 11.854673 s : * SINGLE precision
Grid : Message : 11.854674 s : ------------------------------------------
Grid : Message : 11.886128 s : Deo FlopsPerSite is 1344.0
Grid : Message : 11.886131 s : Deo Gflop/s = 714.5 (0.5) 667.3-746.4
Grid : Message : 11.886133 s : Deo Gflop/s per rank 714.5
Grid : Message : 11.886134 s : Deo Gflop/s per node 714.5
Grid : Message : 11.886135 s : ------------------------------------------
Grid : Message : 11.886136 s : 16^4 x 1 Deo Best Gflop/s = 727.3 ; 727.3 per node
Grid : Message : 11.886138 s : 16^4 x 1 Deo Worst Gflop/s = 713.9 ; 713.9 per node
Grid : Message : 11.886140 s : G/S/C ; G/O/C ; G/S/S ; G/O/S
Grid : Message : 11.886141 s : 725.8 ; 713.9 ; 727.3 ; 714.5 ;
Grid : Message : 11.892130 s : ===============================================================================
Grid : Message : 11.892137 s : Benchmark DWF on 24^4 local volume
Grid : Message : 11.892138 s : * Nc : 3
Grid : Message : 11.892139 s : * Global volume : 24 24 24 24
Grid : Message : 11.892147 s : * Ls : 1
Grid : Message : 11.892148 s : * ranks : 1
Grid : Message : 11.892149 s : * nodes : 1
Grid : Message : 11.892150 s : * ranks/node : 1
Grid : Message : 11.892151 s : * ranks geom : 1 1 1 1
Grid : Message : 11.892152 s : * Using 1 threads
Grid : Message : 11.892153 s : ===============================================================================
Grid : Message : 11.978452 s : Initialised RNGs
Grid : Message : 16.753360 s : ------------------------------------------
Grid : Message : 16.753560 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 16.753570 s : * Using sequential Comms/Compute
Grid : Message : 16.753580 s : * SINGLE precision
Grid : Message : 16.753590 s : ------------------------------------------
Grid : Message : 16.178475 s : Deo FlopsPerSite is 1344.0
Grid : Message : 16.178489 s : Deo Gflop/s = 1204.9 (0.4) 1161.2-1225.0
Grid : Message : 16.178492 s : Deo Gflop/s per rank 1204.9
Grid : Message : 16.178493 s : Deo Gflop/s per node 1204.9
Grid : Message : 16.178494 s : ------------------------------------------
Grid : Message : 16.178495 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 16.178496 s : * Using Overlapped Comms/Compute
Grid : Message : 16.178497 s : * SINGLE precision
Grid : Message : 16.178498 s : ------------------------------------------
Grid : Message : 16.313480 s : Deo FlopsPerSite is 1344.0
Grid : Message : 16.313484 s : Deo Gflop/s = 842.5 (0.3) 816.7-864.2
Grid : Message : 16.313486 s : Deo Gflop/s per rank 842.5
Grid : Message : 16.313487 s : Deo Gflop/s per node 842.5
Grid : Message : 16.313488 s : ------------------------------------------
Grid : Message : 16.313489 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 16.313490 s : * Using sequential Comms/Compute
Grid : Message : 16.313491 s : * SINGLE precision
Grid : Message : 16.313491 s : ------------------------------------------
Grid : Message : 16.407847 s : Deo FlopsPerSite is 1344.0
Grid : Message : 16.407850 s : Deo Gflop/s = 1205.3 (0.4) 1167.3-1225.0
Grid : Message : 16.407852 s : Deo Gflop/s per rank 1205.3
Grid : Message : 16.407853 s : Deo Gflop/s per node 1205.3
Grid : Message : 16.407854 s : ------------------------------------------
Grid : Message : 16.407855 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 16.407856 s : * Using Overlapped Comms/Compute
Grid : Message : 16.407857 s : * SINGLE precision
Grid : Message : 16.407858 s : ------------------------------------------
Grid : Message : 16.543002 s : Deo FlopsPerSite is 1344.0
Grid : Message : 16.543005 s : Deo Gflop/s = 841.5 (0.3) 825.8-860.8
Grid : Message : 16.543007 s : Deo Gflop/s per rank 841.5
Grid : Message : 16.543008 s : Deo Gflop/s per node 841.5
Grid : Message : 16.543009 s : ------------------------------------------
Grid : Message : 16.543010 s : 24^4 x 1 Deo Best Gflop/s = 1205.3 ; 1205.3 per node
Grid : Message : 16.543012 s : 24^4 x 1 Deo Worst Gflop/s = 841.5 ; 841.5 per node
Grid : Message : 16.543013 s : G/S/C ; G/O/C ; G/S/S ; G/O/S
Grid : Message : 16.543014 s : 1204.9 ; 842.5 ; 1205.3 ; 841.5 ;
Grid : Message : 16.575736 s : ===============================================================================
Grid : Message : 16.575752 s : Benchmark DWF on 32^4 local volume
Grid : Message : 16.575753 s : * Nc : 3
Grid : Message : 16.575754 s : * Global volume : 32 32 32 32
Grid : Message : 16.575767 s : * Ls : 1
Grid : Message : 16.575768 s : * ranks : 1
Grid : Message : 16.575769 s : * nodes : 1
Grid : Message : 16.575770 s : * ranks/node : 1
Grid : Message : 16.575771 s : * ranks geom : 1 1 1 1
Grid : Message : 16.575773 s : * Using 1 threads
Grid : Message : 16.575774 s : ===============================================================================
Grid : Message : 16.839329 s : Initialised RNGs
Grid : Message : 29.647706 s : ------------------------------------------
Grid : Message : 29.647727 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 29.647728 s : * Using sequential Comms/Compute
Grid : Message : 29.647729 s : * SINGLE precision
Grid : Message : 29.647730 s : ------------------------------------------
Grid : Message : 29.981303 s : Deo FlopsPerSite is 1344.0
Grid : Message : 29.981317 s : Deo Gflop/s = 1171.5 (0.4) 1142.0-1198.4
Grid : Message : 29.981320 s : Deo Gflop/s per rank 1171.5
Grid : Message : 29.981321 s : Deo Gflop/s per node 1171.5
Grid : Message : 29.981322 s : ------------------------------------------
Grid : Message : 29.981323 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 29.981324 s : * Using Overlapped Comms/Compute
Grid : Message : 29.981325 s : * SINGLE precision
Grid : Message : 29.981325 s : ------------------------------------------
Grid : Message : 30.405186 s : Deo FlopsPerSite is 1344.0
Grid : Message : 30.405194 s : Deo Gflop/s = 847.8 (0.2) 833.9-861.4
Grid : Message : 30.405196 s : Deo Gflop/s per rank 847.8
Grid : Message : 30.405197 s : Deo Gflop/s per node 847.8
Grid : Message : 30.405198 s : ------------------------------------------
Grid : Message : 30.405199 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 30.405200 s : * Using sequential Comms/Compute
Grid : Message : 30.405201 s : * SINGLE precision
Grid : Message : 30.405202 s : ------------------------------------------
Grid : Message : 30.711705 s : Deo FlopsPerSite is 1344.0
Grid : Message : 30.711710 s : Deo Gflop/s = 1172.6 (0.4) 1143.9-1200.4
Grid : Message : 30.711712 s : Deo Gflop/s per rank 1172.6
Grid : Message : 30.711713 s : Deo Gflop/s per node 1172.6
Grid : Message : 30.711714 s : ------------------------------------------
Grid : Message : 30.711715 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 30.711716 s : * Using Overlapped Comms/Compute
Grid : Message : 30.711717 s : * SINGLE precision
Grid : Message : 30.711717 s : ------------------------------------------
Grid : Message : 31.144787 s : Deo FlopsPerSite is 1344.0
Grid : Message : 31.144798 s : Deo Gflop/s = 829.5 (17.9) 72.9-862.5
Grid : Message : 31.144801 s : Deo Gflop/s per rank 829.5
Grid : Message : 31.144802 s : Deo Gflop/s per node 829.5
Grid : Message : 31.144803 s : ------------------------------------------
Grid : Message : 31.144804 s : 32^4 x 1 Deo Best Gflop/s = 1172.6 ; 1172.6 per node
Grid : Message : 31.144806 s : 32^4 x 1 Deo Worst Gflop/s = 829.5 ; 829.5 per node
Grid : Message : 31.144808 s : G/S/C ; G/O/C ; G/S/S ; G/O/S
Grid : Message : 31.144809 s : 1171.5 ; 847.8 ; 1172.6 ; 829.5 ;
Grid : Message : 31.236007 s : ===============================================================================
Grid : Message : 31.236024 s : Domain wall dslash 4D vectorised
Grid : Message : 31.236205 s : ===============================================================================
Grid : Message : 31.236208 s : Benchmark DWF on 8^4 local volume
Grid : Message : 31.236210 s : * Nc : 3
Grid : Message : 31.236211 s : * Global volume : 8 8 8 8
Grid : Message : 31.236221 s : * Ls : 12
Grid : Message : 31.236222 s : * ranks : 1
Grid : Message : 31.236223 s : * nodes : 1
Grid : Message : 31.236224 s : * ranks/node : 1
Grid : Message : 31.236225 s : * ranks geom : 1 1 1 1
Grid : Message : 31.236226 s : * Using 1 threads
Grid : Message : 31.236227 s : ===============================================================================
Grid : Message : 31.242721 s : Initialised RNGs
Grid : Message : 31.459446 s : ------------------------------------------
Grid : Message : 31.459454 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 31.459455 s : * Using sequential Comms/Compute
Grid : Message : 31.459456 s : * SINGLE precision
Grid : Message : 31.459457 s : ------------------------------------------
Grid : Message : 31.479412 s : Deo FlopsPerSite is 1344.0
Grid : Message : 31.479425 s : Deo Gflop/s = 898.8 (0.9) 786.4-917.5
Grid : Message : 31.479428 s : Deo Gflop/s per rank 898.8
Grid : Message : 31.479429 s : Deo Gflop/s per node 898.8
Grid : Message : 31.479430 s : ------------------------------------------
Grid : Message : 31.479431 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 31.479432 s : * Using Overlapped Comms/Compute
Grid : Message : 31.479433 s : * SINGLE precision
Grid : Message : 31.479433 s : ------------------------------------------
Grid : Message : 31.503228 s : Deo FlopsPerSite is 1344.0
Grid : Message : 31.503233 s : Deo Gflop/s = 708.8 (0.6) 647.6-734.0
Grid : Message : 31.503235 s : Deo Gflop/s per rank 708.8
Grid : Message : 31.503236 s : Deo Gflop/s per node 708.8
Grid : Message : 31.503237 s : ------------------------------------------
Grid : Message : 31.503238 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 31.503238 s : * Using sequential Comms/Compute
Grid : Message : 31.503239 s : * SINGLE precision
Grid : Message : 31.503239 s : ------------------------------------------
Grid : Message : 31.521974 s : Deo FlopsPerSite is 1344.0
Grid : Message : 31.521977 s : Deo Gflop/s = 900.5 (0.9) 805.6-917.5
Grid : Message : 31.521979 s : Deo Gflop/s per rank 900.5
Grid : Message : 31.521980 s : Deo Gflop/s per node 900.5
Grid : Message : 31.521981 s : ------------------------------------------
Grid : Message : 31.521982 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 31.521983 s : * Using Overlapped Comms/Compute
Grid : Message : 31.521984 s : * SINGLE precision
Grid : Message : 31.521985 s : ------------------------------------------
Grid : Message : 31.545784 s : Deo FlopsPerSite is 1344.0
Grid : Message : 31.545787 s : Deo Gflop/s = 708.6 (0.6) 647.6-734.0
Grid : Message : 31.545789 s : Deo Gflop/s per rank 708.6
Grid : Message : 31.545790 s : Deo Gflop/s per node 708.6
Grid : Message : 31.545791 s : ------------------------------------------
Grid : Message : 31.545792 s : 8^4 x 12 Deo Best Gflop/s = 900.5 ; 900.5 per node
Grid : Message : 31.545794 s : 8^4 x 12 Deo Worst Gflop/s = 708.6 ; 708.6 per node
Grid : Message : 31.545796 s : G/S/C ; G/O/C ; G/S/S ; G/O/S
Grid : Message : 31.545797 s : 898.8 ; 708.8 ; 900.5 ; 708.6 ;
Grid : Message : 31.548767 s : ===============================================================================
Grid : Message : 31.548772 s : Benchmark DWF on 12^4 local volume
Grid : Message : 31.548773 s : * Nc : 3
Grid : Message : 31.548774 s : * Global volume : 12 12 12 12
Grid : Message : 31.548780 s : * Ls : 12
Grid : Message : 31.548781 s : * ranks : 1
Grid : Message : 31.548782 s : * nodes : 1
Grid : Message : 31.548783 s : * ranks/node : 1
Grid : Message : 31.548784 s : * ranks geom : 1 1 1 1
Grid : Message : 31.548785 s : * Using 1 threads
Grid : Message : 31.548786 s : ===============================================================================
Grid : Message : 31.581443 s : Initialised RNGs
Grid : Message : 32.627696 s : ------------------------------------------
Grid : Message : 32.627712 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 32.627713 s : * Using sequential Comms/Compute
Grid : Message : 32.627714 s : * SINGLE precision
Grid : Message : 32.627715 s : ------------------------------------------
Grid : Message : 32.684713 s : Deo FlopsPerSite is 1344.0
Grid : Message : 32.684728 s : Deo Gflop/s = 1549.8 (0.6) 1479.8-1577.5
Grid : Message : 32.684731 s : Deo Gflop/s per rank 1549.8
Grid : Message : 32.684732 s : Deo Gflop/s per node 1549.8
Grid : Message : 32.684733 s : ------------------------------------------
Grid : Message : 32.684734 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 32.684735 s : * Using Overlapped Comms/Compute
Grid : Message : 32.684736 s : * SINGLE precision
Grid : Message : 32.684736 s : ------------------------------------------
Grid : Message : 32.741454 s : Deo FlopsPerSite is 1344.0
Grid : Message : 32.741459 s : Deo Gflop/s = 1504.6 (0.8) 1429.2-1534.1
Grid : Message : 32.741461 s : Deo Gflop/s per rank 1504.6
Grid : Message : 32.741462 s : Deo Gflop/s per node 1504.6
Grid : Message : 32.741463 s : ------------------------------------------
Grid : Message : 32.741464 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 32.741465 s : * Using sequential Comms/Compute
Grid : Message : 32.741466 s : * SINGLE precision
Grid : Message : 32.741467 s : ------------------------------------------
Grid : Message : 32.796496 s : Deo FlopsPerSite is 1344.0
Grid : Message : 32.796501 s : Deo Gflop/s = 1550.3 (0.6) 1466.8-1577.5
Grid : Message : 32.796503 s : Deo Gflop/s per rank 1550.3
Grid : Message : 32.796504 s : Deo Gflop/s per node 1550.3
Grid : Message : 32.796505 s : ------------------------------------------
Grid : Message : 32.796506 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 32.796507 s : * Using Overlapped Comms/Compute
Grid : Message : 32.796508 s : * SINGLE precision
Grid : Message : 32.796508 s : ------------------------------------------
Grid : Message : 32.853215 s : Deo FlopsPerSite is 1344.0
Grid : Message : 32.853220 s : Deo Gflop/s = 1504.4 (0.8) 1429.2-1548.3
Grid : Message : 32.853222 s : Deo Gflop/s per rank 1504.4
Grid : Message : 32.853223 s : Deo Gflop/s per node 1504.4
Grid : Message : 32.853224 s : ------------------------------------------
Grid : Message : 32.853225 s : 12^4 x 12 Deo Best Gflop/s = 1550.3 ; 1550.3 per node
Grid : Message : 32.853227 s : 12^4 x 12 Deo Worst Gflop/s = 1504.4 ; 1504.4 per node
Grid : Message : 32.853228 s : G/S/C ; G/O/C ; G/S/S ; G/O/S
Grid : Message : 32.853229 s : 1549.8 ; 1504.6 ; 1550.3 ; 1504.4 ;
Grid : Message : 32.864215 s : ===============================================================================
Grid : Message : 32.864221 s : Benchmark DWF on 16^4 local volume
Grid : Message : 32.864222 s : * Nc : 3
Grid : Message : 32.864223 s : * Global volume : 16 16 16 16
Grid : Message : 32.864230 s : * Ls : 12
Grid : Message : 32.864231 s : * ranks : 1
Grid : Message : 32.864232 s : * nodes : 1
Grid : Message : 32.864233 s : * ranks/node : 1
Grid : Message : 32.864234 s : * ranks geom : 1 1 1 1
Grid : Message : 32.864235 s : * Using 1 threads
Grid : Message : 32.864236 s : ===============================================================================
Grid : Message : 32.970228 s : Initialised RNGs
Grid : Message : 36.263248 s : ------------------------------------------
Grid : Message : 36.263263 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 36.263264 s : * Using sequential Comms/Compute
Grid : Message : 36.263265 s : * SINGLE precision
Grid : Message : 36.263266 s : ------------------------------------------
Grid : Message : 36.426972 s : Deo FlopsPerSite is 1344.0
Grid : Message : 36.426985 s : Deo Gflop/s = 1699.0 (0.5) 1661.9-1727.1
Grid : Message : 36.426988 s : Deo Gflop/s per rank 1699.0
Grid : Message : 36.426989 s : Deo Gflop/s per node 1699.0
Grid : Message : 36.426990 s : ------------------------------------------
Grid : Message : 36.426991 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 36.426992 s : * Using Overlapped Comms/Compute
Grid : Message : 36.426993 s : * SINGLE precision
Grid : Message : 36.426994 s : ------------------------------------------
Grid : Message : 36.609302 s : Deo FlopsPerSite is 1344.0
Grid : Message : 36.609306 s : Deo Gflop/s = 1478.6 (0.5) 1440.0-1514.3
Grid : Message : 36.609308 s : Deo Gflop/s per rank 1478.6
Grid : Message : 36.609309 s : Deo Gflop/s per node 1478.6
Grid : Message : 36.609310 s : ------------------------------------------
Grid : Message : 36.609311 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 36.609312 s : * Using sequential Comms/Compute
Grid : Message : 36.609313 s : * SINGLE precision
Grid : Message : 36.609314 s : ------------------------------------------
Grid : Message : 36.767966 s : Deo FlopsPerSite is 1344.0
Grid : Message : 36.767972 s : Deo Gflop/s = 1699.2 (0.5) 1656.7-1732.7
Grid : Message : 36.767974 s : Deo Gflop/s per rank 1699.2
Grid : Message : 36.767975 s : Deo Gflop/s per node 1699.2
Grid : Message : 36.767976 s : ------------------------------------------
Grid : Message : 36.767977 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 36.767978 s : * Using Overlapped Comms/Compute
Grid : Message : 36.767978 s : * SINGLE precision
Grid : Message : 36.767978 s : ------------------------------------------
Grid : Message : 36.950363 s : Deo FlopsPerSite is 1344.0
Grid : Message : 36.950367 s : Deo Gflop/s = 1477.9 (0.5) 1443.9-1509.9
Grid : Message : 36.950369 s : Deo Gflop/s per rank 1477.9
Grid : Message : 36.950370 s : Deo Gflop/s per node 1477.9
Grid : Message : 36.950371 s : ------------------------------------------
Grid : Message : 36.950372 s : 16^4 x 12 Deo Best Gflop/s = 1699.2 ; 1699.2 per node
Grid : Message : 36.950374 s : 16^4 x 12 Deo Worst Gflop/s = 1477.9 ; 1477.9 per node
Grid : Message : 36.950376 s : G/S/C ; G/O/C ; G/S/S ; G/O/S
Grid : Message : 36.950377 s : 1699.0 ; 1478.6 ; 1699.2 ; 1477.9 ;
Grid : Message : 36.988167 s : ===============================================================================
Grid : Message : 36.988172 s : Benchmark DWF on 24^4 local volume
Grid : Message : 36.988173 s : * Nc : 3
Grid : Message : 36.988174 s : * Global volume : 24 24 24 24
Grid : Message : 36.988182 s : * Ls : 12
Grid : Message : 36.988183 s : * ranks : 1
Grid : Message : 36.988184 s : * nodes : 1
Grid : Message : 36.988185 s : * ranks/node : 1
Grid : Message : 36.988188 s : * ranks geom : 1 1 1 1
Grid : Message : 36.988190 s : * Using 1 threads
Grid : Message : 36.988191 s : ===============================================================================
Grid : Message : 37.567321 s : Initialised RNGs
Grid : Message : 54.225331 s : ------------------------------------------
Grid : Message : 54.225352 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 54.225353 s : * Using sequential Comms/Compute
Grid : Message : 54.225354 s : * SINGLE precision
Grid : Message : 54.225355 s : ------------------------------------------
Grid : Message : 55.805700 s : Deo FlopsPerSite is 1344.0
Grid : Message : 55.807400 s : Deo Gflop/s = 1800.0 (0.2) 1788.4-1809.0
Grid : Message : 55.807700 s : Deo Gflop/s per rank 1800.0
Grid : Message : 55.807800 s : Deo Gflop/s per node 1800.0
Grid : Message : 55.807900 s : ------------------------------------------
Grid : Message : 55.808000 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 55.808100 s : * Using Overlapped Comms/Compute
Grid : Message : 55.808200 s : * SINGLE precision
Grid : Message : 55.808300 s : ------------------------------------------
Grid : Message : 55.989838 s : Deo FlopsPerSite is 1344.0
Grid : Message : 55.989846 s : Deo Gflop/s = 1389.9 (0.2) 1376.3-1403.0
Grid : Message : 55.989849 s : Deo Gflop/s per rank 1389.9
Grid : Message : 55.989850 s : Deo Gflop/s per node 1389.9
Grid : Message : 55.989851 s : ------------------------------------------
Grid : Message : 55.989852 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 55.989853 s : * Using sequential Comms/Compute
Grid : Message : 55.989854 s : * SINGLE precision
Grid : Message : 55.989854 s : ------------------------------------------
Grid : Message : 56.748218 s : Deo FlopsPerSite is 1344.0
Grid : Message : 56.748226 s : Deo Gflop/s = 1799.3 (0.2) 1786.0-1809.0
Grid : Message : 56.748228 s : Deo Gflop/s per rank 1799.3
Grid : Message : 56.748229 s : Deo Gflop/s per node 1799.3
Grid : Message : 56.748230 s : ------------------------------------------
Grid : Message : 56.748231 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 56.748232 s : * Using Overlapped Comms/Compute
Grid : Message : 56.748233 s : * SINGLE precision
Grid : Message : 56.748234 s : ------------------------------------------
Grid : Message : 57.729778 s : Deo FlopsPerSite is 1344.0
Grid : Message : 57.729790 s : Deo Gflop/s = 1390.2 (0.2) 1375.5-1403.7
Grid : Message : 57.729793 s : Deo Gflop/s per rank 1390.2
Grid : Message : 57.729794 s : Deo Gflop/s per node 1390.2
Grid : Message : 57.729795 s : ------------------------------------------
Grid : Message : 57.729796 s : 24^4 x 12 Deo Best Gflop/s = 1800.0 ; 1800.0 per node
Grid : Message : 57.729798 s : 24^4 x 12 Deo Worst Gflop/s = 1389.9 ; 1389.9 per node
Grid : Message : 57.729801 s : G/S/C ; G/O/C ; G/S/S ; G/O/S
Grid : Message : 57.729802 s : 1800.0 ; 1389.9 ; 1799.3 ; 1390.2 ;
Grid : Message : 57.919179 s : ===============================================================================
Grid : Message : 57.919195 s : Benchmark DWF on 32^4 local volume
Grid : Message : 57.919197 s : * Nc : 3
Grid : Message : 57.919198 s : * Global volume : 32 32 32 32
Grid : Message : 57.919207 s : * Ls : 12
Grid : Message : 57.919208 s : * ranks : 1
Grid : Message : 57.919209 s : * nodes : 1
Grid : Message : 57.919210 s : * ranks/node : 1
Grid : Message : 57.919211 s : * ranks geom : 1 1 1 1
Grid : Message : 57.919212 s : * Using 1 threads
Grid : Message : 57.919213 s : ===============================================================================
Grid : Message : 59.798510 s : Initialised RNGs
Grid : Message : 112.360179 s : ------------------------------------------
Grid : Message : 112.360198 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 112.360199 s : * Using sequential Comms/Compute
Grid : Message : 112.360200 s : * SINGLE precision
Grid : Message : 112.360201 s : ------------------------------------------
Grid : Message : 115.191800 s : Deo FlopsPerSite is 1344.0
Grid : Message : 115.192040 s : Deo Gflop/s = 1670.1 (0.3) 1650.5-1685.1
Grid : Message : 115.192080 s : Deo Gflop/s per rank 1670.1
Grid : Message : 115.192090 s : Deo Gflop/s per node 1670.1
Grid : Message : 115.192100 s : ------------------------------------------
Grid : Message : 115.192110 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 115.192120 s : * Using Overlapped Comms/Compute
Grid : Message : 115.192130 s : * SINGLE precision
Grid : Message : 115.192140 s : ------------------------------------------
Grid : Message : 117.996882 s : Deo FlopsPerSite is 1344.0
Grid : Message : 117.996897 s : Deo Gflop/s = 1448.3 (0.1) 1440.3-1455.6
Grid : Message : 117.996900 s : Deo Gflop/s per rank 1448.3
Grid : Message : 117.996901 s : Deo Gflop/s per node 1448.3
Grid : Message : 117.996902 s : ------------------------------------------
Grid : Message : 117.996903 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 117.996904 s : * Using sequential Comms/Compute
Grid : Message : 117.996905 s : * SINGLE precision
Grid : Message : 117.996906 s : ------------------------------------------
Grid : Message : 120.578643 s : Deo FlopsPerSite is 1344.0
Grid : Message : 120.578657 s : Deo Gflop/s = 1670.3 (0.3) 1635.5-1685.7
Grid : Message : 120.578660 s : Deo Gflop/s per rank 1670.3
Grid : Message : 120.578661 s : Deo Gflop/s per node 1670.3
Grid : Message : 120.578663 s : ------------------------------------------
Grid : Message : 120.578664 s : * Using GENERIC Nc WilsonKernels
Grid : Message : 120.578665 s : * Using Overlapped Comms/Compute
Grid : Message : 120.578666 s : * SINGLE precision
Grid : Message : 120.578667 s : ------------------------------------------
Grid : Message : 123.556314 s : Deo FlopsPerSite is 1344.0
Grid : Message : 123.556328 s : Deo Gflop/s = 1448.3 (0.1) 1440.7-1455.6
Grid : Message : 123.556331 s : Deo Gflop/s per rank 1448.3
Grid : Message : 123.556332 s : Deo Gflop/s per node 1448.3
Grid : Message : 123.556333 s : ------------------------------------------
Grid : Message : 123.556334 s : 32^4 x 12 Deo Best Gflop/s = 1670.3 ; 1670.3 per node
Grid : Message : 123.556336 s : 32^4 x 12 Deo Worst Gflop/s = 1448.3 ; 1448.3 per node
Grid : Message : 123.556338 s : G/S/C ; G/O/C ; G/S/S ; G/O/S
Grid : Message : 123.556339 s : 1670.1 ; 1448.3 ; 1670.3 ; 1448.3 ;
Grid : Message : 124.246882 s : ===============================================================================
Grid : Message : 124.246903 s : Improved Staggered dslash 4D vectorised
Grid : Message : 124.247016 s : ===============================================================================
Grid : Message : 124.247019 s : Benchmark ImprovedStaggered on 8^4 local volume
Grid : Message : 124.247020 s : * Global volume : 8 8 8 8
Grid : Message : 124.247033 s : * ranks : 1
Grid : Message : 124.247034 s : * nodes : 1
Grid : Message : 124.247035 s : * ranks/node : 1
Grid : Message : 124.247039 s : * ranks geom : 1 1 1 1
Grid : Message : 124.247043 s : * Using 1 threads
Grid : Message : 124.247044 s : ===============================================================================
Grid : Message : 124.247555 s : Initialised RNGs
Grid : Message : 124.319977 s : ------------------------------------------
Grid : Message : 124.319984 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 124.319985 s : * Using sequential Comms/Compute
Grid : Message : 124.319986 s : * SINGLE precision
Grid : Message : 124.319987 s : ------------------------------------------
Grid : Message : 124.344334 s : Deo Gflop/s = 67.3 (0.1) 51.0-71.1
Grid : Message : 124.344347 s : Deo Gflop/s per rank 67.3
Grid : Message : 124.344349 s : Deo Gflop/s per node 67.3
Grid : Message : 124.344350 s : ------------------------------------------
Grid : Message : 124.344351 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 124.344352 s : * Using Overlapped Comms/Compute
Grid : Message : 124.344353 s : * SINGLE precision
Grid : Message : 124.344354 s : ------------------------------------------
Grid : Message : 124.375542 s : Deo Gflop/s = 38.5 (0.0) 33.5-39.8
Grid : Message : 124.375547 s : Deo Gflop/s per rank 38.5
Grid : Message : 124.375548 s : Deo Gflop/s per node 38.5
Grid : Message : 124.375549 s : ------------------------------------------
Grid : Message : 124.375550 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 124.375551 s : * Using sequential Comms/Compute
Grid : Message : 124.375552 s : * SINGLE precision
Grid : Message : 124.375552 s : ------------------------------------------
Grid : Message : 124.393356 s : Deo Gflop/s = 67.4 (0.1) 63.4-71.1
Grid : Message : 124.393361 s : Deo Gflop/s per rank 67.4
Grid : Message : 124.393362 s : Deo Gflop/s per node 67.4
Grid : Message : 124.393363 s : ------------------------------------------
Grid : Message : 124.393364 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 124.393365 s : * Using Overlapped Comms/Compute
Grid : Message : 124.393366 s : * SINGLE precision
Grid : Message : 124.393366 s : ------------------------------------------
Grid : Message : 124.424488 s : Deo Gflop/s = 38.5 (0.0) 36.1-39.8
Grid : Message : 124.424493 s : Deo Gflop/s per rank 38.5
Grid : Message : 124.424494 s : Deo Gflop/s per node 38.5
Grid : Message : 124.424495 s : ------------------------------------------
Grid : Message : 124.424496 s : 8^4 Deo Best Gflop/s = 67.4 ; 67.4 per node
Grid : Message : 124.424498 s : 8^4 Deo Worst Gflop/s = 38.5 ; 38.5 per node
Grid : Message : 124.424500 s : G/S/C ; G/O/C ; G/S/S ; G/O/S
Grid : Message : 124.424501 s : 67.3 ; 38.5 ; 67.4 ; 38.5 ;
Grid : Message : 124.425032 s : ===============================================================================
Grid : Message : 124.425036 s : Benchmark ImprovedStaggered on 12^4 local volume
Grid : Message : 124.425037 s : * Global volume : 12 12 12 12
Grid : Message : 124.425040 s : * ranks : 1
Grid : Message : 124.425041 s : * nodes : 1
Grid : Message : 124.425042 s : * ranks/node : 1
Grid : Message : 124.425043 s : * ranks geom : 1 1 1 1
Grid : Message : 124.425044 s : * Using 1 threads
Grid : Message : 124.425045 s : ===============================================================================
Grid : Message : 124.427370 s : Initialised RNGs
Grid : Message : 124.662237 s : ------------------------------------------
Grid : Message : 124.662245 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 124.662246 s : * Using sequential Comms/Compute
Grid : Message : 124.662247 s : * SINGLE precision
Grid : Message : 124.662248 s : ------------------------------------------
Grid : Message : 124.682724 s : Deo Gflop/s = 315.4 (0.3) 289.8-321.1
Grid : Message : 124.682733 s : Deo Gflop/s per rank 315.4
Grid : Message : 124.682734 s : Deo Gflop/s per node 315.4
Grid : Message : 124.682735 s : ------------------------------------------
Grid : Message : 124.682736 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 124.682737 s : * Using Overlapped Comms/Compute
Grid : Message : 124.682738 s : * SINGLE precision
Grid : Message : 124.682738 s : ------------------------------------------
Grid : Message : 124.717283 s : Deo Gflop/s = 175.7 (0.1) 156.3-180.0
Grid : Message : 124.717288 s : Deo Gflop/s per rank 175.7
Grid : Message : 124.717289 s : Deo Gflop/s per node 175.7
Grid : Message : 124.717290 s : ------------------------------------------
Grid : Message : 124.717291 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 124.717292 s : * Using sequential Comms/Compute
Grid : Message : 124.717293 s : * SINGLE precision
Grid : Message : 124.717293 s : ------------------------------------------
Grid : Message : 124.736518 s : Deo Gflop/s = 315.7 (0.4) 289.8-330.0
Grid : Message : 124.736522 s : Deo Gflop/s per rank 315.7
Grid : Message : 124.736523 s : Deo Gflop/s per node 315.7
Grid : Message : 124.736524 s : ------------------------------------------
Grid : Message : 124.736525 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 124.736526 s : * Using Overlapped Comms/Compute
Grid : Message : 124.736527 s : * SINGLE precision
Grid : Message : 124.736527 s : ------------------------------------------
Grid : Message : 124.771049 s : Deo Gflop/s = 175.7 (0.1) 160.6-180.0
Grid : Message : 124.771055 s : Deo Gflop/s per rank 175.7
Grid : Message : 124.771056 s : Deo Gflop/s per node 175.7
Grid : Message : 124.771057 s : ------------------------------------------
Grid : Message : 124.771058 s : 12^4 Deo Best Gflop/s = 315.7 ; 315.7 per node
Grid : Message : 124.771060 s : 12^4 Deo Worst Gflop/s = 175.7 ; 175.7 per node
Grid : Message : 124.771061 s : G/S/C ; G/O/C ; G/S/S ; G/O/S
Grid : Message : 124.771062 s : 315.4 ; 175.7 ; 315.7 ; 175.7 ;
Grid : Message : 124.772087 s : ===============================================================================
Grid : Message : 124.772091 s : Benchmark ImprovedStaggered on 16^4 local volume
Grid : Message : 124.772092 s : * Global volume : 16 16 16 16
Grid : Message : 124.772097 s : * ranks : 1
Grid : Message : 124.772098 s : * nodes : 1
Grid : Message : 124.772099 s : * ranks/node : 1
Grid : Message : 124.772100 s : * ranks geom : 1 1 1 1
Grid : Message : 124.772101 s : * Using 1 threads
Grid : Message : 124.772102 s : ===============================================================================
Grid : Message : 124.779417 s : Initialised RNGs
Grid : Message : 125.477612 s : ------------------------------------------
Grid : Message : 125.477627 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 125.477628 s : * Using sequential Comms/Compute
Grid : Message : 125.477629 s : * SINGLE precision
Grid : Message : 125.477630 s : ------------------------------------------
Grid : Message : 125.519431 s : Deo Gflop/s = 498.8 (0.3) 469.4-507.5
Grid : Message : 125.519447 s : Deo Gflop/s per rank 498.8
Grid : Message : 125.519448 s : Deo Gflop/s per node 498.8
Grid : Message : 125.519449 s : ------------------------------------------
Grid : Message : 125.519450 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 125.519451 s : * Using Overlapped Comms/Compute
Grid : Message : 125.519452 s : * SINGLE precision
Grid : Message : 125.519453 s : ------------------------------------------
Grid : Message : 125.563771 s : Deo Gflop/s = 432.5 (0.4) 399.5-441.8
Grid : Message : 125.563776 s : Deo Gflop/s per rank 432.5
Grid : Message : 125.563777 s : Deo Gflop/s per node 432.5
Grid : Message : 125.563778 s : ------------------------------------------
Grid : Message : 125.563779 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 125.563780 s : * Using sequential Comms/Compute
Grid : Message : 125.563781 s : * SINGLE precision
Grid : Message : 125.563781 s : ------------------------------------------
Grid : Message : 125.602189 s : Deo Gflop/s = 499.0 (0.3) 475.3-507.5
Grid : Message : 125.602193 s : Deo Gflop/s per rank 499.0
Grid : Message : 125.602194 s : Deo Gflop/s per node 499.0
Grid : Message : 125.602195 s : ------------------------------------------
Grid : Message : 125.602196 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 125.602197 s : * Using Overlapped Comms/Compute
Grid : Message : 125.602198 s : * SINGLE precision
Grid : Message : 125.602198 s : ------------------------------------------
Grid : Message : 125.646629 s : Deo Gflop/s = 431.3 (0.3) 399.5-441.8
Grid : Message : 125.646634 s : Deo Gflop/s per rank 431.3
Grid : Message : 125.646635 s : Deo Gflop/s per node 431.3
Grid : Message : 125.646636 s : ------------------------------------------
Grid : Message : 125.646637 s : 16^4 Deo Best Gflop/s = 499.0 ; 499.0 per node
Grid : Message : 125.646639 s : 16^4 Deo Worst Gflop/s = 431.3 ; 431.3 per node
Grid : Message : 125.646640 s : G/S/C ; G/O/C ; G/S/S ; G/O/S
Grid : Message : 125.646641 s : 498.8 ; 432.5 ; 499.0 ; 431.3 ;
Grid : Message : 125.649458 s : ===============================================================================
Grid : Message : 125.649463 s : Benchmark ImprovedStaggered on 24^4 local volume
Grid : Message : 125.649464 s : * Global volume : 24 24 24 24
Grid : Message : 125.649471 s : * ranks : 1
Grid : Message : 125.649472 s : * nodes : 1
Grid : Message : 125.649473 s : * ranks/node : 1
Grid : Message : 125.649474 s : * ranks geom : 1 1 1 1
Grid : Message : 125.649475 s : * Using 1 threads
Grid : Message : 125.649478 s : ===============================================================================
Grid : Message : 125.686019 s : Initialised RNGs
Grid : Message : 129.152249 s : ------------------------------------------
Grid : Message : 129.152265 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 129.152266 s : * Using sequential Comms/Compute
Grid : Message : 129.152267 s : * SINGLE precision
Grid : Message : 129.152268 s : ------------------------------------------
Grid : Message : 129.300262 s : Deo Gflop/s = 747.3 (0.2) 731.2-763.5
Grid : Message : 129.300278 s : Deo Gflop/s per rank 747.3
Grid : Message : 129.300279 s : Deo Gflop/s per node 747.3
Grid : Message : 129.300280 s : ------------------------------------------
Grid : Message : 129.300281 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 129.300282 s : * Using Overlapped Comms/Compute
Grid : Message : 129.300283 s : * SINGLE precision
Grid : Message : 129.300284 s : ------------------------------------------
Grid : Message : 129.447275 s : Deo Gflop/s = 659.7 (0.2) 646.6-671.8
Grid : Message : 129.447281 s : Deo Gflop/s per rank 659.7
Grid : Message : 129.447282 s : Deo Gflop/s per node 659.7
Grid : Message : 129.447283 s : ------------------------------------------
Grid : Message : 129.447284 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 129.447285 s : * Using sequential Comms/Compute
Grid : Message : 129.447286 s : * SINGLE precision
Grid : Message : 129.447287 s : ------------------------------------------
Grid : Message : 129.577059 s : Deo Gflop/s = 747.3 (0.2) 731.2-763.5
Grid : Message : 129.577065 s : Deo Gflop/s per rank 747.3
Grid : Message : 129.577066 s : Deo Gflop/s per node 747.3
Grid : Message : 129.577067 s : ------------------------------------------
Grid : Message : 129.577068 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 129.577069 s : * Using Overlapped Comms/Compute
Grid : Message : 129.577070 s : * SINGLE precision
Grid : Message : 129.577070 s : ------------------------------------------
Grid : Message : 129.724027 s : Deo Gflop/s = 659.9 (0.2) 644.4-674.1
Grid : Message : 129.724032 s : Deo Gflop/s per rank 659.9
Grid : Message : 129.724033 s : Deo Gflop/s per node 659.9
Grid : Message : 129.724034 s : ------------------------------------------
Grid : Message : 129.724035 s : 24^4 Deo Best Gflop/s = 747.3 ; 747.3 per node
Grid : Message : 129.724037 s : 24^4 Deo Worst Gflop/s = 659.7 ; 659.7 per node
Grid : Message : 129.724038 s : G/S/C ; G/O/C ; G/S/S ; G/O/S
Grid : Message : 129.724039 s : 747.3 ; 659.7 ; 747.3 ; 659.9 ;
Grid : Message : 129.737401 s : ===============================================================================
Grid : Message : 129.737409 s : Benchmark ImprovedStaggered on 32^4 local volume
Grid : Message : 129.737410 s : * Global volume : 32 32 32 32
Grid : Message : 129.737418 s : * ranks : 1
Grid : Message : 129.737419 s : * nodes : 1
Grid : Message : 129.737420 s : * ranks/node : 1
Grid : Message : 129.737421 s : * ranks geom : 1 1 1 1
Grid : Message : 129.737422 s : * Using 1 threads
Grid : Message : 129.737423 s : ===============================================================================
Grid : Message : 129.853540 s : Initialised RNGs
Grid : Message : 140.878355 s : ------------------------------------------
Grid : Message : 140.878368 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 140.878369 s : * Using sequential Comms/Compute
Grid : Message : 140.878370 s : * SINGLE precision
Grid : Message : 140.878371 s : ------------------------------------------
Grid : Message : 141.304260 s : Deo Gflop/s = 809.5 (0.2) 797.9-823.1
Grid : Message : 141.304277 s : Deo Gflop/s per rank 809.5
Grid : Message : 141.304278 s : Deo Gflop/s per node 809.5
Grid : Message : 141.304279 s : ------------------------------------------
Grid : Message : 141.304280 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 141.304281 s : * Using Overlapped Comms/Compute
Grid : Message : 141.304282 s : * SINGLE precision
Grid : Message : 141.304283 s : ------------------------------------------
Grid : Message : 141.724304 s : Deo Gflop/s = 729.6 (0.1) 721.3-733.6
Grid : Message : 141.724313 s : Deo Gflop/s per rank 729.6
Grid : Message : 141.724314 s : Deo Gflop/s per node 729.6
Grid : Message : 141.724315 s : ------------------------------------------
Grid : Message : 141.724316 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 141.724317 s : * Using sequential Comms/Compute
Grid : Message : 141.724318 s : * SINGLE precision
Grid : Message : 141.724319 s : ------------------------------------------
Grid : Message : 142.103287 s : Deo Gflop/s = 808.6 (0.2) 796.9-819.7
Grid : Message : 142.103294 s : Deo Gflop/s per rank 808.6
Grid : Message : 142.103295 s : Deo Gflop/s per node 808.6
Grid : Message : 142.103296 s : ------------------------------------------
Grid : Message : 142.103297 s : * Using GENERIC Nc StaggeredKernels
Grid : Message : 142.103298 s : * Using Overlapped Comms/Compute
Grid : Message : 142.103299 s : * SINGLE precision
Grid : Message : 142.103300 s : ------------------------------------------
Grid : Message : 142.523689 s : Deo Gflop/s = 729.0 (0.1) 721.3-733.6
Grid : Message : 142.523695 s : Deo Gflop/s per rank 729.0
Grid : Message : 142.523696 s : Deo Gflop/s per node 729.0
Grid : Message : 142.523697 s : ------------------------------------------
Grid : Message : 142.523698 s : 32^4 Deo Best Gflop/s = 809.5 ; 809.5 per node
Grid : Message : 142.523700 s : 32^4 Deo Worst Gflop/s = 729.0 ; 729.0 per node
Grid : Message : 142.523702 s : G/S/C ; G/O/C ; G/S/S ; G/O/S
Grid : Message : 142.523703 s : 809.5 ; 729.6 ; 808.6 ; 729.0 ;
Grid : Message : 142.574389 s : ===============================================================================
Grid : Message : 142.574401 s : Gflop/s/node Summary table Ls=12
Grid : Message : 142.574402 s : ===============================================================================
Grid : Message : 142.574406 s : L Wilson DWF Staggered
Grid : Message : 142.574411 s : 8 90.09 900.49 67.41
Grid : Message : 142.574422 s : 12 409.48 1550.33 315.72
Grid : Message : 142.574425 s : 16 727.26 1699.19 499.04
Grid : Message : 142.574434 s : 24 1205.32 1799.95 747.35
Grid : Message : 142.574438 s : 32 1172.60 1670.31 809.49
Grid : Message : 142.574440 s : ===============================================================================
Grid : Message : 142.574442 s : Comparison point result: 1735.1 Gflop/s per node
Grid : Message : 142.574448 s : Comparison point is 0.5*(1670.3+1800.0)
Grid : Message : 142.574450 s : ===============================================================================
Grid : Message : 142.574458 s : writing benchmark results to 1GPU_1_1_1_1_test_05_04_23.json

18
bin/run.sh Executable file
View File

@ -0,0 +1,18 @@
#!/bin/bash -l
# Slurm batch script: runs a binding sanity check and then Benchmark_Grid on a
# single GPU of one node, writing the benchmark results to a JSON file.
#
# Submit with `sbatch run.sh`. Comment lines among the directives below are
# ignored by sbatch; a directive written as `##SBATCH` is disabled and is kept
# only as a ready-to-swap alternative partition.
# NOTE(review): 12 CPUs are requested per task but OMP_NUM_THREADS is set to 8
# on the launch lines below -- confirm this headroom is intentional.
#SBATCH -p 1CN96C8G1H_MI250_Ubuntu22
##SBATCH -p 1CN_MI250_Hackathon_Ubuntu22
#SBATCH -N 1
#SBATCH --cpus-per-task=12
#SBATCH --gres=gpu:1
#SBATCH --ntasks-per-node=1
#SBATCH --time=00:10:00
#SBATCH -e job.err
#SBATCH -o job.out

# Toolchain and library environment (ROCm plus the spack-provided packages,
# including the OpenMPI that supplies mpirun).
module load rocm@5.4.3
spack load gmp mpfr openmpi fftw hdf5 c-lime

# Fail fast when the environment setup did not provide the MPI launcher.
# Without this guard every launch line below fails separately with
# "mpirun: command not found" and the job produces no results.
if ! command -v mpirun >/dev/null 2>&1; then
    echo "run.sh: mpirun not found in PATH after 'module load' / 'spack load'; aborting" >&2
    exit 127
fi

# Step 1: single-rank run of hello_jobstep through helper.sh.
# NOTE(review): presumably a rank/GPU binding check -- helper.sh and
# hello_jobstep are not visible here; confirm.
OMP_NUM_THREADS=8 mpirun -np 1 --bind-to none ./helper.sh ./hello_jobstep

# Step 2: Benchmark_Grid on a 1.1.1.1 MPI decomposition, launched under
# omnitrace; results are written to the file given by --json-out.
OMP_NUM_THREADS=8 mpirun -np 1 --bind-to none ./helper.sh omnitrace ./Benchmark_Grid --accelerator-threads 8 --mpi 1.1.1.1 --shm 2048 --comms-overlap --comms-concurrent --shm-mpi 0 --json-out 1GPU_1_1_1_1_test_05_04_23.json

1
bin/slurm-36500.out Normal file
View File

@ -0,0 +1 @@
/var/spool/slurm-llnl/job36500/slurm_script: line 11: mpirun: command not found

247
bin/test_05_04_23.json Normal file
View File

@ -0,0 +1,247 @@
{
"SU4": [
{
"GBps": 72.95795546516696,
"GFlops": 97.92251022059891,
"L": 8,
"size_MB": 3.0
},
{
"GBps": 582.8988697806803,
"GFlops": 782.3536195573076,
"L": 16,
"size_MB": 48.0
},
{
"GBps": 831.1497694944638,
"GFlops": 1115.5503368927066,
"L": 24,
"size_MB": 243.0
},
{
"GBps": 871.4805098225537,
"GFlops": 1169.6813402466482,
"L": 32,
"size_MB": 768.0
},
{
"GBps": 856.24886270296,
"GFlops": 1149.2377695457521,
"L": 40,
"size_MB": 1875.0
},
{
"GBps": 848.7965671453275,
"GFlops": 1139.235467764453,
"L": 48,
"size_MB": 3888.0
}
],
"axpy": [
{
"GBps": 36.664104253158314,
"GFlops": 3.2806485146760305,
"L": 8,
"size_MB": 1.5
},
{
"GBps": 475.5541394610069,
"GFlops": 42.55186409296766,
"L": 16,
"size_MB": 24.0
},
{
"GBps": 1062.0541626046584,
"GFlops": 95.03099781182652,
"L": 24,
"size_MB": 121.5
},
{
"GBps": 1091.2166155916666,
"GFlops": 97.6404099337086,
"L": 32,
"size_MB": 384.0
},
{
"GBps": 1083.0714951643492,
"GFlops": 96.91159689501464,
"L": 40,
"size_MB": 937.5
},
{
"GBps": 1091.7986736212977,
"GFlops": 97.69249160457608,
"L": 48,
"size_MB": 1944.0
},
{
"GBps": 1116.5673848828242,
"GFlops": 99.90875837191614,
"L": 56,
"size_MB": 3601.5
},
{
"GBps": 1102.0528489443711,
"GFlops": 98.61001968082712,
"L": 64,
"size_MB": 6144.0
}
],
"comms": [
{
"L": 16,
"bytes": 4718592,
"dir": 3,
"rate_GBps": {
"error": 0.5819955845789178,
"max": 319.60227272727275,
"mean": 299.4569846678024
},
"time_usec": 58.7
},
{
"L": 16,
"bytes": 4718592,
"dir": 7,
"rate_GBps": {
"error": 0.6045177667005093,
"max": 313.8950892857143,
"mean": 300.7120862201694
},
"time_usec": 58.455
},
{
"L": 24,
"bytes": 15925248,
"dir": 3,
"rate_GBps": {
"error": 0.3219677699052459,
"max": 624.4860197368421,
"mean": 617.4985362997658
},
"time_usec": 96.075
},
{
"L": 24,
"bytes": 15925248,
"dir": 7,
"rate_GBps": {
"error": 0.3217013534900953,
"max": 624.4860197368422,
"mean": 616.3117792956576
},
"time_usec": 96.26
},
{
"L": 32,
"bytes": 37748736,
"dir": 3,
"rate_GBps": {
"error": 0.16708382399125116,
"max": 803.5714285714286,
"mean": 800.4610655737705
},
"time_usec": 175.68
},
{
"L": 32,
"bytes": 37748736,
"dir": 7,
"rate_GBps": {
"error": 0.16094436467098314,
"max": 803.5714285714286,
"mean": 800.7801378053641
},
"time_usec": 175.61
},
{
"L": 40,
"bytes": 73728000,
"dir": 3,
"rate_GBps": {
"error": 3.6932378948517726,
"max": 1060.4563827220077,
"mean": 999.66588944495
},
"time_usec": 274.75
},
{
"L": 40,
"bytes": 73728000,
"dir": 7,
"rate_GBps": {
"error": 0.7732919743918085,
"max": 1060.456382722008,
"mean": 1033.1905246675572
},
"time_usec": 265.835
},
{
"L": 48,
"bytes": 127401984,
"dir": 3,
"rate_GBps": {
"error": 3.4290882620704632,
"max": 1103.7427325581398,
"mean": 1019.9634122754235
},
"time_usec": 465.32
},
{
"L": 48,
"bytes": 127401984,
"dir": 7,
"rate_GBps": {
"error": 1.7805341746498673,
"max": 1103.7427325581393,
"mean": 1052.9793335256138
},
"time_usec": 450.73
}
],
"flops": {
"comparison_point_Gflops": 1611.577549655192,
"results": [
{
"Gflops_dwf4": 323.4349167180752,
"Gflops_staggered": 12.066444909437706,
"Gflops_wilson": 31.980666449783893,
"L": 8
},
{
"Gflops_dwf4": 865.5026086956523,
"Gflops_staggered": 56.31257464596485,
"Gflops_wilson": 154.20341946550104,
"L": 12
},
{
"Gflops_dwf4": 1274.4032988497431,
"Gflops_staggered": 151.36310980156634,
"Gflops_wilson": 406.90922194195747,
"L": 16
},
{
"Gflops_dwf4": 1635.941519395089,
"Gflops_staggered": 410.01962234851345,
"Gflops_wilson": 832.8886796893387,
"L": 24
},
{
"Gflops_dwf4": 1587.2135799152945,
"Gflops_staggered": 601.6161489936918,
"Gflops_wilson": 1019.3722000081013,
"L": 32
}
]
},
"geometry": {
"mpi": [
1,
1,
1,
2
],
"nodes": 1,
"ranks": 2
}
}

247
bin/test_result.json Normal file
View File

@ -0,0 +1,247 @@
{
"SU4": [
{
"GBps": 82.16994221079263,
"GFlops": 110.28662953423883,
"L": 8,
"size_MB": 6.0
},
{
"GBps": 583.033802716333,
"GFlops": 782.5347234778643,
"L": 16,
"size_MB": 96.0
},
{
"GBps": 686.9200472555763,
"GFlops": 921.9684806029609,
"L": 24,
"size_MB": 486.0
},
{
"GBps": 721.9096917712991,
"GFlops": 968.9307865072406,
"L": 32,
"size_MB": 1536.0
},
{
"GBps": 708.3069986388066,
"GFlops": 950.6735608379971,
"L": 40,
"size_MB": 3750.0
},
{
"GBps": 725.7398604285772,
"GFlops": 974.0715518576075,
"L": 48,
"size_MB": 7776.0
}
],
"axpy": [
{
"GBps": 41.59376781166453,
"GFlops": 3.7217473430940964,
"L": 8,
"size_MB": 3.0
},
{
"GBps": 413.54098604712715,
"GFlops": 37.00302105475007,
"L": 16,
"size_MB": 48.0
},
{
"GBps": 736.025296235133,
"GFlops": 65.85842867413767,
"L": 24,
"size_MB": 243.0
},
{
"GBps": 845.301987657182,
"GFlops": 75.63634150482068,
"L": 32,
"size_MB": 768.0
},
{
"GBps": 915.650883948628,
"GFlops": 81.93105418985103,
"L": 40,
"size_MB": 1875.0
},
{
"GBps": 918.5294036587682,
"GFlops": 82.18861977351649,
"L": 48,
"size_MB": 3888.0
},
{
"GBps": 915.8908049272835,
"GFlops": 81.9525219556208,
"L": 56,
"size_MB": 7203.0
},
{
"GBps": 933.3426149071149,
"GFlops": 83.51408347894127,
"L": 64,
"size_MB": 12288.0
}
],
"comms": [
{
"L": 16,
"bytes": 4718592,
"dir": 3,
"rate_GBps": {
"error": 4.118706014626012,
"max": 434.0277777777777,
"mean": 284.4817122511733
},
"time_usec": 123.58
},
{
"L": 16,
"bytes": 4718592,
"dir": 7,
"rate_GBps": {
"error": 4.1521797501551525,
"max": 434.02777777777777,
"mean": 284.9197665937272
},
"time_usec": 123.39
},
{
"L": 24,
"bytes": 15925248,
"dir": 3,
"rate_GBps": {
"error": 5.296617827547019,
"max": 885.4652518656717,
"mean": 650.613279322257
},
"time_usec": 182.37
},
{
"L": 24,
"bytes": 15925248,
"dir": 7,
"rate_GBps": {
"error": 5.353297810533095,
"max": 885.4652518656717,
"mean": 652.2943581638262
},
"time_usec": 181.9
},
{
"L": 32,
"bytes": 37748736,
"dir": 3,
"rate_GBps": {
"error": 4.726418547916897,
"max": 1019.021739130435,
"mean": 843.5180325410513
},
"time_usec": 333.425
},
{
"L": 32,
"bytes": 37748736,
"dir": 7,
"rate_GBps": {
"error": 6.106965199705783,
"max": 1041.6666666666667,
"mean": 865.5044544629257
},
"time_usec": 324.955
},
{
"L": 40,
"bytes": 73728000,
"dir": 3,
"rate_GBps": {
"error": 3.5355426867815396,
"max": 1081.3315083661416,
"mean": 974.9938432388778
},
"time_usec": 563.405
},
{
"L": 40,
"bytes": 73728000,
"dir": 7,
"rate_GBps": {
"error": 3.2512421053107325,
"max": 1083.4643121301774,
"mean": 989.6255573571138
},
"time_usec": 555.075
},
{
"L": 48,
"bytes": 127401984,
"dir": 3,
"rate_GBps": {
"error": 2.5924773984958884,
"max": 1084.8214285714287,
"mean": 1003.6093592230957
},
"time_usec": 945.805
},
{
"L": 48,
"bytes": 127401984,
"dir": 7,
"rate_GBps": {
"error": 2.2239838373071463,
"max": 1102.4608013937282,
"mean": 1023.0965520214704
},
"time_usec": 927.79
}
],
"flops": {
"comparison_point_Gflops": 870.0277887227762,
"results": [
{
"Gflops_dwf4": 314.95374405232997,
"Gflops_staggered": 3.4646403436580835,
"Gflops_wilson": 30.77048282338211,
"L": 8
},
{
"Gflops_dwf4": 627.6042246860386,
"Gflops_staggered": 47.91260849841221,
"Gflops_wilson": 150.31760175186892,
"L": 12
},
{
"Gflops_dwf4": 790.2716382423531,
"Gflops_staggered": 133.10150763030714,
"Gflops_wilson": 339.1803331715996,
"L": 16
},
{
"Gflops_dwf4": 867.4337937916907,
"Gflops_staggered": 348.15413029524154,
"Gflops_wilson": 618.5334424546753,
"L": 24
},
{
"Gflops_dwf4": 872.6217836538618,
"Gflops_staggered": 471.57989384559534,
"Gflops_wilson": 675.8167829404576,
"L": 32
}
]
},
"geometry": {
"mpi": [
1,
1,
1,
4
],
"nodes": 1,
"ranks": 4
}
}

16
bin/wrapper_omniperf.sh Executable file
View File

@ -0,0 +1,16 @@
#! /usr/bin/env bash
#
# Per-rank launcher wrapper for profiling an MPI job with omniperf.
#
# Usage: <mpi launcher> wrapper_omniperf.sh <command> [args...]
#
# Rank 0 runs <command> under `omniperf profile -n testbench`; every other
# rank runs <command> directly, so only rank 0 is profiled.
#
# The rank is read from the first of the launcher-provided environment
# variables below that is set. If none of them is set, a pre-existing
# MPI_RANK from the caller's environment (if any) is used unchanged.

if [[ $# -eq 0 ]]; then
    echo "usage: ${0##*/} <command> [args...]" >&2
    exit 2
fi

# ${VAR+z} expands to "z" whenever VAR is set (even to the empty string),
# so each test below means "is this variable defined?".
if [[ -n ${OMPI_COMM_WORLD_RANK+z} ]]; then
    # Open MPI
    export MPI_RANK=${OMPI_COMM_WORLD_RANK}
elif [[ -n ${MV2_COMM_WORLD_RANK+z} ]]; then
    # MVAPICH2
    export MPI_RANK=${MV2_COMM_WORLD_RANK}
elif [[ -n ${SLURM_PROCID+z} ]]; then
    # launched through Slurm's srun
    export MPI_RANK=${SLURM_PROCID}
fi

# "$@" keeps every argument as its own word. The previous "$*" joined them
# into a single word, so any command with arguments was looked up as one
# program name and failed; the previous eval re-parsed the arguments, which
# broke quoting and executed shell metacharacters contained in them.
if [[ ${MPI_RANK} == "0" ]]; then
    omniperf profile -n testbench -- "$@"
else
    "$@"
fi