Cleanup. Move benchmarking scripts in Wiki.
This commit is contained in:
@@ -1,45 +0,0 @@
|
|||||||
# Single file teaching version
|
|
||||||
|
|
||||||
bwBench.c contains a single file version of The Bandwidth Benchmark that is tailored for usage in Tutorials or Courses.
|
|
||||||
|
|
||||||
It should compile with any C99 compiler.
|
|
||||||
|
|
||||||
# Benchmarking scripts
|
|
||||||
|
|
||||||
## bench.pl to determine the absolute highest main memory bandwidth
|
|
||||||
|
|
||||||
Wrapper scripts in Perl (bench.pl) and Python (bench.py) are also provided to scan ranges of thread counts and determine the absolute highest sustained main memory bandwidth. In order to use them, `likwid-pin` has to be in your path. Each script takes three required and one optional command line argument:
|
|
||||||
```
|
|
||||||
$./bench.pl <executable> <thread count range> <repetitions> [<SMT setting>]
|
|
||||||
```
|
|
||||||
Example usage:
|
|
||||||
```
|
|
||||||
$./bench.pl ./bwbench-GCC 2-8 6
|
|
||||||
```
|
|
||||||
The script always uses physical cores only and assumes two SMT threads per core by default. For a different SMT thread count, pass it as the 4th command line argument. Example for a processor without SMT:
|
|
||||||
```
|
|
||||||
$./bench.pl ./bwbench-GCC 14-24 10 1
|
|
||||||
```
|
|
||||||
|
|
||||||
## extractResults.pl to generate plottable output files from multiple scaling runs
|
|
||||||
|
|
||||||
Please see how to use it in the toplevel [README](https://github.com/RRZE-HPC/TheBandwidthBenchmark#scaling-runs).
|
|
||||||
|
|
||||||
## benchmarkSystem.pl to benchmark a system and generate plots and markdown for the result wiki
|
|
||||||
|
|
||||||
**Please use with care!**
|
|
||||||
|
|
||||||
The script is designed to be used from the root of TheBandwidthBenchmark.
|
|
||||||
This script cleans and builds the currently configured toolchain. It expects that all Likwid tools are in the path!
|
|
||||||
Desired frequency settings must be already in place.
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
```
|
|
||||||
perl ./benchmarkSystem.pl <DATA-DIR> <EXECUTABLE> <PREFIX>
|
|
||||||
```
|
|
||||||
|
|
||||||
where ```<DATA-DIR>``` is the directory where you want to store all results and generated output.
|
|
||||||
```<EXECUTABLE>``` is the bwBench executable name; it must match the toolchain configured in ```config.mk```, e.g. ```./bwBench-CLANG```.
|
|
||||||
```<PREFIX>``` is the file prefix for all generated output, e.g. `Intel-Haswell`.
|
|
||||||
|
|
||||||
|
|
||||||
@@ -1,49 +0,0 @@
|
|||||||
#!/usr/bin/env perl
|
|
||||||
|
|
||||||
# =======================================================================================
|
|
||||||
#
|
|
||||||
# Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
|
||||||
# Copyright (c) 2019 RRZE, University Erlangen-Nuremberg
|
|
||||||
#
|
|
||||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
# of this software and associated documentation files (the "Software"), to deal
|
|
||||||
# in the Software without restriction, including without limitation the rights
|
|
||||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
# copies of the Software, and to permit persons to whom the Software is
|
|
||||||
# furnished to do so, subject to the following conditions:
|
|
||||||
#
|
|
||||||
# The above copyright notice and this permission notice shall be included in all
|
|
||||||
# copies or substantial portions of the Software.
|
|
||||||
#
|
|
||||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
||||||
# SOFTWARE.
|
|
||||||
#
|
|
||||||
# =======================================================================================
|
|
||||||
|
|
||||||
use strict;
|
|
||||||
use warnings;
|
|
||||||
use utf8;
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
# Scan a range of thread counts, run the benchmark executable several times
# for each count under likwid-pin, and report the benchmark line with the
# highest value seen across all runs.
#
# Arguments:
#   <executable>          command to run (passed to the shell as given)
#   <thread count range>  "FROM-TO", or a single count "N"
#   <repetitions>         number of runs per thread count
#   [<SMT setting>]       last field of the likwid-pin expression, default 2
# ---------------------------------------------------------------------------
my $USAGE =
    "Usage: $0 <executable> <thread count range> <repetitions> [<SMT setting>]\n";

die $USAGE if @ARGV < 3;

my $CMD = $ARGV[0];
my @N   = split /-/, $ARGV[1];
my $R   = $ARGV[2];
my $SMT = $ARGV[3] ? $ARGV[3] : 2;

# A single thread count ("4") is treated as the one-element range "4-4".
$N[1] = $N[0] if @N == 1;

# All numeric arguments end up in a shell command line and in range
# operators, so insist on plain non-negative integers.
for my $value ( @N[ 0, 1 ], $R, $SMT ) {
    die $USAGE unless ( defined($value) && $value =~ /\A[0-9]+\z/ );
}

my $MAX   = 0;
my $CORES = 0;
my $BENCH = '';

for my $numcores ( $N[0] .. $N[1] ) {
    for ( 1 .. $R ) {
        # NOTE: $CMD is interpolated into a shell command on purpose so that
        # it may carry its own arguments; do not feed it untrusted input.
        my $output = `likwid-pin -c E:S0:$numcores:1:$SMT $CMD`;
        next unless defined $output;    # command could not be started

        for my $ln ( split /\n/, $output ) {
            # Result lines look like "<Name>: <number> ..."
            next unless $ln =~ /^([A-Za-z]+):[ ]+([0-9.]+) /;

            if ( $MAX < $2 ) {
                $MAX   = $2;
                $CORES = $numcores;
                $BENCH = $1;
            }
        }
    }
}

warn "No benchmark result line was found in the output of '$CMD'\n"
    if $BENCH eq '';

print "$BENCH was best using $CORES threads: $MAX\n";
|
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -1,421 +0,0 @@
|
|||||||
/*
|
|
||||||
* =======================================================================================
|
|
||||||
*
|
|
||||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
|
||||||
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
* of this software and associated documentation files (the "Software"), to deal
|
|
||||||
* in the Software without restriction, including without limitation the rights
|
|
||||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the Software is
|
|
||||||
* furnished to do so, subject to the following conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be included in all
|
|
||||||
* copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
||||||
* SOFTWARE.
|
|
||||||
*
|
|
||||||
* =======================================================================================
|
|
||||||
*/
|
|
||||||
#define _GNU_SOURCE
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <time.h>
|
|
||||||
#include <limits.h>
|
|
||||||
#include <float.h>
|
|
||||||
|
|
||||||
#ifdef _OPENMP
|
|
||||||
#include <omp.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <likwid-marker.h>
|
|
||||||
|
|
||||||
#define SIZE 120000000ull
|
|
||||||
#define NTIMES 5
|
|
||||||
#define ARRAY_ALIGNMENT 64
|
|
||||||
#define HLINE "----------------------------------------------------------------------------\n"
|
|
||||||
|
|
||||||
#ifndef MIN
|
|
||||||
#define MIN(x,y) ((x)<(y)?(x):(y))
|
|
||||||
#endif
|
|
||||||
#ifndef MAX
|
|
||||||
#define MAX(x,y) ((x)>(y)?(x):(y))
|
|
||||||
#endif
|
|
||||||
#ifndef ABS
|
|
||||||
#define ABS(a) ((a) >= 0 ? (a) : -(a))
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define LIKWID_PROFILE(tag,call) \
|
|
||||||
_Pragma ("omp parallel") \
|
|
||||||
{LIKWID_MARKER_START(#tag);} \
|
|
||||||
times[tag][k] = call; \
|
|
||||||
_Pragma ("omp parallel") \
|
|
||||||
{LIKWID_MARKER_STOP(#tag);}
|
|
||||||
|
|
||||||
/*
 * Identifiers of the benchmark kernels. The values are consecutive starting
 * at zero, so they double as array indices; NUMBENCH is the kernel count.
 */
typedef enum benchmark {
    INIT,
    COPY,
    UPDATE,
    TRIAD,
    DAXPY,
    STRIAD,
    SDAXPY,
    NUMBENCH
} benchmark;
|
|
||||||
|
|
||||||
/* Static description of one benchmark kernel. */
typedef struct {
    char *label;   /* name printed in front of the result row             */
    int   words;   /* multiplied by sizeof(double) * N to get bytes moved  */
    int   flops;   /* multiplied by N to get the floating point op count   */
} benchmarkType;
|
|
||||||
|
|
||||||
extern double init(double*, double, int);
|
|
||||||
extern double copy(double*, double*, int);
|
|
||||||
extern double update(double*, double, int);
|
|
||||||
extern double triad(double*, double*, double*, double, int);
|
|
||||||
extern double daxpy(double*, double*, double, int);
|
|
||||||
extern double striad(double*, double*, double*, double*, int);
|
|
||||||
extern double sdaxpy(double*, double*, double*, int);
|
|
||||||
extern void check(double*, double*, double*, double*, int);
|
|
||||||
extern double getTimeStamp();
|
|
||||||
|
|
||||||
int main (int argc, char** argv)
|
|
||||||
{
|
|
||||||
size_t bytesPerWord = sizeof(double);
|
|
||||||
size_t N = SIZE;
|
|
||||||
double *a, *b, *c, *d;
|
|
||||||
double scalar, tmp;
|
|
||||||
double E, S;
|
|
||||||
|
|
||||||
double avgtime[NUMBENCH],
|
|
||||||
maxtime[NUMBENCH],
|
|
||||||
mintime[NUMBENCH];
|
|
||||||
|
|
||||||
double times[NUMBENCH][NTIMES];
|
|
||||||
|
|
||||||
benchmarkType benchmarks[NUMBENCH] = {
|
|
||||||
{"Init: ", 1, 0},
|
|
||||||
{"Copy: ", 2, 0},
|
|
||||||
{"Update: ", 2, 1},
|
|
||||||
{"Triad: ", 3, 2},
|
|
||||||
{"Daxpy: ", 3, 2},
|
|
||||||
{"STriad: ", 4, 2},
|
|
||||||
{"SDaxpy: ", 4, 2}
|
|
||||||
};
|
|
||||||
|
|
||||||
LIKWID_MARKER_INIT;
|
|
||||||
#pragma omp parallel
|
|
||||||
{
|
|
||||||
LIKWID_MARKER_REGISTER("INIT");
|
|
||||||
LIKWID_MARKER_REGISTER("COPY");
|
|
||||||
LIKWID_MARKER_REGISTER("UPDATE");
|
|
||||||
LIKWID_MARKER_REGISTER("TRIAD");
|
|
||||||
LIKWID_MARKER_REGISTER("DAXPY");
|
|
||||||
LIKWID_MARKER_REGISTER("STRIAD");
|
|
||||||
LIKWID_MARKER_REGISTER("SDAXPY");
|
|
||||||
}
|
|
||||||
|
|
||||||
posix_memalign((void**) &a, ARRAY_ALIGNMENT, N * bytesPerWord );
|
|
||||||
posix_memalign((void**) &b, ARRAY_ALIGNMENT, N * bytesPerWord );
|
|
||||||
posix_memalign((void**) &c, ARRAY_ALIGNMENT, N * bytesPerWord );
|
|
||||||
posix_memalign((void**) &d, ARRAY_ALIGNMENT, N * bytesPerWord );
|
|
||||||
|
|
||||||
for (int i=0; i<NUMBENCH; i++) {
|
|
||||||
avgtime[i] = 0;
|
|
||||||
maxtime[i] = 0;
|
|
||||||
mintime[i] = FLT_MAX;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef _OPENMP
|
|
||||||
printf(HLINE);
|
|
||||||
#pragma omp parallel
|
|
||||||
{
|
|
||||||
int k = omp_get_num_threads();
|
|
||||||
int i = omp_get_thread_num();
|
|
||||||
|
|
||||||
#pragma omp single
|
|
||||||
printf ("OpenMP enabled, running with %d threads\n", k);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#pragma omp parallel for schedule(static)
|
|
||||||
for (int i=0; i<N; i++) {
|
|
||||||
a[i] = 2.0;
|
|
||||||
b[i] = 2.0;
|
|
||||||
c[i] = 0.5;
|
|
||||||
d[i] = 1.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
scalar = 3.0;
|
|
||||||
|
|
||||||
for ( int k=0; k < NTIMES; k++) {
|
|
||||||
LIKWID_PROFILE(INIT,init(b, scalar, N));
|
|
||||||
LIKWID_PROFILE(COPY,copy(c, a, N));
|
|
||||||
LIKWID_PROFILE(UPDATE,update(a, scalar, N));
|
|
||||||
LIKWID_PROFILE(TRIAD,triad(a, b, c, scalar, N));
|
|
||||||
LIKWID_PROFILE(DAXPY,daxpy(a, b, scalar, N));
|
|
||||||
LIKWID_PROFILE(STRIAD,striad(a, b, c, d, N));
|
|
||||||
LIKWID_PROFILE(SDAXPY,sdaxpy(a, b, c, N));
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int j=0; j<NUMBENCH; j++) {
|
|
||||||
for (int k=1; k<NTIMES; k++) {
|
|
||||||
avgtime[j] = avgtime[j] + times[j][k];
|
|
||||||
mintime[j] = MIN(mintime[j], times[j][k]);
|
|
||||||
maxtime[j] = MAX(maxtime[j], times[j][k]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
printf(HLINE);
|
|
||||||
printf("Function Rate(MB/s) Rate(MFlop/s) Avg time Min time Max time\n");
|
|
||||||
for (int j=0; j<NUMBENCH; j++) {
|
|
||||||
avgtime[j] = avgtime[j]/(double)(NTIMES-1);
|
|
||||||
double bytes = (double) benchmarks[j].words * sizeof(double) * N;
|
|
||||||
double flops = (double) benchmarks[j].flops * N;
|
|
||||||
|
|
||||||
if (flops > 0){
|
|
||||||
printf("%s%11.2f %11.2f %11.4f %11.4f %11.4f\n", benchmarks[j].label,
|
|
||||||
1.0E-06 * bytes/mintime[j],
|
|
||||||
1.0E-06 * flops/mintime[j],
|
|
||||||
avgtime[j],
|
|
||||||
mintime[j],
|
|
||||||
maxtime[j]);
|
|
||||||
} else {
|
|
||||||
printf("%s%11.2f - %11.4f %11.4f %11.4f\n", benchmarks[j].label,
|
|
||||||
1.0E-06 * bytes/mintime[j],
|
|
||||||
avgtime[j],
|
|
||||||
mintime[j],
|
|
||||||
maxtime[j]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
printf(HLINE);
|
|
||||||
check(a, b, c, d, N);
|
|
||||||
LIKWID_MARKER_CLOSE;
|
|
||||||
|
|
||||||
return EXIT_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
void check(
|
|
||||||
double * a,
|
|
||||||
double * b,
|
|
||||||
double * c,
|
|
||||||
double * d,
|
|
||||||
int N
|
|
||||||
)
|
|
||||||
{
|
|
||||||
double aj, bj, cj, dj, scalar;
|
|
||||||
double asum, bsum, csum, dsum;
|
|
||||||
double epsilon;
|
|
||||||
|
|
||||||
/* reproduce initialization */
|
|
||||||
aj = 2.0;
|
|
||||||
bj = 2.0;
|
|
||||||
cj = 0.5;
|
|
||||||
dj = 1.0;
|
|
||||||
|
|
||||||
/* now execute timing loop */
|
|
||||||
scalar = 3.0;
|
|
||||||
|
|
||||||
for (int k=0; k<NTIMES; k++) {
|
|
||||||
bj = scalar;
|
|
||||||
cj = aj;
|
|
||||||
aj = aj * scalar;
|
|
||||||
aj = bj + scalar * cj;
|
|
||||||
aj = aj + scalar * bj;
|
|
||||||
aj = bj + cj * dj;
|
|
||||||
aj = aj + bj * cj;
|
|
||||||
}
|
|
||||||
|
|
||||||
aj = aj * (double) (N);
|
|
||||||
bj = bj * (double) (N);
|
|
||||||
cj = cj * (double) (N);
|
|
||||||
dj = dj * (double) (N);
|
|
||||||
|
|
||||||
asum = 0.0; bsum = 0.0; csum = 0.0; dsum = 0.0;
|
|
||||||
|
|
||||||
for (int i=0; i<N; i++) {
|
|
||||||
asum += a[i];
|
|
||||||
bsum += b[i];
|
|
||||||
csum += c[i];
|
|
||||||
dsum += d[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef VERBOSE
|
|
||||||
printf ("Results Comparison: \n");
|
|
||||||
printf (" Expected : %f %f %f \n",aj,bj,cj);
|
|
||||||
printf (" Observed : %f %f %f \n",asum,bsum,csum);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
epsilon = 1.e-8;
|
|
||||||
|
|
||||||
if (ABS(aj-asum)/asum > epsilon) {
|
|
||||||
printf ("Failed Validation on array a[]\n");
|
|
||||||
printf (" Expected : %f \n",aj);
|
|
||||||
printf (" Observed : %f \n",asum);
|
|
||||||
}
|
|
||||||
else if (ABS(bj-bsum)/bsum > epsilon) {
|
|
||||||
printf ("Failed Validation on array b[]\n");
|
|
||||||
printf (" Expected : %f \n",bj);
|
|
||||||
printf (" Observed : %f \n",bsum);
|
|
||||||
}
|
|
||||||
else if (ABS(cj-csum)/csum > epsilon) {
|
|
||||||
printf ("Failed Validation on array c[]\n");
|
|
||||||
printf (" Expected : %f \n",cj);
|
|
||||||
printf (" Observed : %f \n",csum);
|
|
||||||
}
|
|
||||||
else if (ABS(dj-dsum)/dsum > epsilon) {
|
|
||||||
printf ("Failed Validation on array d[]\n");
|
|
||||||
printf (" Expected : %f \n",dj);
|
|
||||||
printf (" Observed : %f \n",dsum);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
printf ("Solution Validates\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Returns the current CLOCK_MONOTONIC reading in seconds as a double. */
double getTimeStamp()
{
    struct timespec now;
    clock_gettime(CLOCK_MONOTONIC, &now);

    const double whole = (double) now.tv_sec;
    const double nanos = (double) now.tv_nsec;

    return whole + nanos * 1.e-9;
}
|
|
||||||
|
|
||||||
/*
 * Init kernel: a[i] = scalar for i in [0, N).
 * Returns the elapsed time in seconds as measured by getTimeStamp().
 */
double init(
        double * restrict a,
        double scalar,
        int N
        )
{
    const double start = getTimeStamp();

#pragma omp parallel for schedule(static)
    for (int idx = 0; idx < N; ++idx) {
        a[idx] = scalar;
    }

    return getTimeStamp() - start;
}
|
|
||||||
|
|
||||||
/*
 * Copy kernel: a[i] = b[i] for i in [0, N).
 * Returns the elapsed time in seconds as measured by getTimeStamp().
 */
double copy(
        double * restrict a,
        double * restrict b,
        int N
        )
{
    const double start = getTimeStamp();

#pragma omp parallel for schedule(static)
    for (int idx = 0; idx < N; ++idx) {
        a[idx] = b[idx];
    }

    return getTimeStamp() - start;
}
|
|
||||||
|
|
||||||
/*
 * Update kernel: a[i] = a[i] * scalar for i in [0, N).
 * Returns the elapsed time in seconds as measured by getTimeStamp().
 */
double update(
        double * restrict a,
        double scalar,
        int N
        )
{
    const double start = getTimeStamp();

#pragma omp parallel for schedule(static)
    for (int idx = 0; idx < N; ++idx) {
        a[idx] *= scalar;
    }

    return getTimeStamp() - start;
}
|
|
||||||
|
|
||||||
/*
 * Triad kernel: a[i] = b[i] + scalar * c[i] for i in [0, N).
 * Returns the elapsed time in seconds as measured by getTimeStamp().
 */
double triad(
        double * restrict a,
        double * restrict b,
        double * restrict c,
        double scalar,
        int N
        )
{
    const double start = getTimeStamp();

#pragma omp parallel for schedule(static)
    for (int idx = 0; idx < N; ++idx) {
        a[idx] = b[idx] + scalar * c[idx];
    }

    return getTimeStamp() - start;
}
|
|
||||||
|
|
||||||
/*
 * Daxpy kernel: a[i] = a[i] + scalar * b[i] for i in [0, N).
 * Returns the elapsed time in seconds as measured by getTimeStamp().
 */
double daxpy(
        double * restrict a,
        double * restrict b,
        double scalar,
        int N
        )
{
    const double start = getTimeStamp();

#pragma omp parallel for schedule(static)
    for (int idx = 0; idx < N; ++idx) {
        a[idx] += scalar * b[idx];
    }

    return getTimeStamp() - start;
}
|
|
||||||
|
|
||||||
/*
 * STriad kernel: a[i] = b[i] + d[i] * c[i] for i in [0, N).
 * Returns the elapsed time in seconds as measured by getTimeStamp().
 */
double striad(
        double * restrict a,
        double * restrict b,
        double * restrict c,
        double * restrict d,
        int N
        )
{
    const double start = getTimeStamp();

#pragma omp parallel for schedule(static)
    for (int idx = 0; idx < N; ++idx) {
        a[idx] = b[idx] + d[idx] * c[idx];
    }

    return getTimeStamp() - start;
}
|
|
||||||
|
|
||||||
/*
 * SDaxpy kernel: a[i] = a[i] + b[i] * c[i] for i in [0, N).
 * Returns the elapsed time in seconds as measured by getTimeStamp().
 */
double sdaxpy(
        double * restrict a,
        double * restrict b,
        double * restrict c,
        int N
        )
{
    const double start = getTimeStamp();

#pragma omp parallel for schedule(static)
    for (int idx = 0; idx < N; ++idx) {
        a[idx] += b[idx] * c[idx];
    }

    return getTimeStamp() - start;
}
|
|
||||||
@@ -1,323 +0,0 @@
|
|||||||
!=======================================================================================
|
|
||||||
!
|
|
||||||
! Author: Jan Eitzinger (je), jan.treibig@gmail.com
|
|
||||||
! Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
|
||||||
!
|
|
||||||
! Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
! of this software and associated documentation files (the "Software"), to deal
|
|
||||||
! in the Software without restriction, including without limitation the rights
|
|
||||||
! to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
! copies of the Software, and to permit persons to whom the Software is
|
|
||||||
! furnished to do so, subject to the following conditions:
|
|
||||||
!
|
|
||||||
! The above copyright notice and this permission notice shall be included in all
|
|
||||||
! copies or substantial portions of the Software.
|
|
||||||
!
|
|
||||||
! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
! FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
! AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
! LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
||||||
! SOFTWARE.
|
|
||||||
!
|
|
||||||
!=======================================================================================
|
|
||||||
|
|
||||||
module timer
    use iso_fortran_env, only: int32, int64, real64
    implicit none
    public :: getTimeStamp
contains
    ! Returns the current system_clock count divided by the clock rate,
    ! i.e. a time stamp in seconds as a real64 value.
    function getTimeStamp() result(ts)
        implicit none

        real(real64) :: ts
        integer(int64) :: ticks, ticks_per_second

        call system_clock(ticks, ticks_per_second)
        ts = real(ticks, real64) / real(ticks_per_second, real64)
    end function getTimeStamp
end module timer
|
|
||||||
|
|
||||||
module constants
    implicit none
    ! Kind parameters of default single and double precision reals.
    integer, parameter :: sp = kind(1.0)
    integer, parameter :: dp = kind(1.0d0)
    ! n: number of array elements, ntimes: repetitions of the kernel sequence.
    integer, parameter :: n = 20000000, ntimes = 10
end module constants
|
|
||||||
|
|
||||||
! Benchmark kernels. Every function runs its loop over the first n elements
! (n from module constants) inside an OpenMP parallel region bracketed by a
! LIKWID marker region of the same name, and returns the elapsed time in
! seconds measured with getTimeStamp() around the whole parallel region.
module benchmarks
    use timer
    use likwid
    use constants
contains

    ! a(i) = scalar
    function init (a, scalar) result(seconds)
        implicit none
        real(dp), allocatable, intent(inout) :: a(:)
        real(dp), intent(in) :: scalar
        real(dp) :: seconds
        real(dp) :: t_begin
        integer :: i

        t_begin = getTimeStamp()
        !$omp parallel
        call likwid_markerStartRegion("INIT")
        !$omp do
        do i = 1, n
            a(i) = scalar
        end do
        call likwid_markerStopRegion("INIT")
        !$omp end parallel
        seconds = getTimeStamp() - t_begin
    end function init

    ! a(i) = b(i)
    function copy (a, b) result(seconds)
        implicit none
        real(dp), allocatable, intent(inout) :: a(:)
        real(dp), allocatable, intent(in) :: b(:)
        real(dp) :: seconds
        real(dp) :: t_begin
        integer :: i

        t_begin = getTimeStamp()
        !$omp parallel
        call likwid_markerStartRegion("COPY")
        !$omp do
        do i = 1, n
            a(i) = b(i)
        end do
        call likwid_markerStopRegion("COPY")
        !$omp end parallel
        seconds = getTimeStamp() - t_begin
    end function copy

    ! a(i) = a(i) * scalar
    function update (a, scalar) result(seconds)
        implicit none
        real(dp), allocatable, intent(inout) :: a(:)
        real(dp), intent(in) :: scalar
        real(dp) :: seconds
        real(dp) :: t_begin
        integer :: i

        t_begin = getTimeStamp()
        !$omp parallel
        call likwid_markerStartRegion("UPDATE")
        !$omp do
        do i = 1, n
            a(i) = a(i) * scalar
        end do
        call likwid_markerStopRegion("UPDATE")
        !$omp end parallel
        seconds = getTimeStamp() - t_begin
    end function update

    ! a(i) = b(i) + scalar * c(i)
    function triad (a, b, c, scalar) result(seconds)
        implicit none
        real(dp), allocatable, intent(inout) :: a(:)
        real(dp), allocatable, intent(in) :: b(:)
        real(dp), allocatable, intent(in) :: c(:)
        real(dp), intent(in) :: scalar
        real(dp) :: seconds
        real(dp) :: t_begin
        integer :: i

        t_begin = getTimeStamp()
        !$omp parallel
        call likwid_markerStartRegion("TRIAD")
        !$omp do
        do i = 1, n
            a(i) = b(i) + scalar * c(i)
        end do
        call likwid_markerStopRegion("TRIAD")
        !$omp end parallel
        seconds = getTimeStamp() - t_begin
    end function triad

    ! a(i) = a(i) + scalar * b(i)
    function daxpy (a, b, scalar) result(seconds)
        implicit none
        real(dp), allocatable, intent(inout) :: a(:)
        real(dp), allocatable, intent(in) :: b(:)
        real(dp), intent(in) :: scalar
        real(dp) :: seconds
        real(dp) :: t_begin
        integer :: i

        t_begin = getTimeStamp()
        !$omp parallel
        call likwid_markerStartRegion("DAXPY")
        !$omp do
        do i = 1, n
            a(i) = a(i) + scalar * b(i)
        end do
        call likwid_markerStopRegion("DAXPY")
        !$omp end parallel
        seconds = getTimeStamp() - t_begin
    end function daxpy

    ! a(i) = b(i) + c(i) * d(i)
    function striad (a, b, c, d) result(seconds)
        implicit none
        real(dp), allocatable, intent(inout) :: a(:)
        real(dp), allocatable, intent(in) :: b(:)
        real(dp), allocatable, intent(in) :: c(:)
        real(dp), allocatable, intent(in) :: d(:)
        real(dp) :: seconds
        real(dp) :: t_begin
        integer :: i

        t_begin = getTimeStamp()
        !$omp parallel
        call likwid_markerStartRegion("STRIAD")
        !$omp do
        do i = 1, n
            a(i) = b(i) + c(i) * d(i)
        end do
        call likwid_markerStopRegion("STRIAD")
        !$omp end parallel
        seconds = getTimeStamp() - t_begin
    end function striad

    ! a(i) = a(i) + b(i) * c(i)
    function sdaxpy (a, b, c) result(seconds)
        implicit none
        real(dp), allocatable, intent(inout) :: a(:)
        real(dp), allocatable, intent(in) :: b(:)
        real(dp), allocatable, intent(in) :: c(:)
        real(dp) :: seconds
        real(dp) :: t_begin
        integer :: i

        t_begin = getTimeStamp()
        !$omp parallel
        call likwid_markerStartRegion("SDAXPY")
        !$omp do
        do i = 1, n
            a(i) = a(i) + b(i) * c(i)
        end do
        call likwid_markerStopRegion("SDAXPY")
        !$omp end parallel
        seconds = getTimeStamp() - t_begin
    end function sdaxpy

end module benchmarks
|
|
||||||
|
|
||||||
! Benchmark driver: allocates four arrays of n doubles, registers the LIKWID
! marker regions, runs the seven kernels ntimes times and prints bandwidth
! (derived from the minimum time) plus avg/min/max time per kernel.
!
! The first repetition (k = 1) is excluded from the statistics, which is why
! the average is divided by ntimes-1.
program bwBench
    use constants
    use benchmarks
    use likwid

    implicit none
    integer, parameter :: numbench = 7
    real(kind=dp), allocatable :: a(:), b(:), c(:), d(:)
    real(kind=dp) :: scalar
    real(kind=dp) :: maxtime(numbench), mintime(numbench), avgtime(numbench), &
        times(numbench,ntimes)
    integer :: i, k
    ! NOTE: default integer; the largest entry is 4 * 8 * n, which fits for the
    ! n defined in module constants but would overflow for much larger n.
    integer :: bytes(numbench)
    integer :: bytesPerWord
    character :: label(numbench)*11

    !$ INTEGER omp_get_num_threads
    !$ EXTERNAL omp_get_num_threads

    bytesPerWord = 8

    bytes(1) = 1 * bytesPerWord * n ! init
    bytes(2) = 2 * bytesPerWord * n ! copy
    bytes(3) = 2 * bytesPerWord * n ! update
    bytes(4) = 3 * bytesPerWord * n ! triad
    bytes(5) = 3 * bytesPerWord * n ! daxpy
    bytes(6) = 4 * bytesPerWord * n ! striad
    bytes(7) = 4 * bytesPerWord * n ! sdaxpy

    label(1) = " Init: "
    label(2) = " Copy: "
    label(3) = " Update: "
    label(4) = " Triad: "
    label(5) = " Daxpy: "
    label(6) = " STriad: "
    label(7) = " SDaxpy: "

    avgtime = 0.0D0
    mintime = 1.0D+36
    maxtime = 0.0D0

    allocate(a(n))
    allocate(b(n))
    allocate(c(n))
    allocate(d(n))

    call likwid_markerInit()

    !$omp parallel
    call likwid_markerRegisterRegion("INIT")
    call likwid_markerRegisterRegion("COPY")
    call likwid_markerRegisterRegion("UPDATE")
    call likwid_markerRegisterRegion("TRIAD")
    call likwid_markerRegisterRegion("DAXPY")
    call likwid_markerRegisterRegion("STRIAD")
    call likwid_markerRegisterRegion("SDAXPY")

    !$omp master
    print *,'----------------------------------------------'
    !$ print *,'Number of Threads = ',OMP_GET_NUM_THREADS()
    !$omp end master
    !$omp end parallel

    PRINT *,'----------------------------------------------'

    !$OMP PARALLEL DO
    do i = 1, n
        a(i) = 2.0d0
        b(i) = 2.0d0
        c(i) = 0.5d0
        d(i) = 1.0d0
    end do

    scalar = 3.0d0

    do k = 1, ntimes
        times(1, k) = init(b, scalar)
        times(2, k) = copy(c, a)
        times(3, k) = update(a, scalar)
        times(4, k) = triad(a, b, c, scalar)
        times(5, k) = daxpy(a, b, scalar)
        times(6, k) = striad(a, b, c, d)
        times(7, k) = sdaxpy(a, b, c)
    end do

    ! Skip the first repetition so that the ntimes-1 divisor below matches
    ! the number of accumulated samples.
    do k = 2, ntimes
        do i = 1, numbench
            avgtime(i) = avgtime(i) + times(i, k)
            mintime(i) = MIN(mintime(i), times(i, k))
            ! Track the running maximum (must compare against maxtime).
            maxtime(i) = MAX(maxtime(i), times(i, k))
        end do
    end do

    print *,"-------------------------------------------------------------"
    print *,"Function Rate (MB/s) Avg time Min time Max time"

    do i = 1, numbench
        avgtime(i) = avgtime(i)/dble(ntimes-1)
        print "(a,f12.2, 2x, 3 (f10.4,3x))", label(i), bytes(i)/mintime(i)/1.0D6, &
            avgtime(i), mintime(i), maxtime(i)
    end do
    print *,"-------------------------------------------------------------"

    call likwid_markerClose()
end program bwBench
|
|
||||||
399
util/bwBench.c
399
util/bwBench.c
@@ -1,399 +0,0 @@
|
|||||||
/*
|
|
||||||
* =======================================================================================
|
|
||||||
*
|
|
||||||
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
|
||||||
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
* of this software and associated documentation files (the "Software"), to deal
|
|
||||||
* in the Software without restriction, including without limitation the rights
|
|
||||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the Software is
|
|
||||||
* furnished to do so, subject to the following conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be included in all
|
|
||||||
* copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
||||||
* SOFTWARE.
|
|
||||||
*
|
|
||||||
* =======================================================================================
|
|
||||||
*/
|
|
||||||
#define _GNU_SOURCE
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <time.h>
|
|
||||||
#include <limits.h>
|
|
||||||
#include <float.h>
|
|
||||||
|
|
||||||
#ifdef _OPENMP
|
|
||||||
#include <omp.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Number of double-precision elements in each benchmark array. */
#define SIZE 120000000ull
/* Number of times every kernel is executed and timed. */
#define NTIMES 5
/* Alignment in bytes requested for the benchmark arrays. */
#define ARRAY_ALIGNMENT 64
/* Separator line used in the printed result table. */
#define HLINE "----------------------------------------------------------------------------\n"

/* Generic helpers. Note that each argument may be evaluated more than once. */
#if !defined(MIN)
#define MIN(lhs,rhs) ((lhs)<(rhs)?(lhs):(rhs))
#endif
#if !defined(MAX)
#define MAX(lhs,rhs) ((lhs)>(rhs)?(lhs):(rhs))
#endif
#if !defined(ABS)
#define ABS(val) ((val) >= 0 ? (val) : -(val))
#endif
|
|
||||||
|
|
||||||
/*
 * Identifies one kernel. The numeric value is used as the row index into
 * the timing and description tables; NUMBENCH is the number of kernels.
 */
typedef enum benchmark {
    INIT     = 0,
    COPY     = 1,
    UPDATE   = 2,
    TRIAD    = 3,
    DAXPY    = 4,
    STRIAD   = 5,
    SDAXPY   = 6,
    NUMBENCH = 7
} benchmark;
|
|
||||||
|
|
||||||
/* Static description of one kernel, used when printing the result table. */
typedef struct {
    char *label;    /* text printed in the first column of the result row */
    int   words;    /* scaled by sizeof(double) * N to get the data volume in bytes */
    int   flops;    /* scaled by N to get the floating-point operation count */
} benchmarkType;
|
|
||||||
|
|
||||||
/* Kernels: each one runs over N elements and returns a double (elapsed seconds). */
extern double init(double* a, double scalar, int N);
extern double copy(double* a, double* b, int N);
extern double update(double* a, double scalar, int N);
extern double triad(double* a, double* b, double* c, double scalar, int N);
extern double daxpy(double* a, double* b, double scalar, int N);
extern double striad(double* a, double* b, double* c, double* d, int N);
extern double sdaxpy(double* a, double* b, double* c, int N);
/* Result validation and the timer used by the kernels. */
extern void check(double* a, double* b, double* c, double* d, int N);
extern double getTimeStamp();
|
|
||||||
|
|
||||||
int main (int argc, char** argv)
|
|
||||||
{
|
|
||||||
size_t bytesPerWord = sizeof(double);
|
|
||||||
size_t N = SIZE;
|
|
||||||
double *a, *b, *c, *d;
|
|
||||||
double scalar, tmp;
|
|
||||||
double E, S;
|
|
||||||
|
|
||||||
double avgtime[NUMBENCH],
|
|
||||||
maxtime[NUMBENCH],
|
|
||||||
mintime[NUMBENCH];
|
|
||||||
|
|
||||||
double times[NUMBENCH][NTIMES];
|
|
||||||
|
|
||||||
benchmarkType benchmarks[NUMBENCH] = {
|
|
||||||
{"Init: ", 1, 0},
|
|
||||||
{"Copy: ", 2, 0},
|
|
||||||
{"Update: ", 2, 1},
|
|
||||||
{"Triad: ", 3, 2},
|
|
||||||
{"Daxpy: ", 3, 2},
|
|
||||||
{"STriad: ", 4, 2},
|
|
||||||
{"SDaxpy: ", 4, 2}
|
|
||||||
};
|
|
||||||
|
|
||||||
posix_memalign((void**) &a, ARRAY_ALIGNMENT, N * bytesPerWord );
|
|
||||||
posix_memalign((void**) &b, ARRAY_ALIGNMENT, N * bytesPerWord );
|
|
||||||
posix_memalign((void**) &c, ARRAY_ALIGNMENT, N * bytesPerWord );
|
|
||||||
posix_memalign((void**) &d, ARRAY_ALIGNMENT, N * bytesPerWord );
|
|
||||||
|
|
||||||
for (int i=0; i<NUMBENCH; i++) {
|
|
||||||
avgtime[i] = 0;
|
|
||||||
maxtime[i] = 0;
|
|
||||||
mintime[i] = FLT_MAX;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef _OPENMP
|
|
||||||
printf(HLINE);
|
|
||||||
#pragma omp parallel
|
|
||||||
{
|
|
||||||
int k = omp_get_num_threads();
|
|
||||||
int i = omp_get_thread_num();
|
|
||||||
|
|
||||||
#pragma omp single
|
|
||||||
printf ("OpenMP enabled, running with %d threads\n", k);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#pragma omp parallel for schedule(static)
|
|
||||||
for (int i=0; i<N; i++) {
|
|
||||||
a[i] = 2.0;
|
|
||||||
b[i] = 2.0;
|
|
||||||
c[i] = 0.5;
|
|
||||||
d[i] = 1.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
scalar = 3.0;
|
|
||||||
|
|
||||||
for ( int k=0; k < NTIMES; k++) {
|
|
||||||
times[INIT][k] = init(b, scalar, N);
|
|
||||||
times[COPY][k] = copy(c, a, N);
|
|
||||||
times[UPDATE][k] = update(a, scalar, N);
|
|
||||||
times[TRIAD][k] = triad(a, b, c, scalar, N);
|
|
||||||
times[DAXPY][k] = daxpy(a, b, scalar, N);
|
|
||||||
times[STRIAD][k] = striad(a, b, c, d, N);
|
|
||||||
times[SDAXPY][k] = sdaxpy(a, b, c, N);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int j=0; j<NUMBENCH; j++) {
|
|
||||||
for (int k=1; k<NTIMES; k++) {
|
|
||||||
avgtime[j] = avgtime[j] + times[j][k];
|
|
||||||
mintime[j] = MIN(mintime[j], times[j][k]);
|
|
||||||
maxtime[j] = MAX(maxtime[j], times[j][k]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
printf(HLINE);
|
|
||||||
printf("Function Rate(MB/s) Rate(MFlop/s) Avg time Min time Max time\n");
|
|
||||||
for (int j=0; j<NUMBENCH; j++) {
|
|
||||||
avgtime[j] = avgtime[j]/(double)(NTIMES-1);
|
|
||||||
double bytes = (double) benchmarks[j].words * sizeof(double) * N;
|
|
||||||
double flops = (double) benchmarks[j].flops * N;
|
|
||||||
|
|
||||||
if (flops > 0){
|
|
||||||
printf("%s%11.2f %11.2f %11.4f %11.4f %11.4f\n", benchmarks[j].label,
|
|
||||||
1.0E-06 * bytes/mintime[j],
|
|
||||||
1.0E-06 * flops/mintime[j],
|
|
||||||
avgtime[j],
|
|
||||||
mintime[j],
|
|
||||||
maxtime[j]);
|
|
||||||
} else {
|
|
||||||
printf("%s%11.2f - %11.4f %11.4f %11.4f\n", benchmarks[j].label,
|
|
||||||
1.0E-06 * bytes/mintime[j],
|
|
||||||
avgtime[j],
|
|
||||||
mintime[j],
|
|
||||||
maxtime[j]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
printf(HLINE);
|
|
||||||
check(a, b, c, d, N);
|
|
||||||
|
|
||||||
return EXIT_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
void check(
|
|
||||||
double * a,
|
|
||||||
double * b,
|
|
||||||
double * c,
|
|
||||||
double * d,
|
|
||||||
int N
|
|
||||||
)
|
|
||||||
{
|
|
||||||
double aj, bj, cj, dj, scalar;
|
|
||||||
double asum, bsum, csum, dsum;
|
|
||||||
double epsilon;
|
|
||||||
|
|
||||||
/* reproduce initialization */
|
|
||||||
aj = 2.0;
|
|
||||||
bj = 2.0;
|
|
||||||
cj = 0.5;
|
|
||||||
dj = 1.0;
|
|
||||||
|
|
||||||
/* now execute timing loop */
|
|
||||||
scalar = 3.0;
|
|
||||||
|
|
||||||
for (int k=0; k<NTIMES; k++) {
|
|
||||||
bj = scalar;
|
|
||||||
cj = aj;
|
|
||||||
aj = aj * scalar;
|
|
||||||
aj = bj + scalar * cj;
|
|
||||||
aj = aj + scalar * bj;
|
|
||||||
aj = bj + cj * dj;
|
|
||||||
aj = aj + bj * cj;
|
|
||||||
}
|
|
||||||
|
|
||||||
aj = aj * (double) (N);
|
|
||||||
bj = bj * (double) (N);
|
|
||||||
cj = cj * (double) (N);
|
|
||||||
dj = dj * (double) (N);
|
|
||||||
|
|
||||||
asum = 0.0; bsum = 0.0; csum = 0.0; dsum = 0.0;
|
|
||||||
|
|
||||||
for (int i=0; i<N; i++) {
|
|
||||||
asum += a[i];
|
|
||||||
bsum += b[i];
|
|
||||||
csum += c[i];
|
|
||||||
dsum += d[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef VERBOSE
|
|
||||||
printf ("Results Comparison: \n");
|
|
||||||
printf (" Expected : %f %f %f \n",aj,bj,cj);
|
|
||||||
printf (" Observed : %f %f %f \n",asum,bsum,csum);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
epsilon = 1.e-8;
|
|
||||||
|
|
||||||
if (ABS(aj-asum)/asum > epsilon) {
|
|
||||||
printf ("Failed Validation on array a[]\n");
|
|
||||||
printf (" Expected : %f \n",aj);
|
|
||||||
printf (" Observed : %f \n",asum);
|
|
||||||
}
|
|
||||||
else if (ABS(bj-bsum)/bsum > epsilon) {
|
|
||||||
printf ("Failed Validation on array b[]\n");
|
|
||||||
printf (" Expected : %f \n",bj);
|
|
||||||
printf (" Observed : %f \n",bsum);
|
|
||||||
}
|
|
||||||
else if (ABS(cj-csum)/csum > epsilon) {
|
|
||||||
printf ("Failed Validation on array c[]\n");
|
|
||||||
printf (" Expected : %f \n",cj);
|
|
||||||
printf (" Observed : %f \n",csum);
|
|
||||||
}
|
|
||||||
else if (ABS(dj-dsum)/dsum > epsilon) {
|
|
||||||
printf ("Failed Validation on array d[]\n");
|
|
||||||
printf (" Expected : %f \n",dj);
|
|
||||||
printf (" Observed : %f \n",dsum);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
printf ("Solution Validates\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
double getTimeStamp()
|
|
||||||
{
|
|
||||||
struct timespec ts;
|
|
||||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
|
||||||
return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Stores scalar into a[0..N-1]; returns the elapsed time in seconds. */
double init(
        double * restrict a,
        double scalar,
        int N
        )
{
    const double start = getTimeStamp();

#pragma omp parallel for schedule(static)
    for (int idx = 0; idx < N; ++idx) {
        a[idx] = scalar;
    }

    const double stop = getTimeStamp();
    return stop - start;
}
|
|
||||||
|
|
||||||
/* Copies b[0..N-1] into a[0..N-1]; returns the elapsed time in seconds. */
double copy(
        double * restrict a,
        double * restrict b,
        int N
        )
{
    const double start = getTimeStamp();

#pragma omp parallel for schedule(static)
    for (int idx = 0; idx < N; ++idx) {
        a[idx] = b[idx];
    }

    const double stop = getTimeStamp();
    return stop - start;
}
|
|
||||||
|
|
||||||
/* Scales a[0..N-1] in place by scalar; returns the elapsed time in seconds. */
double update(
        double * restrict a,
        double scalar,
        int N
        )
{
    const double start = getTimeStamp();

#pragma omp parallel for schedule(static)
    for (int idx = 0; idx < N; ++idx) {
        a[idx] = a[idx] * scalar;
    }

    const double stop = getTimeStamp();
    return stop - start;
}
|
|
||||||
|
|
||||||
/* Computes a[i] = b[i] + scalar * c[i] for i in [0, N); returns elapsed seconds. */
double triad(
        double * restrict a,
        double * restrict b,
        double * restrict c,
        double scalar,
        int N
        )
{
    const double start = getTimeStamp();

#pragma omp parallel for schedule(static)
    for (int idx = 0; idx < N; ++idx) {
        a[idx] = b[idx] + scalar * c[idx];
    }

    const double stop = getTimeStamp();
    return stop - start;
}
|
|
||||||
|
|
||||||
/* Computes a[i] = a[i] + scalar * b[i] for i in [0, N); returns elapsed seconds. */
double daxpy(
        double * restrict a,
        double * restrict b,
        double scalar,
        int N
        )
{
    const double start = getTimeStamp();

#pragma omp parallel for schedule(static)
    for (int idx = 0; idx < N; ++idx) {
        a[idx] = a[idx] + scalar * b[idx];
    }

    const double stop = getTimeStamp();
    return stop - start;
}
|
|
||||||
|
|
||||||
/* Computes a[i] = b[i] + d[i] * c[i] for i in [0, N); returns elapsed seconds. */
double striad(
        double * restrict a,
        double * restrict b,
        double * restrict c,
        double * restrict d,
        int N
        )
{
    const double start = getTimeStamp();

#pragma omp parallel for schedule(static)
    for (int idx = 0; idx < N; ++idx) {
        a[idx] = b[idx] + d[idx] * c[idx];
    }

    const double stop = getTimeStamp();
    return stop - start;
}
|
|
||||||
|
|
||||||
/* Computes a[i] = a[i] + b[i] * c[i] for i in [0, N); returns elapsed seconds. */
double sdaxpy(
        double * restrict a,
        double * restrict b,
        double * restrict c,
        int N
        )
{
    const double start = getTimeStamp();

#pragma omp parallel for schedule(static)
    for (int idx = 0; idx < N; ++idx) {
        a[idx] = a[idx] + b[idx] * c[idx];
    }

    const double stop = getTimeStamp();
    return stop - start;
}
|
|
||||||
283
util/bwBench.f90
283
util/bwBench.f90
@@ -1,283 +0,0 @@
|
|||||||
!=======================================================================================
|
|
||||||
!
|
|
||||||
! Author: Jan Eitzinger (je), jan.treibig@gmail.com
|
|
||||||
! Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
|
||||||
!
|
|
||||||
! Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
! of this software and associated documentation files (the "Software"), to deal
|
|
||||||
! in the Software without restriction, including without limitation the rights
|
|
||||||
! to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
! copies of the Software, and to permit persons to whom the Software is
|
|
||||||
! furnished to do so, subject to the following conditions:
|
|
||||||
!
|
|
||||||
! The above copyright notice and this permission notice shall be included in all
|
|
||||||
! copies or substantial portions of the Software.
|
|
||||||
!
|
|
||||||
! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
! FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
! AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
! LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
||||||
! SOFTWARE.
|
|
||||||
!
|
|
||||||
!=======================================================================================
|
|
||||||
|
|
||||||
module timer
    use iso_fortran_env, only: int32, int64, real64
    implicit none
    public :: getTimeStamp
contains
    ! Returns the current SYSTEM_CLOCK count divided by the clock rate,
    ! i.e. a time stamp in seconds as a 64-bit real.
    function getTimeStamp() result(ts)
        implicit none

        real(real64) :: ts
        integer(int64) :: ticks, ticks_per_second

        call system_clock(ticks, ticks_per_second)
        ts = real(ticks, real64) / real(ticks_per_second, real64)
    end function getTimeStamp
end module timer
|
|
||||||
|
|
||||||
module constants
    implicit none
    ! Kind parameters for default (single) and double precision reals.
    integer, parameter :: sp = kind(0.0e0)
    integer, parameter :: dp = kind(0.0d0)
    ! Number of elements per benchmark array.
    integer, parameter :: n = 20000000
    ! Number of times every kernel is executed and timed.
    integer, parameter :: ntimes = 10
end module constants
|
|
||||||
|
|
||||||
! Benchmark kernels. Every function runs one loop over the first n elements
! (n from module constants) inside an OpenMP parallel do and returns the
! elapsed time in seconds, measured with getTimeStamp from module timer.
module benchmarks
    use timer
    use constants
contains

    ! a(i) = scalar
    function init (a, scalar) result(seconds)
        implicit none
        real(kind=dp), allocatable, intent(inout) :: a(:)
        real(kind=dp), intent(in) :: scalar
        real(kind=dp) :: seconds
        real(kind=dp) :: t_start, t_stop
        integer :: idx

        t_start = getTimeStamp()
        !$OMP PARALLEL DO
        do idx = 1, n
            a(idx) = scalar
        end do
        !$OMP END PARALLEL DO
        t_stop = getTimeStamp()

        seconds = t_stop - t_start
    end function init

    ! a(i) = b(i)
    function copy (a, b) result(seconds)
        implicit none
        real(kind=dp), allocatable, intent(inout) :: a(:)
        real(kind=dp), allocatable, intent(in) :: b(:)
        real(kind=dp) :: seconds
        real(kind=dp) :: t_start, t_stop
        integer :: idx

        t_start = getTimeStamp()
        !$OMP PARALLEL DO
        do idx = 1, n
            a(idx) = b(idx)
        end do
        !$OMP END PARALLEL DO
        t_stop = getTimeStamp()

        seconds = t_stop - t_start
    end function copy

    ! a(i) = a(i) * scalar
    function update (a, scalar) result(seconds)
        implicit none
        real(kind=dp), allocatable, intent(inout) :: a(:)
        real(kind=dp), intent(in) :: scalar
        real(kind=dp) :: seconds
        real(kind=dp) :: t_start, t_stop
        integer :: idx

        t_start = getTimeStamp()
        !$OMP PARALLEL DO
        do idx = 1, n
            a(idx) = a(idx) * scalar
        end do
        !$OMP END PARALLEL DO
        t_stop = getTimeStamp()

        seconds = t_stop - t_start
    end function update

    ! a(i) = b(i) + scalar * c(i)
    function triad (a, b, c, scalar) result(seconds)
        implicit none
        real(kind=dp), allocatable, intent(inout) :: a(:)
        real(kind=dp), allocatable, intent(in) :: b(:)
        real(kind=dp), allocatable, intent(in) :: c(:)
        real(kind=dp), intent(in) :: scalar
        real(kind=dp) :: seconds
        real(kind=dp) :: t_start, t_stop
        integer :: idx

        t_start = getTimeStamp()
        !$OMP PARALLEL DO
        do idx = 1, n
            a(idx) = b(idx) + scalar * c(idx)
        end do
        !$OMP END PARALLEL DO
        t_stop = getTimeStamp()

        seconds = t_stop - t_start
    end function triad

    ! a(i) = a(i) + scalar * b(i)
    function daxpy (a, b, scalar) result(seconds)
        implicit none
        real(kind=dp), allocatable, intent(inout) :: a(:)
        real(kind=dp), allocatable, intent(in) :: b(:)
        real(kind=dp), intent(in) :: scalar
        real(kind=dp) :: seconds
        real(kind=dp) :: t_start, t_stop
        integer :: idx

        t_start = getTimeStamp()
        !$OMP PARALLEL DO
        do idx = 1, n
            a(idx) = a(idx) + scalar * b(idx)
        end do
        !$OMP END PARALLEL DO
        t_stop = getTimeStamp()

        seconds = t_stop - t_start
    end function daxpy

    ! a(i) = b(i) + c(i) * d(i)
    function striad (a, b, c, d) result(seconds)
        implicit none
        real(kind=dp), allocatable, intent(inout) :: a(:)
        real(kind=dp), allocatable, intent(in) :: b(:)
        real(kind=dp), allocatable, intent(in) :: c(:)
        real(kind=dp), allocatable, intent(in) :: d(:)
        real(kind=dp) :: seconds
        real(kind=dp) :: t_start, t_stop
        integer :: idx

        t_start = getTimeStamp()
        !$OMP PARALLEL DO
        do idx = 1, n
            a(idx) = b(idx) + c(idx) * d(idx)
        end do
        !$OMP END PARALLEL DO
        t_stop = getTimeStamp()

        seconds = t_stop - t_start
    end function striad

    ! a(i) = a(i) + b(i) * c(i)
    function sdaxpy (a, b, c) result(seconds)
        implicit none
        real(kind=dp), allocatable, intent(inout) :: a(:)
        real(kind=dp), allocatable, intent(in) :: b(:)
        real(kind=dp), allocatable, intent(in) :: c(:)
        real(kind=dp) :: seconds
        real(kind=dp) :: t_start, t_stop
        integer :: idx

        t_start = getTimeStamp()
        !$OMP PARALLEL DO
        do idx = 1, n
            a(idx) = a(idx) + b(idx) * c(idx)
        end do
        !$OMP END PARALLEL DO
        t_stop = getTimeStamp()

        seconds = t_stop - t_start
    end function sdaxpy

end module benchmarks
|
|
||||||
|
|
||||||
! Benchmark driver: allocates four arrays of n doubles, runs every kernel
! ntimes times and prints, per kernel, the bandwidth derived from the fastest
! run together with the average, minimum and maximum runtime.
program bwBench
    use constants
    use benchmarks

    implicit none

    integer, parameter :: numbench = 7
    real(kind=dp), allocatable :: a(:), b(:), c(:), d(:)
    real(kind=dp) :: scalar
    real(kind=dp) :: maxtime(numbench), mintime(numbench), avgtime(numbench), &
        times(numbench,ntimes)
    integer :: i, k
    ! Data volume per kernel in bytes. Kept as a real so that larger values
    ! of n cannot overflow a default integer.
    real(kind=dp) :: bytes(numbench)
    integer :: bytesPerWord
    character :: label(numbench)*11

    !$ INTEGER omp_get_num_threads
    !$ EXTERNAL omp_get_num_threads

    bytesPerWord = 8

    bytes(1) = 1.0d0 * bytesPerWord * n ! init
    bytes(2) = 2.0d0 * bytesPerWord * n ! copy
    bytes(3) = 2.0d0 * bytesPerWord * n ! update
    bytes(4) = 3.0d0 * bytesPerWord * n ! triad
    bytes(5) = 3.0d0 * bytesPerWord * n ! daxpy
    bytes(6) = 4.0d0 * bytesPerWord * n ! striad
    bytes(7) = 4.0d0 * bytesPerWord * n ! sdaxpy

    label(1) = " Init: "
    label(2) = " Copy: "
    label(3) = " Update: "
    label(4) = " Triad: "
    label(5) = " Daxpy: "
    label(6) = " STriad: "
    label(7) = " SDaxpy: "

    do i = 1, numbench
        avgtime(i) = 0.0D0
        mintime(i) = 1.0D+36
        maxtime(i) = 0.0D0
    end do

    allocate(a(n))
    allocate(b(n))
    allocate(c(n))
    allocate(d(n))

    !$omp parallel
    !$omp master
    print *,'----------------------------------------------'
    !$ print *,'Number of Threads = ',OMP_GET_NUM_THREADS()
    !$omp end master
    !$omp end parallel

    PRINT *,'----------------------------------------------'

    !$OMP PARALLEL DO
    do i = 1, n
        a(i) = 2.0d0
        b(i) = 2.0d0
        c(i) = 0.5d0
        d(i) = 1.0d0
    end do

    scalar = 3.0d0

    do k = 1, ntimes
        times(1, k) = init(b, scalar)
        times(2, k) = copy(c, a)
        times(3, k) = update(a, scalar)
        times(4, k) = triad(a, b, c, scalar)
        times(5, k) = daxpy(a, b, scalar)
        times(6, k) = striad(a, b, c, d)
        times(7, k) = sdaxpy(a, b, c)
    end do

    ! The first repetition is excluded from the statistics, which matches
    ! the division by ntimes-1 when the average is computed below.
    do k = 2, ntimes
        do i = 1, numbench
            avgtime(i) = avgtime(i) + times(i, k)
            mintime(i) = MIN(mintime(i), times(i, k))
            ! Track the running maximum (the original compared against mintime).
            maxtime(i) = MAX(maxtime(i), times(i, k))
        end do
    end do

    print *,"-------------------------------------------------------------"
    print *,"Function Rate (MB/s) Avg time Min time Max time"

    do i = 1, numbench
        avgtime(i) = avgtime(i)/dble(ntimes-1)
        print "(a,f12.2, 2x, 3 (f10.4,3x))", label(i), bytes(i)/mintime(i)/1.0D6, &
            avgtime(i), mintime(i), maxtime(i)
    end do
    print *,"-------------------------------------------------------------"

    deallocate(a)
    deallocate(b)
    deallocate(c)
    deallocate(d)

end program bwBench
|
|
||||||
@@ -1,91 +0,0 @@
|
|||||||
#!/usr/bin/env perl
|
|
||||||
# =======================================================================================
|
|
||||||
#
|
|
||||||
# Author: Jan Eitzinger (je), jan.eitzinger@fau.de
|
|
||||||
# Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
|
|
||||||
#
|
|
||||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
# of this software and associated documentation files (the "Software"), to deal
|
|
||||||
# in the Software without restriction, including without limitation the rights
|
|
||||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
# copies of the Software, and to permit persons to whom the Software is
|
|
||||||
# furnished to do so, subject to the following conditions:
|
|
||||||
#
|
|
||||||
# The above copyright notice and this permission notice shall be included in all
|
|
||||||
# copies or substantial portions of the Software.
|
|
||||||
#
|
|
||||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
||||||
# SOFTWARE.
|
|
||||||
#
|
|
||||||
# =======================================================================================
|
|
||||||
use strict;
|
|
||||||
use warnings;
|
|
||||||
use utf8;
|
|
||||||
|
|
||||||
# Collects the bandwidth results of a scaling run.
#
# Usage: <script> <DIR> [<UNIT>]
#   DIR   directory with one result file per run; the thread count is taken
#         from a "-<number>.txt" file name suffix (default 1).
#   UNIT  'MB' (default), 'GB' (rates are multiplied by 0.001) or a plain
#         numeric scale factor.
#
# Prints one tab separated row per thread count with one column per test case.
my ($DIR, $UNIT) = @ARGV;

if (not defined $DIR) {
    die "Need directory: $0 <DIR>\n";
}

if (not -d $DIR) {
    die "Not a directory: $DIR\n";
}

# Turn the unit argument into the numeric factor applied to every rate.
if (not defined $UNIT or $UNIT eq 'MB') {
    $UNIT = 1.0;
}
elsif ($UNIT eq 'GB') {
    $UNIT = 0.001;
}
elsif ($UNIT !~ /\A(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)\z/) {
    die "Unknown unit '$UNIT': expected 'MB', 'GB' or a numeric factor\n";
}

my %RES;

my @testcases = ('Init', 'Sum', 'Copy', 'Update', 'Triad', 'Daxpy', 'STriad', 'SDaxpy');

foreach my $file (glob($DIR . '/*')) {
    # Only regular files can hold results; skip sub-directories and the like.
    next if not -f $file;

    my $nt = 1;
    if ($file =~ /-([0-9]+)\.txt\z/) {
        $nt = $1;
    }

    my $fh;
    if (not open($fh, '<', $file)) {
        warn "can't open file $file: $!\n";
        next;
    }
    $RES{$nt} = {};

    while (my $line = <$fh>) {
        my @fields = split(/[ ]+/, $line);

        # A result row consists of exactly six space separated columns ...
        next if scalar(@fields) != 6;
        # ... and its second column is the plain decimal rate.
        next if $fields[1] !~ /\A[0-9]+(?:\.[0-9]*)?\z/;

        (my $name = $fields[0]) =~ s/://;
        $RES{$nt}->{$name} = $fields[1] * $UNIT;
    }

    close $fh or die "can't close file $!";
}

print join("\t", '#nt', @testcases), "\n";

# Scaled-down rates get two decimals, unscaled ones are printed as integers.
my $format = ($UNIT > 0.1) ? "\t%.0f" : "\t%.2f";

foreach my $key (sort {$a <=> $b} keys %RES) {
    printf "%d", $key;

    foreach my $test ( @testcases ) {
        # Test cases missing from a result file are reported as 0.
        my $value = $RES{$key}->{$test};
        $value = 0 if not defined $value;
        printf $format, $value;
    }
    print "\n";
}
|
|
||||||
Reference in New Issue
Block a user