Cleanup. Move benchmarking scripts in Wiki.

This commit is contained in:
Jan Eitzinger
2021-09-28 11:28:14 +02:00
parent 9b18a7c95e
commit 50d0233188
8 changed files with 0 additions and 3090 deletions

View File

@@ -1,45 +0,0 @@
# Single file teaching version
bwBench.c contains a single file version of The Bandwidth Benchmark that is tailored for usage in Tutorials or Courses.
It should compile with any C99 compiler.
# Benchmarking scripts
## bench.pl to determine the absolute highest main memory bandwidth
Wrapper scripts in Perl (bench.pl) and Python (bench.py) are also provided to scan ranges of thread counts and determine the absolute highest sustained main memory bandwidth. In order to use them, `likwid-pin` has to be in your path. Each script has three required and one optional command line arguments:
```
$./bench.pl <executable> <thread count range> <repetitions> [<SMT setting>]
```
Example usage:
```
$./bench.pl ./bwbench-GCC 2-8 6
```
The script will always use physical cores only, where two SMT threads is the default. For different SMT thread counts use the 4th command line argument. Example for a processor without SMT:
```
$./bench.pl ./bwbench-GCC 14-24 10 1
```
## extractResults.pl to generate plottable output files from multiple scaling runs
Please see how to use it in the toplevel [README](https://github.com/RRZE-HPC/TheBandwidthBenchmark#scaling-runs).
## benchmarkSystem.pl to benchmark a system and generate plots and markdown for the result wiki
**Please use with care!**
The script is designed to be used from the root of TheBandwidthBenchmark.
This script cleans and builds the currently configured toolchain. It expects that all Likwid tools are in the path!
Desired frequency settings must be already in place.
Usage:
```
perl ./benchmarkSystem.pl <DATA-DIR> <EXECUTABLE> <PREFIX>
```
where ```<DATA-DIR>``` is the directory where you want to store all results and generated output.
```<EXECUTABLE>``` is the bwBench executable name; it must match the tool chain configured in ```config.mk```, e.g. ```./bwBench-CLANG```.
```<PREFIX>``` is the file prefix for all generated output, e.g. Intel-Haswell .

View File

@@ -1,49 +0,0 @@
#!/usr/bin/env perl
# =======================================================================================
#
# Author: Jan Eitzinger (je), jan.eitzinger@fau.de
# Copyright (c) 2019 RRZE, University Erlangen-Nuremberg
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# =======================================================================================
use strict;
use warnings;
use utf8;
# Scan a range of thread counts, run the benchmark executable several times
# for every count under likwid-pin, and report the kernel and thread count
# with the highest rate found in the benchmark output.
#
# Usage: bench.pl <executable> <thread count range> <repetitions> [<SMT setting>]
#   <thread count range>  "first-last" (e.g. 2-8) or a single count (e.g. 8)
#   <repetitions>         number of runs per thread count
#   <SMT setting>         last field of the likwid-pin expression, default 2
if ( @ARGV < 3 ) {
    die "Usage: $0 <executable> <thread count range> <repetitions> [<SMT setting>]\n";
}

my $CMD = $ARGV[0];
my @N = split /-/, $ARGV[1];
# A single thread count ("8") is treated as the one-element range 8-8;
# previously the upper bound was undefined and nothing was run.
$N[1] = $N[0] if @N == 1;
my $R = $ARGV[2];
my $MAX = 0; my $CORES = 0; my $BENCH = '';
my $SMT = $ARGV[3] ? $ARGV[3] : 2;

# All numeric arguments end up in a shell command line, so insist on plain
# unsigned integers before using them.
foreach my $value ( @N[ 0, 1 ], $R, $SMT ) {
    die "Thread counts, repetitions and SMT setting must be unsigned integers\n"
        unless defined $value && $value =~ /\A[0-9]+\z/;
}

foreach my $numcores ( $N[0] .. $N[1] ) {
    foreach ( 1 .. $R ) {
        my $output = `likwid-pin -c E:S0:$numcores:1:$SMT $CMD`;

        foreach my $ln ( split /\n/, $output ) {
            # Result lines look like "<Name>: <rate> ..."; keep the best rate.
            if ( $ln =~ /^([A-Za-z]+):[ ]+([0-9.]+) / ) {
                if ( $MAX < $2 ) {
                    $MAX = $2; $CORES = $numcores; $BENCH = $1;
                }
            }
        }
    }
}

print "$BENCH was best using $CORES threads: $MAX\n";

File diff suppressed because it is too large Load Diff

View File

@@ -1,421 +0,0 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* =======================================================================================
*/
#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <time.h>
#include <limits.h>
#include <float.h>
#ifdef _OPENMP
#include <omp.h>
#endif
#include <likwid-marker.h>
/* Number of double elements in each of the four benchmark arrays. */
#define SIZE 120000000ull
/* Number of times the complete kernel sequence is executed and timed. */
#define NTIMES 5
/* Byte alignment requested from posix_memalign for the arrays. */
#define ARRAY_ALIGNMENT 64
/* Separator line printed around the result table. */
#define HLINE "----------------------------------------------------------------------------\n"
/* Generic helper macros, only defined if no header provided them already.
 * Each argument is evaluated more than once, so avoid side effects. */
#ifndef MIN
#define MIN(x,y) ((x)<(y)?(x):(y))
#endif
#ifndef MAX
#define MAX(x,y) ((x)>(y)?(x):(y))
#endif
#ifndef ABS
#define ABS(a) ((a) >= 0 ? (a) : -(a))
#endif
/* Run one kernel inside a LIKWID marker region.
 * The marker named #tag is started in one OpenMP parallel region, then
 * `call` is evaluated and its return value stored in times[tag][k], then
 * the marker is stopped in a second parallel region.
 * Requires `times` and `k` to be in scope at the point of use.
 * Expands to several statements: do not use it as the body of an unbraced
 * if/for/while. */
#define LIKWID_PROFILE(tag,call) \
_Pragma ("omp parallel") \
{LIKWID_MARKER_START(#tag);} \
times[tag][k] = call; \
_Pragma ("omp parallel") \
{LIKWID_MARKER_STOP(#tag);}
/* Identifiers of the benchmark kernels. The values are used as the first
 * index into the times[][] array and match the order of the benchmarks[]
 * table in main; NUMBENCH is the number of kernels. */
typedef enum benchmark {
INIT = 0,
COPY,
UPDATE,
TRIAD,
DAXPY,
STRIAD,
SDAXPY,
NUMBENCH
} benchmark;
/* Static description of one kernel, used when printing the result table. */
typedef struct {
/* text printed at the start of the kernel's result row */
char* label;
/* doubles counted per loop iteration; main multiplies by sizeof(double) * N */
int words;
/* floating point operations counted per loop iteration */
int flops;
} benchmarkType;
/* Forward declarations of the functions defined further down in this file.
 * Every kernel returns the time difference of two getTimeStamp() readings
 * taken around its loop; the trailing int is the number of elements. */
extern double init(double*, double, int);
extern double copy(double*, double*, int);
extern double update(double*, double, int);
extern double triad(double*, double*, double*, double, int);
extern double daxpy(double*, double*, double, int);
extern double striad(double*, double*, double*, double*, int);
extern double sdaxpy(double*, double*, double*, int);
/* Compares the array contents with the analytically expected values. */
extern void check(double*, double*, double*, double*, int);
/* Returns a CLOCK_MONOTONIC reading in seconds. */
extern double getTimeStamp();
/*
 * Benchmark driver (LIKWID marker variant).
 *
 * Allocates four aligned arrays of SIZE doubles, initialises them in
 * parallel, runs the seven kernels NTIMES times inside LIKWID marker
 * regions, prints a table with bandwidth, flop rate and timings, and
 * finally validates the array contents with check().
 *
 * The first of the NTIMES runs is treated as warm-up: it is excluded from
 * the min/max/avg statistics.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if an array cannot be allocated.
 */
int main (int argc, char** argv)
{
    size_t bytesPerWord = sizeof(double);
    size_t N = SIZE;
    double *a = NULL, *b = NULL, *c = NULL, *d = NULL;
    double **arrays[] = {&a, &b, &c, &d};
    double scalar;
    double avgtime[NUMBENCH],
           maxtime[NUMBENCH],
           mintime[NUMBENCH];
    double times[NUMBENCH][NTIMES];

    benchmarkType benchmarks[NUMBENCH] = {
        {"Init: ", 1, 0},
        {"Copy: ", 2, 0},
        {"Update: ", 2, 1},
        {"Triad: ", 3, 2},
        {"Daxpy: ", 3, 2},
        {"STriad: ", 4, 2},
        {"SDaxpy: ", 4, 2}
    };

    /* command line arguments are not used */
    (void) argc;
    (void) argv;

    LIKWID_MARKER_INIT;

#pragma omp parallel
    {
        LIKWID_MARKER_REGISTER("INIT");
        LIKWID_MARKER_REGISTER("COPY");
        LIKWID_MARKER_REGISTER("UPDATE");
        LIKWID_MARKER_REGISTER("TRIAD");
        LIKWID_MARKER_REGISTER("DAXPY");
        LIKWID_MARKER_REGISTER("STRIAD");
        LIKWID_MARKER_REGISTER("SDAXPY");
    }

    /* posix_memalign returns non-zero on failure and leaves the pointer
     * unspecified, so bail out instead of touching the arrays. */
    for (int i=0; i<4; i++) {
        if (posix_memalign((void**) arrays[i], ARRAY_ALIGNMENT, N * bytesPerWord ) != 0) {
            fprintf(stderr, "Failed to allocate %zu bytes for benchmark array\n",
                    N * bytesPerWord);
            *arrays[i] = NULL;
            free(a); free(b); free(c); free(d);
            LIKWID_MARKER_CLOSE;
            return EXIT_FAILURE;
        }
    }

    for (int i=0; i<NUMBENCH; i++) {
        avgtime[i] = 0;
        maxtime[i] = 0;
        mintime[i] = DBL_MAX;   /* timings are double, so use the double limit */
    }

#ifdef _OPENMP
    printf(HLINE);
#pragma omp parallel
    {
        int k = omp_get_num_threads();
#pragma omp single
        printf ("OpenMP enabled, running with %d threads\n", k);
    }
#endif

    /* Parallel initialisation with the same static schedule as the kernels. */
#pragma omp parallel for schedule(static)
    for (int i=0; i<N; i++) {
        a[i] = 2.0;
        b[i] = 2.0;
        c[i] = 0.5;
        d[i] = 1.0;
    }

    scalar = 3.0;

    /* LIKWID_PROFILE stores each result in times[tag][k]. */
    for ( int k=0; k < NTIMES; k++) {
        LIKWID_PROFILE(INIT,init(b, scalar, N));
        LIKWID_PROFILE(COPY,copy(c, a, N));
        LIKWID_PROFILE(UPDATE,update(a, scalar, N));
        LIKWID_PROFILE(TRIAD,triad(a, b, c, scalar, N));
        LIKWID_PROFILE(DAXPY,daxpy(a, b, scalar, N));
        LIKWID_PROFILE(STRIAD,striad(a, b, c, d, N));
        LIKWID_PROFILE(SDAXPY,sdaxpy(a, b, c, N));
    }

    /* k starts at 1: the first run is warm-up and not part of the statistics. */
    for (int j=0; j<NUMBENCH; j++) {
        for (int k=1; k<NTIMES; k++) {
            avgtime[j] = avgtime[j] + times[j][k];
            mintime[j] = MIN(mintime[j], times[j][k]);
            maxtime[j] = MAX(maxtime[j], times[j][k]);
        }
    }

    printf(HLINE);
    printf("Function Rate(MB/s) Rate(MFlop/s) Avg time Min time Max time\n");
    for (int j=0; j<NUMBENCH; j++) {
        avgtime[j] = avgtime[j]/(double)(NTIMES-1);
        double bytes = (double) benchmarks[j].words * sizeof(double) * N;
        double flops = (double) benchmarks[j].flops * N;

        /* Rates are derived from the fastest run. */
        if (flops > 0){
            printf("%s%11.2f %11.2f %11.4f %11.4f %11.4f\n", benchmarks[j].label,
                    1.0E-06 * bytes/mintime[j],
                    1.0E-06 * flops/mintime[j],
                    avgtime[j],
                    mintime[j],
                    maxtime[j]);
        } else {
            printf("%s%11.2f - %11.4f %11.4f %11.4f\n", benchmarks[j].label,
                    1.0E-06 * bytes/mintime[j],
                    avgtime[j],
                    mintime[j],
                    maxtime[j]);
        }
    }
    printf(HLINE);

    check(a, b, c, d, N);
    LIKWID_MARKER_CLOSE;

    free(a);
    free(b);
    free(c);
    free(d);

    return EXIT_SUCCESS;
}
/*
 * Validate the benchmark result.
 *
 * Replays the kernel sequence of main on four scalars to obtain the value
 * every element of a, b, c and d must hold after NTIMES iterations, scales
 * it by N and compares it with the sum over the corresponding array.
 * Prints "Solution Validates" if all four relative errors are within
 * epsilon, otherwise a report for the first array that fails.
 *
 * a, b, c, d  arrays produced by the benchmark run, N elements each
 * N           number of elements per array
 */
void check(
        double * a,
        double * b,
        double * c,
        double * d,
        int N
        )
{
    double aj, bj, cj, dj, scalar;
    double asum, bsum, csum, dsum;
    double epsilon;

    /* reproduce initialization */
    aj = 2.0;
    bj = 2.0;
    cj = 0.5;
    dj = 1.0;

    /* now execute timing loop */
    scalar = 3.0;

    for (int k=0; k<NTIMES; k++) {
        bj = scalar;
        cj = aj;
        aj = aj * scalar;
        aj = bj + scalar * cj;
        aj = aj + scalar * bj;
        aj = bj + cj * dj;
        aj = aj + bj * cj;
    }

    aj = aj * (double) (N);
    bj = bj * (double) (N);
    cj = cj * (double) (N);
    dj = dj * (double) (N);

    asum = 0.0; bsum = 0.0; csum = 0.0; dsum = 0.0;

    for (int i=0; i<N; i++) {
        asum += a[i];
        bsum += b[i];
        csum += c[i];
        dsum += d[i];
    }

#ifdef VERBOSE
    printf ("Results Comparison: \n");
    printf (" Expected : %f %f %f \n",aj,bj,cj);
    printf (" Observed : %f %f %f \n",asum,bsum,csum);
#endif

    epsilon = 1.e-8;

    const char   names[4]    = {'a', 'b', 'c', 'd'};
    const double expected[4] = {aj, bj, cj, dj};
    const double observed[4] = {asum, bsum, csum, dsum};
    int failed = 0;

    for (int j=0; j<4; j++) {
        /* Divide by the magnitude of the observed sum so a negative sum
         * cannot turn the relative error negative. */
        double relerr = ABS(expected[j]-observed[j]) / ABS(observed[j]);

        /* Written as !(x <= eps) so that a NaN result fails the check;
         * with (x > eps) a NaN compared false and validated silently. */
        if (!(relerr <= epsilon)) {
            printf ("Failed Validation on array %c[]\n", names[j]);
            printf (" Expected : %f \n", expected[j]);
            printf (" Observed : %f \n", observed[j]);
            failed = 1;
            break;  /* report only the first failing array, as before */
        }
    }

    if (!failed) {
        printf ("Solution Validates\n");
    }
}
/* Read CLOCK_MONOTONIC and return the reading in seconds as a double
 * (whole seconds plus the nanosecond part scaled by 1e-9). */
double getTimeStamp()
{
    struct timespec now;

    clock_gettime(CLOCK_MONOTONIC, &now);

    const double wholeSeconds = (double) now.tv_sec;
    const double nanoSeconds  = (double) now.tv_nsec;

    return wholeSeconds + nanoSeconds * 1.e-9;
}
/* Init kernel: a[i] = scalar for the first N elements.
 * Returns the difference of the getTimeStamp() readings taken before and
 * after the loop, i.e. the elapsed time in seconds. */
double init(
        double * restrict a,
        double scalar,
        int N
        )
{
    const double begin = getTimeStamp();

#pragma omp parallel for schedule(static)
    for (int idx = 0; idx < N; ++idx) {
        a[idx] = scalar;
    }

    return getTimeStamp() - begin;
}
/* Copy kernel: a[i] = b[i] for the first N elements.
 * Returns the difference of the getTimeStamp() readings taken before and
 * after the loop, i.e. the elapsed time in seconds. */
double copy(
        double * restrict a,
        double * restrict b,
        int N
        )
{
    const double begin = getTimeStamp();

#pragma omp parallel for schedule(static)
    for (int idx = 0; idx < N; ++idx) {
        a[idx] = b[idx];
    }

    return getTimeStamp() - begin;
}
/* Update kernel: a[i] = a[i] * scalar for the first N elements.
 * Returns the difference of the getTimeStamp() readings taken before and
 * after the loop, i.e. the elapsed time in seconds. */
double update(
        double * restrict a,
        double scalar,
        int N
        )
{
    const double begin = getTimeStamp();

#pragma omp parallel for schedule(static)
    for (int idx = 0; idx < N; ++idx) {
        a[idx] *= scalar;
    }

    return getTimeStamp() - begin;
}
/* Triad kernel: a[i] = b[i] + scalar * c[i] for the first N elements.
 * Returns the difference of the getTimeStamp() readings taken before and
 * after the loop, i.e. the elapsed time in seconds. */
double triad(
        double * restrict a,
        double * restrict b,
        double * restrict c,
        double scalar,
        int N
        )
{
    const double begin = getTimeStamp();

#pragma omp parallel for schedule(static)
    for (int idx = 0; idx < N; ++idx) {
        a[idx] = b[idx] + scalar * c[idx];
    }

    return getTimeStamp() - begin;
}
/* Daxpy kernel: a[i] = a[i] + scalar * b[i] for the first N elements.
 * Returns the difference of the getTimeStamp() readings taken before and
 * after the loop, i.e. the elapsed time in seconds. */
double daxpy(
        double * restrict a,
        double * restrict b,
        double scalar,
        int N
        )
{
    const double begin = getTimeStamp();

#pragma omp parallel for schedule(static)
    for (int idx = 0; idx < N; ++idx) {
        a[idx] += scalar * b[idx];
    }

    return getTimeStamp() - begin;
}
/* Schoenauer triad kernel: a[i] = b[i] + d[i] * c[i] for the first N
 * elements.
 * Returns the difference of the getTimeStamp() readings taken before and
 * after the loop, i.e. the elapsed time in seconds. */
double striad(
        double * restrict a,
        double * restrict b,
        double * restrict c,
        double * restrict d,
        int N
        )
{
    const double begin = getTimeStamp();

#pragma omp parallel for schedule(static)
    for (int idx = 0; idx < N; ++idx) {
        a[idx] = b[idx] + d[idx] * c[idx];
    }

    return getTimeStamp() - begin;
}
/* SDaxpy kernel: a[i] = a[i] + b[i] * c[i] for the first N elements.
 * Returns the difference of the getTimeStamp() readings taken before and
 * after the loop, i.e. the elapsed time in seconds. */
double sdaxpy(
        double * restrict a,
        double * restrict b,
        double * restrict c,
        int N
        )
{
    const double begin = getTimeStamp();

#pragma omp parallel for schedule(static)
    for (int idx = 0; idx < N; ++idx) {
        a[idx] += b[idx] * c[idx];
    }

    return getTimeStamp() - begin;
}

View File

@@ -1,323 +0,0 @@
!=======================================================================================
!
! Author: Jan Eitzinger (je), jan.treibig@gmail.com
! Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
!
! Permission is hereby granted, free of charge, to any person obtaining a copy
! of this software and associated documentation files (the "Software"), to deal
! in the Software without restriction, including without limitation the rights
! to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
! copies of the Software, and to permit persons to whom the Software is
! furnished to do so, subject to the following conditions:
!
! The above copyright notice and this permission notice shall be included in all
! copies or substantial portions of the Software.
!
! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
! FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
! AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
! LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
! SOFTWARE.
!
!=======================================================================================
! Timing helper: getTimeStamp() returns the current system_clock reading
! converted to seconds.
module timer
    use iso_fortran_env, only: int32, int64, real64
    implicit none
    public :: getTimeStamp
contains
    ! Returns the system_clock tick count divided by the tick rate,
    ! i.e. a time stamp in seconds as a 64-bit real.
    function getTimeStamp() result(ts)
        real(real64) :: ts
        integer(int64) :: ticks, ticks_per_second

        call system_clock(count=ticks, count_rate=ticks_per_second)
        ts = real(ticks, real64) / real(ticks_per_second, real64)
    end function getTimeStamp
end module timer
! Compile-time configuration shared by the kernels and the driver program.
module constants
    implicit none

    ! Problem size: number of elements per array and number of repetitions
    ! of the kernel sequence.
    integer, parameter :: n      = 20000000
    integer, parameter :: ntimes = 10

    ! Kind parameters of default real (sp) and double precision (dp).
    integer, parameter :: sp = kind(1.0)
    integer, parameter :: dp = kind(1.0d0)
end module constants
! Benchmark kernels with LIKWID marker instrumentation.
! Every function runs one loop over elements 1..n (n from module constants)
! and returns the time between two getTimeStamp() readings in seconds.
! The first reading is taken before the OpenMP parallel region is opened and
! the second after it is closed, so the returned time also covers the LIKWID
! marker start/stop calls made inside the region.
module benchmarks
use timer
use likwid
use constants
contains
! Init kernel: a(i) = scalar. Marker region "INIT".
function init (a, scalar) result(seconds)
implicit none
real(kind=dp) :: seconds
real(kind=dp), allocatable, intent(inout) :: a(:)
real(kind=dp), intent(in) :: scalar
real(kind=dp) :: S, E
integer :: i
S = getTimeStamp()
!$omp parallel
call likwid_markerStartRegion("INIT")
!$omp do
do i = 1, n
a(i) = scalar
end do
call likwid_markerStopRegion("INIT")
!$omp end parallel
E = getTimeStamp()
seconds = E-S
end function init
! Copy kernel: a(i) = b(i). Marker region "COPY".
function copy (a, b) result(seconds)
implicit none
real(kind=dp) :: seconds
real(kind=dp), allocatable, intent(inout) :: a(:)
real(kind=dp), allocatable, intent(in) :: b(:)
real(kind=dp) :: S, E
integer :: i
S = getTimeStamp()
!$omp parallel
call likwid_markerStartRegion("COPY")
!$omp do
do i = 1, n
a(i) = b(i)
end do
call likwid_markerStopRegion("COPY")
!$omp end parallel
E = getTimeStamp()
seconds = E-S
end function copy
! Update kernel: a(i) = a(i) * scalar. Marker region "UPDATE".
function update (a, scalar) result(seconds)
implicit none
real(kind=dp) :: seconds
real(kind=dp), allocatable, intent(inout) :: a(:)
real(kind=dp), intent(in) :: scalar
real(kind=dp) :: S, E
integer :: i
S = getTimeStamp()
!$omp parallel
call likwid_markerStartRegion("UPDATE")
!$omp do
do i = 1, n
a(i) = a(i) * scalar
end do
call likwid_markerStopRegion("UPDATE")
!$omp end parallel
E = getTimeStamp()
seconds = E-S
end function update
! Triad kernel: a(i) = b(i) + scalar * c(i). Marker region "TRIAD".
function triad (a, b, c, scalar) result(seconds)
implicit none
real(kind=dp) :: seconds
real(kind=dp), allocatable, intent(inout) :: a(:)
real(kind=dp), allocatable, intent(in) :: b(:)
real(kind=dp), allocatable, intent(in) :: c(:)
real(kind=dp), intent(in) :: scalar
real(kind=dp) :: S, E
integer :: i
S = getTimeStamp()
!$omp parallel
call likwid_markerStartRegion("TRIAD")
!$omp do
do i = 1, n
a(i) = b(i) + scalar * c(i)
end do
call likwid_markerStopRegion("TRIAD")
!$omp end parallel
E = getTimeStamp()
seconds = E-S
end function triad
! Daxpy kernel: a(i) = a(i) + scalar * b(i). Marker region "DAXPY".
function daxpy (a, b, scalar) result(seconds)
implicit none
real(kind=dp) :: seconds
real(kind=dp), allocatable, intent(inout) :: a(:)
real(kind=dp), allocatable, intent(in) :: b(:)
real(kind=dp), intent(in) :: scalar
real(kind=dp) :: S, E
integer :: i
S = getTimeStamp()
!$omp parallel
call likwid_markerStartRegion("DAXPY")
!$omp do
do i = 1, n
a(i) = a(i) + scalar * b(i)
end do
call likwid_markerStopRegion("DAXPY")
!$omp end parallel
E = getTimeStamp()
seconds = E-S
end function daxpy
! Schoenauer triad kernel: a(i) = b(i) + c(i) * d(i). Marker region "STRIAD".
function striad (a, b, c, d) result(seconds)
implicit none
real(kind=dp) :: seconds
real(kind=dp), allocatable, intent(inout) :: a(:)
real(kind=dp), allocatable, intent(in) :: b(:)
real(kind=dp), allocatable, intent(in) :: c(:)
real(kind=dp), allocatable, intent(in) :: d(:)
real(kind=dp) :: S, E
integer :: i
S = getTimeStamp()
!$omp parallel
call likwid_markerStartRegion("STRIAD")
!$omp do
do i = 1, n
a(i) = b(i) + c(i) * d(i)
end do
call likwid_markerStopRegion("STRIAD")
!$omp end parallel
E = getTimeStamp()
seconds = E-S
end function striad
! SDaxpy kernel: a(i) = a(i) + b(i) * c(i). Marker region "SDAXPY".
function sdaxpy (a, b, c) result(seconds)
implicit none
real(kind=dp) :: seconds
real(kind=dp), allocatable, intent(inout) :: a(:)
real(kind=dp), allocatable, intent(in) :: b(:)
real(kind=dp), allocatable, intent(in) :: c(:)
real(kind=dp) :: S, E
integer :: i
S = getTimeStamp()
!$omp parallel
call likwid_markerStartRegion("SDAXPY")
!$omp do
do i = 1, n
a(i) = a(i) + b(i) * c(i)
end do
call likwid_markerStopRegion("SDAXPY")
!$omp end parallel
E = getTimeStamp()
seconds = E-S
end function sdaxpy
end module benchmarks
! Driver of the Fortran bandwidth benchmark (LIKWID marker variant).
!
! Allocates four arrays of n doubles, registers the LIKWID marker regions,
! initialises the arrays in parallel, runs the seven kernels ntimes times
! and prints bandwidth plus average/minimum/maximum time per kernel.
! The first of the ntimes runs is treated as warm-up and excluded from the
! statistics, which is why the average divides by ntimes-1 (ntimes must be
! at least 2).
program bwBench
    use constants
    use benchmarks
    use likwid
    implicit none
    integer, parameter :: numbench = 7
    real(kind=dp), allocatable :: a(:), b(:), c(:), d(:)
    real(kind=dp) :: scalar
    real(kind=dp) :: maxtime(numbench), mintime(numbench), avgtime(numbench), &
        times(numbench,ntimes)
    integer :: i, k
    ! Data volume per kernel in bytes. Kept as a real so that larger values
    ! of n cannot overflow a default integer.
    real(kind=dp) :: bytes(numbench)
    integer :: bytesPerWord
    character :: label(numbench)*11
    !$ INTEGER omp_get_num_threads
    !$ EXTERNAL omp_get_num_threads

    bytesPerWord = 8
    bytes(1) = 1 * real(bytesPerWord, dp) * n ! init
    bytes(2) = 2 * real(bytesPerWord, dp) * n ! copy
    bytes(3) = 2 * real(bytesPerWord, dp) * n ! update
    bytes(4) = 3 * real(bytesPerWord, dp) * n ! triad
    bytes(5) = 3 * real(bytesPerWord, dp) * n ! daxpy
    bytes(6) = 4 * real(bytesPerWord, dp) * n ! striad
    bytes(7) = 4 * real(bytesPerWord, dp) * n ! sdaxpy

    label(1) = " Init: "
    label(2) = " Copy: "
    label(3) = " Update: "
    label(4) = " Triad: "
    label(5) = " Daxpy: "
    label(6) = " STriad: "
    label(7) = " SDaxpy: "

    do i = 1, numbench
        avgtime(i) = 0.0D0
        mintime(i) = 1.0D+36
        maxtime(i) = 0.0D0
    end do

    allocate(a(n))
    allocate(b(n))
    allocate(c(n))
    allocate(d(n))

    call likwid_markerInit()

    !$omp parallel
    call likwid_markerRegisterRegion("INIT")
    call likwid_markerRegisterRegion("COPY")
    call likwid_markerRegisterRegion("UPDATE")
    call likwid_markerRegisterRegion("TRIAD")
    call likwid_markerRegisterRegion("DAXPY")
    call likwid_markerRegisterRegion("STRIAD")
    call likwid_markerRegisterRegion("SDAXPY")
    !$omp master
    print *,'----------------------------------------------'
    !$ print *,'Number of Threads = ',OMP_GET_NUM_THREADS()
    !$omp end master
    !$omp end parallel

    PRINT *,'----------------------------------------------'

    !$OMP PARALLEL DO
    do i = 1, n
        a(i) = 2.0d0
        b(i) = 2.0d0
        c(i) = 0.5d0
        d(i) = 1.0d0
    end do
    !$OMP END PARALLEL DO

    scalar = 3.0d0

    do k = 1, ntimes
        times(1, k) = init(b, scalar)
        times(2, k) = copy(c, a)
        times(3, k) = update(a, scalar)
        times(4, k) = triad(a, b, c, scalar)
        times(5, k) = daxpy(a, b, scalar)
        times(6, k) = striad(a, b, c, d)
        times(7, k) = sdaxpy(a, b, c)
    end do

    ! Start at k = 2: run 1 is warm-up. The average below divides by
    ! ntimes-1, so summing all ntimes runs inflated it.
    do k = 2, ntimes
        do i = 1, numbench
            avgtime(i) = avgtime(i) + times(i, k)
            mintime(i) = MIN(mintime(i), times(i, k))
            ! Must compare against the running maximum; comparing against
            ! mintime only ever reported max(min, last run).
            maxtime(i) = MAX(maxtime(i), times(i, k))
        end do
    end do

    print *,"-------------------------------------------------------------"
    print *,"Function Rate (MB/s) Avg time Min time Max time"
    do i = 1, numbench
        avgtime(i) = avgtime(i)/dble(ntimes-1)
        ! The rate is derived from the fastest run.
        print "(a,f12.2, 2x, 3 (f10.4,3x))", label(i), bytes(i)/mintime(i)/1.0D6, &
            avgtime(i), mintime(i), maxtime(i)
    end do
    print *,"-------------------------------------------------------------"

    call likwid_markerClose()

    deallocate(a, b, c, d)
end program bwBench

View File

@@ -1,399 +0,0 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.eitzinger@fau.de
* Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* =======================================================================================
*/
#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <time.h>
#include <limits.h>
#include <float.h>
#ifdef _OPENMP
#include <omp.h>
#endif
/* Number of double elements in each of the four benchmark arrays. */
#define SIZE 120000000ull
/* Number of times the complete kernel sequence is executed and timed. */
#define NTIMES 5
/* Byte alignment requested from posix_memalign for the arrays. */
#define ARRAY_ALIGNMENT 64
/* Separator line printed around the result table. */
#define HLINE "----------------------------------------------------------------------------\n"
/* Generic helper macros, only defined if no header provided them already.
 * Each argument is evaluated more than once, so avoid side effects. */
#ifndef MIN
#define MIN(x,y) ((x)<(y)?(x):(y))
#endif
#ifndef MAX
#define MAX(x,y) ((x)>(y)?(x):(y))
#endif
#ifndef ABS
#define ABS(a) ((a) >= 0 ? (a) : -(a))
#endif
/* Identifiers of the benchmark kernels. The values are used as the first
 * index into the times[][] array and match the order of the benchmarks[]
 * table in main; NUMBENCH is the number of kernels. */
typedef enum benchmark {
INIT = 0,
COPY,
UPDATE,
TRIAD,
DAXPY,
STRIAD,
SDAXPY,
NUMBENCH
} benchmark;
/* Static description of one kernel, used when printing the result table. */
typedef struct {
/* text printed at the start of the kernel's result row */
char* label;
/* doubles counted per loop iteration; main multiplies by sizeof(double) * N */
int words;
/* floating point operations counted per loop iteration */
int flops;
} benchmarkType;
/* Forward declarations of the functions defined further down in this file.
 * Every kernel returns the time difference of two getTimeStamp() readings
 * taken around its loop; the trailing int is the number of elements. */
extern double init(double*, double, int);
extern double copy(double*, double*, int);
extern double update(double*, double, int);
extern double triad(double*, double*, double*, double, int);
extern double daxpy(double*, double*, double, int);
extern double striad(double*, double*, double*, double*, int);
extern double sdaxpy(double*, double*, double*, int);
/* Compares the array contents with the analytically expected values. */
extern void check(double*, double*, double*, double*, int);
/* Returns a CLOCK_MONOTONIC reading in seconds. */
extern double getTimeStamp();
/*
 * Benchmark driver.
 *
 * Allocates four aligned arrays of SIZE doubles, initialises them in
 * parallel, runs the seven kernels NTIMES times, prints a table with
 * bandwidth, flop rate and timings, and finally validates the array
 * contents with check().
 *
 * The first of the NTIMES runs is treated as warm-up: it is excluded from
 * the min/max/avg statistics.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if an array cannot be allocated.
 */
int main (int argc, char** argv)
{
    size_t bytesPerWord = sizeof(double);
    size_t N = SIZE;
    double *a = NULL, *b = NULL, *c = NULL, *d = NULL;
    double **arrays[] = {&a, &b, &c, &d};
    double scalar;
    double avgtime[NUMBENCH],
           maxtime[NUMBENCH],
           mintime[NUMBENCH];
    double times[NUMBENCH][NTIMES];

    benchmarkType benchmarks[NUMBENCH] = {
        {"Init: ", 1, 0},
        {"Copy: ", 2, 0},
        {"Update: ", 2, 1},
        {"Triad: ", 3, 2},
        {"Daxpy: ", 3, 2},
        {"STriad: ", 4, 2},
        {"SDaxpy: ", 4, 2}
    };

    /* command line arguments are not used */
    (void) argc;
    (void) argv;

    /* posix_memalign returns non-zero on failure and leaves the pointer
     * unspecified, so bail out instead of touching the arrays. */
    for (int i=0; i<4; i++) {
        if (posix_memalign((void**) arrays[i], ARRAY_ALIGNMENT, N * bytesPerWord ) != 0) {
            fprintf(stderr, "Failed to allocate %zu bytes for benchmark array\n",
                    N * bytesPerWord);
            *arrays[i] = NULL;
            free(a); free(b); free(c); free(d);
            return EXIT_FAILURE;
        }
    }

    for (int i=0; i<NUMBENCH; i++) {
        avgtime[i] = 0;
        maxtime[i] = 0;
        mintime[i] = DBL_MAX;   /* timings are double, so use the double limit */
    }

#ifdef _OPENMP
    printf(HLINE);
#pragma omp parallel
    {
        int k = omp_get_num_threads();
#pragma omp single
        printf ("OpenMP enabled, running with %d threads\n", k);
    }
#endif

    /* Parallel initialisation with the same static schedule as the kernels. */
#pragma omp parallel for schedule(static)
    for (int i=0; i<N; i++) {
        a[i] = 2.0;
        b[i] = 2.0;
        c[i] = 0.5;
        d[i] = 1.0;
    }

    scalar = 3.0;

    for ( int k=0; k < NTIMES; k++) {
        times[INIT][k] = init(b, scalar, N);
        times[COPY][k] = copy(c, a, N);
        times[UPDATE][k] = update(a, scalar, N);
        times[TRIAD][k] = triad(a, b, c, scalar, N);
        times[DAXPY][k] = daxpy(a, b, scalar, N);
        times[STRIAD][k] = striad(a, b, c, d, N);
        times[SDAXPY][k] = sdaxpy(a, b, c, N);
    }

    /* k starts at 1: the first run is warm-up and not part of the statistics. */
    for (int j=0; j<NUMBENCH; j++) {
        for (int k=1; k<NTIMES; k++) {
            avgtime[j] = avgtime[j] + times[j][k];
            mintime[j] = MIN(mintime[j], times[j][k]);
            maxtime[j] = MAX(maxtime[j], times[j][k]);
        }
    }

    printf(HLINE);
    printf("Function Rate(MB/s) Rate(MFlop/s) Avg time Min time Max time\n");
    for (int j=0; j<NUMBENCH; j++) {
        avgtime[j] = avgtime[j]/(double)(NTIMES-1);
        double bytes = (double) benchmarks[j].words * sizeof(double) * N;
        double flops = (double) benchmarks[j].flops * N;

        /* Rates are derived from the fastest run. */
        if (flops > 0){
            printf("%s%11.2f %11.2f %11.4f %11.4f %11.4f\n", benchmarks[j].label,
                    1.0E-06 * bytes/mintime[j],
                    1.0E-06 * flops/mintime[j],
                    avgtime[j],
                    mintime[j],
                    maxtime[j]);
        } else {
            printf("%s%11.2f - %11.4f %11.4f %11.4f\n", benchmarks[j].label,
                    1.0E-06 * bytes/mintime[j],
                    avgtime[j],
                    mintime[j],
                    maxtime[j]);
        }
    }
    printf(HLINE);

    check(a, b, c, d, N);

    free(a);
    free(b);
    free(c);
    free(d);

    return EXIT_SUCCESS;
}
/*
 * Validate the benchmark result.
 *
 * Replays the kernel sequence of main on four scalars to obtain the value
 * every element of a, b, c and d must hold after NTIMES iterations, scales
 * it by N and compares it with the sum over the corresponding array.
 * Prints "Solution Validates" if all four relative errors are within
 * epsilon, otherwise a report for the first array that fails.
 *
 * a, b, c, d  arrays produced by the benchmark run, N elements each
 * N           number of elements per array
 */
void check(
        double * a,
        double * b,
        double * c,
        double * d,
        int N
        )
{
    double aj, bj, cj, dj, scalar;
    double asum, bsum, csum, dsum;
    double epsilon;

    /* reproduce initialization */
    aj = 2.0;
    bj = 2.0;
    cj = 0.5;
    dj = 1.0;

    /* now execute timing loop */
    scalar = 3.0;

    for (int k=0; k<NTIMES; k++) {
        bj = scalar;
        cj = aj;
        aj = aj * scalar;
        aj = bj + scalar * cj;
        aj = aj + scalar * bj;
        aj = bj + cj * dj;
        aj = aj + bj * cj;
    }

    aj = aj * (double) (N);
    bj = bj * (double) (N);
    cj = cj * (double) (N);
    dj = dj * (double) (N);

    asum = 0.0; bsum = 0.0; csum = 0.0; dsum = 0.0;

    for (int i=0; i<N; i++) {
        asum += a[i];
        bsum += b[i];
        csum += c[i];
        dsum += d[i];
    }

#ifdef VERBOSE
    printf ("Results Comparison: \n");
    printf (" Expected : %f %f %f \n",aj,bj,cj);
    printf (" Observed : %f %f %f \n",asum,bsum,csum);
#endif

    epsilon = 1.e-8;

    const char   names[4]    = {'a', 'b', 'c', 'd'};
    const double expected[4] = {aj, bj, cj, dj};
    const double observed[4] = {asum, bsum, csum, dsum};
    int failed = 0;

    for (int j=0; j<4; j++) {
        /* Divide by the magnitude of the observed sum so a negative sum
         * cannot turn the relative error negative. */
        double relerr = ABS(expected[j]-observed[j]) / ABS(observed[j]);

        /* Written as !(x <= eps) so that a NaN result fails the check;
         * with (x > eps) a NaN compared false and validated silently. */
        if (!(relerr <= epsilon)) {
            printf ("Failed Validation on array %c[]\n", names[j]);
            printf (" Expected : %f \n", expected[j]);
            printf (" Observed : %f \n", observed[j]);
            failed = 1;
            break;  /* report only the first failing array, as before */
        }
    }

    if (!failed) {
        printf ("Solution Validates\n");
    }
}
/* Read CLOCK_MONOTONIC and return the reading in seconds as a double
 * (whole seconds plus the nanosecond part scaled by 1e-9). */
double getTimeStamp()
{
    struct timespec now;

    clock_gettime(CLOCK_MONOTONIC, &now);

    const double wholeSeconds = (double) now.tv_sec;
    const double nanoSeconds  = (double) now.tv_nsec;

    return wholeSeconds + nanoSeconds * 1.e-9;
}
/* Init kernel: a[i] = scalar for the first N elements.
 * Returns the difference of the getTimeStamp() readings taken before and
 * after the loop, i.e. the elapsed time in seconds. */
double init(
        double * restrict a,
        double scalar,
        int N
        )
{
    const double begin = getTimeStamp();

#pragma omp parallel for schedule(static)
    for (int idx = 0; idx < N; ++idx) {
        a[idx] = scalar;
    }

    return getTimeStamp() - begin;
}
/* Copy kernel: a[i] = b[i] for the first N elements.
 * Returns the difference of the getTimeStamp() readings taken before and
 * after the loop, i.e. the elapsed time in seconds. */
double copy(
        double * restrict a,
        double * restrict b,
        int N
        )
{
    const double begin = getTimeStamp();

#pragma omp parallel for schedule(static)
    for (int idx = 0; idx < N; ++idx) {
        a[idx] = b[idx];
    }

    return getTimeStamp() - begin;
}
/* Update kernel: a[i] = a[i] * scalar for the first N elements.
 * Returns the difference of the getTimeStamp() readings taken before and
 * after the loop, i.e. the elapsed time in seconds. */
double update(
        double * restrict a,
        double scalar,
        int N
        )
{
    const double begin = getTimeStamp();

#pragma omp parallel for schedule(static)
    for (int idx = 0; idx < N; ++idx) {
        a[idx] *= scalar;
    }

    return getTimeStamp() - begin;
}
/* Triad kernel: a[i] = b[i] + scalar * c[i] for the first N elements.
 * Returns the difference of the getTimeStamp() readings taken before and
 * after the loop, i.e. the elapsed time in seconds. */
double triad(
        double * restrict a,
        double * restrict b,
        double * restrict c,
        double scalar,
        int N
        )
{
    const double begin = getTimeStamp();

#pragma omp parallel for schedule(static)
    for (int idx = 0; idx < N; ++idx) {
        a[idx] = b[idx] + scalar * c[idx];
    }

    return getTimeStamp() - begin;
}
/* Daxpy kernel: a[i] = a[i] + scalar * b[i] for the first N elements.
 * Returns the difference of the getTimeStamp() readings taken before and
 * after the loop, i.e. the elapsed time in seconds. */
double daxpy(
        double * restrict a,
        double * restrict b,
        double scalar,
        int N
        )
{
    const double begin = getTimeStamp();

#pragma omp parallel for schedule(static)
    for (int idx = 0; idx < N; ++idx) {
        a[idx] += scalar * b[idx];
    }

    return getTimeStamp() - begin;
}
/* Schoenauer triad kernel: a[i] = b[i] + d[i] * c[i] for the first N
 * elements.
 * Returns the difference of the getTimeStamp() readings taken before and
 * after the loop, i.e. the elapsed time in seconds. */
double striad(
        double * restrict a,
        double * restrict b,
        double * restrict c,
        double * restrict d,
        int N
        )
{
    const double begin = getTimeStamp();

#pragma omp parallel for schedule(static)
    for (int idx = 0; idx < N; ++idx) {
        a[idx] = b[idx] + d[idx] * c[idx];
    }

    return getTimeStamp() - begin;
}
/* SDaxpy kernel: a[i] = a[i] + b[i] * c[i] for the first N elements.
 * Returns the difference of the getTimeStamp() readings taken before and
 * after the loop, i.e. the elapsed time in seconds. */
double sdaxpy(
        double * restrict a,
        double * restrict b,
        double * restrict c,
        int N
        )
{
    const double begin = getTimeStamp();

#pragma omp parallel for schedule(static)
    for (int idx = 0; idx < N; ++idx) {
        a[idx] += b[idx] * c[idx];
    }

    return getTimeStamp() - begin;
}

View File

@@ -1,283 +0,0 @@
!=======================================================================================
!
! Author: Jan Eitzinger (je), jan.treibig@gmail.com
! Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
!
! Permission is hereby granted, free of charge, to any person obtaining a copy
! of this software and associated documentation files (the "Software"), to deal
! in the Software without restriction, including without limitation the rights
! to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
! copies of the Software, and to permit persons to whom the Software is
! furnished to do so, subject to the following conditions:
!
! The above copyright notice and this permission notice shall be included in all
! copies or substantial portions of the Software.
!
! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
! FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
! AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
! LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
! SOFTWARE.
!
!=======================================================================================
! Timing helper: getTimeStamp() returns the current system_clock reading
! converted to seconds.
module timer
    use iso_fortran_env, only: int32, int64, real64
    implicit none
    public :: getTimeStamp
contains
    ! Returns the system_clock tick count divided by the tick rate,
    ! i.e. a time stamp in seconds as a 64-bit real.
    function getTimeStamp() result(ts)
        real(real64) :: ts
        integer(int64) :: ticks, ticks_per_second

        call system_clock(count=ticks, count_rate=ticks_per_second)
        ts = real(ticks, real64) / real(ticks_per_second, real64)
    end function getTimeStamp
end module timer
! Compile-time configuration shared by the kernels and the driver program.
module constants
    implicit none

    ! Problem size: number of elements per array and number of repetitions
    ! of the kernel sequence.
    integer, parameter :: n      = 20000000
    integer, parameter :: ntimes = 10

    ! Kind parameters of default real (sp) and double precision (dp).
    integer, parameter :: sp = kind(1.0)
    integer, parameter :: dp = kind(1.0d0)
end module constants
! Benchmark kernels.
! Every function runs one OpenMP-parallel loop over elements 1..n (n from
! module constants) and returns the time between the getTimeStamp() readings
! taken directly before and after the loop, in seconds.
module benchmarks
    use timer
    use constants
    implicit none
contains

    ! Init kernel: a(i) = scalar
    function init (a, scalar) result(seconds)
        real(kind=dp), allocatable, intent(inout) :: a(:)
        real(kind=dp), intent(in) :: scalar
        real(kind=dp) :: seconds
        real(kind=dp) :: t_begin, t_end
        integer :: idx

        t_begin = getTimeStamp()
        !$OMP PARALLEL DO
        do idx = 1, n
            a(idx) = scalar
        end do
        t_end = getTimeStamp()

        seconds = t_end - t_begin
    end function init

    ! Copy kernel: a(i) = b(i)
    function copy (a, b) result(seconds)
        real(kind=dp), allocatable, intent(inout) :: a(:)
        real(kind=dp), allocatable, intent(in) :: b(:)
        real(kind=dp) :: seconds
        real(kind=dp) :: t_begin, t_end
        integer :: idx

        t_begin = getTimeStamp()
        !$OMP PARALLEL DO
        do idx = 1, n
            a(idx) = b(idx)
        end do
        t_end = getTimeStamp()

        seconds = t_end - t_begin
    end function copy

    ! Update kernel: a(i) = a(i) * scalar
    function update (a, scalar) result(seconds)
        real(kind=dp), allocatable, intent(inout) :: a(:)
        real(kind=dp), intent(in) :: scalar
        real(kind=dp) :: seconds
        real(kind=dp) :: t_begin, t_end
        integer :: idx

        t_begin = getTimeStamp()
        !$OMP PARALLEL DO
        do idx = 1, n
            a(idx) = a(idx) * scalar
        end do
        t_end = getTimeStamp()

        seconds = t_end - t_begin
    end function update

    ! Triad kernel: a(i) = b(i) + scalar * c(i)
    function triad (a, b, c, scalar) result(seconds)
        real(kind=dp), allocatable, intent(inout) :: a(:)
        real(kind=dp), allocatable, intent(in) :: b(:)
        real(kind=dp), allocatable, intent(in) :: c(:)
        real(kind=dp), intent(in) :: scalar
        real(kind=dp) :: seconds
        real(kind=dp) :: t_begin, t_end
        integer :: idx

        t_begin = getTimeStamp()
        !$OMP PARALLEL DO
        do idx = 1, n
            a(idx) = b(idx) + scalar * c(idx)
        end do
        t_end = getTimeStamp()

        seconds = t_end - t_begin
    end function triad

    ! Daxpy kernel: a(i) = a(i) + scalar * b(i)
    function daxpy (a, b, scalar) result(seconds)
        real(kind=dp), allocatable, intent(inout) :: a(:)
        real(kind=dp), allocatable, intent(in) :: b(:)
        real(kind=dp), intent(in) :: scalar
        real(kind=dp) :: seconds
        real(kind=dp) :: t_begin, t_end
        integer :: idx

        t_begin = getTimeStamp()
        !$OMP PARALLEL DO
        do idx = 1, n
            a(idx) = a(idx) + scalar * b(idx)
        end do
        t_end = getTimeStamp()

        seconds = t_end - t_begin
    end function daxpy

    ! Schoenauer triad kernel: a(i) = b(i) + c(i) * d(i)
    function striad (a, b, c, d) result(seconds)
        real(kind=dp), allocatable, intent(inout) :: a(:)
        real(kind=dp), allocatable, intent(in) :: b(:)
        real(kind=dp), allocatable, intent(in) :: c(:)
        real(kind=dp), allocatable, intent(in) :: d(:)
        real(kind=dp) :: seconds
        real(kind=dp) :: t_begin, t_end
        integer :: idx

        t_begin = getTimeStamp()
        !$OMP PARALLEL DO
        do idx = 1, n
            a(idx) = b(idx) + c(idx) * d(idx)
        end do
        t_end = getTimeStamp()

        seconds = t_end - t_begin
    end function striad

    ! SDaxpy kernel: a(i) = a(i) + b(i) * c(i)
    function sdaxpy (a, b, c) result(seconds)
        real(kind=dp), allocatable, intent(inout) :: a(:)
        real(kind=dp), allocatable, intent(in) :: b(:)
        real(kind=dp), allocatable, intent(in) :: c(:)
        real(kind=dp) :: seconds
        real(kind=dp) :: t_begin, t_end
        integer :: idx

        t_begin = getTimeStamp()
        !$OMP PARALLEL DO
        do idx = 1, n
            a(idx) = a(idx) + b(idx) * c(idx)
        end do
        t_end = getTimeStamp()

        seconds = t_end - t_begin
    end function sdaxpy

end module benchmarks
program bwBench
   ! Driver of the bandwidth benchmark.
   !
   ! Allocates four double precision arrays of n elements, runs each of the
   ! seven kernels from module benchmarks ntimes times, and prints for every
   ! kernel the best data rate in MB/s together with the average, minimum and
   ! maximum run time in seconds.
   !
   ! The first repetition is treated as warm-up and excluded from the
   ! statistics whenever more than one repetition is available.
   use constants
   use benchmarks
   implicit none

   integer, parameter :: numbench = 7
   integer, parameter :: bytesPerWord = 8
   ! First repetition that enters the statistics, and how many samples that
   ! leaves. min() keeps the single sample when ntimes is 1, so the average
   ! never divides by zero.
   integer, parameter :: firstSample = min(2, ntimes)
   integer, parameter :: numSamples = ntimes - firstSample + 1

   real(kind=dp), allocatable :: a(:), b(:), c(:), d(:)
   real(kind=dp) :: scalar
   real(kind=dp) :: maxtime(numbench), mintime(numbench), avgtime(numbench), &
      times(numbench,ntimes)
   ! Data volume per kernel call in bytes. Stored as a real so that larger
   ! values of n cannot overflow a default integer.
   real(kind=dp) :: bytes(numbench)
   real(kind=dp) :: bytesPerArray
   character(len=11) :: label(numbench)
   integer :: i, k
   !$ INTEGER omp_get_num_threads
   !$ EXTERNAL omp_get_num_threads

   ! Each kernel is charged one word per array it references per iteration.
   bytesPerArray = real(bytesPerWord, dp) * real(n, dp)
   bytes(1) = 1.0d0 * bytesPerArray ! init
   bytes(2) = 2.0d0 * bytesPerArray ! copy
   bytes(3) = 2.0d0 * bytesPerArray ! update
   bytes(4) = 3.0d0 * bytesPerArray ! triad
   bytes(5) = 3.0d0 * bytesPerArray ! daxpy
   bytes(6) = 4.0d0 * bytesPerArray ! striad
   bytes(7) = 4.0d0 * bytesPerArray ! sdaxpy

   label(1) = " Init: "
   label(2) = " Copy: "
   label(3) = " Update: "
   label(4) = " Triad: "
   label(5) = " Daxpy: "
   label(6) = " STriad: "
   label(7) = " SDaxpy: "

   avgtime = 0.0d0
   mintime = huge(mintime)
   maxtime = 0.0d0

   allocate(a(n))
   allocate(b(n))
   allocate(c(n))
   allocate(d(n))

   !$omp parallel
   !$omp master
   print *,'----------------------------------------------'
   !$ print *,'Number of Threads = ',OMP_GET_NUM_THREADS()
   !$omp end master
   !$omp end parallel
   PRINT *,'----------------------------------------------'

   ! Initialise the arrays inside a parallel loop, as the kernels do.
   !$OMP PARALLEL DO
   do i = 1, n
      a(i) = 2.0d0
      b(i) = 2.0d0
      c(i) = 0.5d0
      d(i) = 1.0d0
   end do
   !$OMP END PARALLEL DO

   scalar = 3.0d0

   do k = 1, ntimes
      times(1, k) = init(b, scalar)
      times(2, k) = copy(c, a)
      times(3, k) = update(a, scalar)
      times(4, k) = triad(a, b, c, scalar)
      times(5, k) = daxpy(a, b, scalar)
      times(6, k) = striad(a, b, c, d)
      times(7, k) = sdaxpy(a, b, c)
   end do

   ! Accumulate statistics over the measured repetitions only.
   do k = firstSample, ntimes
      do i = 1, numbench
         avgtime(i) = avgtime(i) + times(i, k)
         mintime(i) = MIN(mintime(i), times(i, k))
         ! The running maximum must be compared against maxtime, not mintime.
         maxtime(i) = MAX(maxtime(i), times(i, k))
      end do
   end do

   print *,"-------------------------------------------------------------"
   print *,"Function Rate (MB/s) Avg time Min time Max time"
   do i = 1, numbench
      ! Divide by the number of samples that were actually summed.
      avgtime(i) = avgtime(i)/real(numSamples, dp)
      print "(a,f12.2, 2x, 3 (f10.4,3x))", label(i), bytes(i)/mintime(i)/1.0D6, &
         avgtime(i), mintime(i), maxtime(i)
   end do
   print *,"-------------------------------------------------------------"

   deallocate(a, b, c, d)
end program bwBench

View File

@@ -1,91 +0,0 @@
#!/usr/bin/env perl
# =======================================================================================
#
# Author: Jan Eitzinger (je), jan.eitzinger@fau.de
# Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# =======================================================================================
use strict;
use warnings;
use utf8;
# Usage: <script> <DIR> [<UNIT>]
#
# Reads every regular file in <DIR>, extracts the per-benchmark rate from each
# result line and prints one tab separated table row per thread count.
#
#   <DIR>   directory holding the result files. A file named '...-<N>.txt'
#           is stored under thread count <N>; any other file under 1.
#   <UNIT>  optional. 'MB' (default) keeps the values as they are, 'GB'
#           multiplies them by 0.001, a plain number is used as scale factor.
my ($DIR, $UNIT) = @ARGV;
if (not defined $DIR) {
    die "Need directory: $0 <DIR>\n";
}

# Turn the unit argument into a numeric scale factor. Anything that is neither
# a known unit nor a number is rejected instead of silently scaling by zero.
if (not defined $UNIT or $UNIT eq 'MB') {
    $UNIT = 1.0;
} elsif ($UNIT eq 'GB') {
    $UNIT = 0.001;
} elsif ($UNIT !~ m{\A (?: [0-9]+ \.? [0-9]* | \. [0-9]+ ) (?: [eE] [-+]? [0-9]+ )? \z}x) {
    die "Unknown unit '$UNIT': expected MB, GB or a numeric scale factor\n";
}

# %RES maps thread count => { benchmark name => scaled rate }.
my %RES;
my @testcases = ('Init', 'Sum', 'Copy', 'Update', 'Triad', 'Daxpy', 'STriad', 'SDaxpy');

FILE:
foreach my $file ( glob($DIR . '/*') ) {
    # Sub-directories and other special entries carry no results.
    next FILE unless -f $file;

    my $nt = 1;
    if ($file =~ /.*-([0-9]+)\.txt/) {
        $nt = $1;
    }

    open(my $fh, '<', $file) or die "can't open file $file: $!";
    $RES{$nt} = {};

    while ( my $line = <$fh> ) {
        # Only lines that split into exactly six fields are considered, and
        # of those only the ones whose second field contains a digit.
        my @fields = split(/[ ]+/, $line);
        next unless @fields == 6;
        next unless $fields[1] =~ /[0-9]+/;

        # Strip the colon from the label so it matches @testcases.
        $fields[0] =~ s/://;
        $RES{$nt}->{$fields[0]} = $fields[1] * $UNIT;
    }

    close $fh or die "can't close file $!";
}

# Header row.
print "#nt";
foreach my $test ( @testcases ) {
    printf "\t%s", $test;
}
print "\n";

# Unscaled values are printed without decimals, scaled ones with two.
my $cell_format = ( $UNIT > 0.1 ) ? "\t%.0f" : "\t%.2f";

foreach my $key (sort {$a <=> $b} keys %RES) {
    printf "%d", $key;
    foreach my $test ( @testcases ) {
        # A benchmark missing from a file is reported as 0.
        my $value = $RES{$key}->{$test};
        printf $cell_format, ( defined $value ? $value : 0 );
    }
    print "\n";
}