Merge branch 'master' of github.com:RRZE-HPC/TheBandwidthBenchmark

This commit is contained in:
Jan Eitzinger
2019-07-01 12:34:17 +02:00
35 changed files with 823 additions and 113 deletions

View File

@@ -180,7 +180,7 @@ int main (int argc, char** argv)
for (int j=0; j<NUMBENCH; j++) {
avgtime[j] = avgtime[j]/(double)(NTIMES-1);
double bytes = (double) benchmarks[j].words * sizeof(double) * N;
double flops = (double) benchmarks[j].flops * sizeof(double) * N;
double flops = (double) benchmarks[j].flops * N;
if (flops > 0){
printf("%s%11.2f %11.2f %11.4f %11.4f %11.4f\n", benchmarks[j].label,

45
MemoryHierarchy/Makefile Normal file
View File

@@ -0,0 +1,45 @@
# Build description for the MemoryHierarchy "striad" benchmark.
# Default goal: the striad executable. Extra goals: asm, clean.

#CONFIGURE TOOL CHAIN
CC = gcc
OPENMP = -fopenmp
#CFLAGS = -O3 -xHost -qopt-zmm-usage=high -std=c99 $(OPENMP)
CFLAGS = -O3 -std=c99 $(OPENMP)
LFLAGS = $(OPENMP)
DEFINES = -D_GNU_SOURCE
#DEFINES += -DVERBOSE
#DEFINES += -DLIKWID_PERFMON
DEFINES += -DNTIMES=5
DEFINES += -DARRAY_ALIGNMENT=64

#CONFIGURE BUILD SYSTEM
TARGET = striad
BUILD_DIR = ./build
SRC_DIR = ./src
INCLUDES += -I./src/

#DO NOT EDIT BELOW
VPATH = $(SRC_DIR)
ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c))
OBJ = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c))
CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(INCLUDES)

# The build directory is an order-only prerequisite (after the '|'): it must
# exist before anything is written into it, but its timestamp, which changes
# whenever a file is created inside it, must not trigger rebuilds.
${TARGET}: $(OBJ) | $(BUILD_DIR)
	${CC} ${LFLAGS} -o $(TARGET) $(OBJ) $(LIBS)

asm: $(ASM)

$(BUILD_DIR)/%.o: %.c | $(BUILD_DIR)
	$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@

$(BUILD_DIR)/%.s: %.c | $(BUILD_DIR)
	$(CC) -S $(CPPFLAGS) $(CFLAGS) $< -o $@

# -p keeps the rule from failing when the directory already exists.
$(BUILD_DIR):
	@mkdir -p $(BUILD_DIR)

# Neither goal produces a file of its own name.
.PHONY: asm clean

clean:
	@echo "===> CLEAN"
	@rm -rf $(BUILD_DIR)
	@rm -f $(TARGET)

38
MemoryHierarchy/bench.pl Executable file
View File

@@ -0,0 +1,38 @@
#!/usr/bin/env perl
# Sweep the vector length N from 100 up to (excluding) 8000000 in steps of 20%
# and run ./striad pinned with likwid-pin for every size. The output of each
# successful run is printed unchanged; it is expected to contain
# "<N> <performance>" as two whitespace separated numbers.
#
# Arguments:
#   <numcores>   number of cores handed to the likwid-pin expression
#   <seq|tp|ws>  benchmark variant, mapped to the numeric test type of ./striad
#   [<SMT>]      SMT threads per core used in the pin expression (default 2)
use strict;
use warnings;
use utf8;

my $usage = "Usage: ./bench.pl <numcores> <seq|tp|ws> [<SMT>]\n";

# Numeric test type expected by ./striad for every variant name.
my %typeFor = ( seq => 0, tp => 1, ws => 2 );

# Upper bound on repeated runs for one problem size, so a benchmark that never
# delivers a usable result cannot hang the scan.
my $maxAttempts = 10;

if ( @ARGV < 2 ) {
    print $usage;
    exit;
}

my ( $numCores, $variant, $SMT ) = @ARGV;

# A missing, empty or zero SMT argument falls back to two threads per core.
$SMT ||= 2;

# Both values are interpolated into a shell command below, so accept plain
# positive integers only.
for my $count ( $numCores, $SMT ) {
    die "Invalid count '$count'\n$usage" if $count !~ /\A[1-9][0-9]*\z/;
}

die "Unknown benchmark variant '$variant'\n$usage"
    if !exists $typeFor{$variant};
my $type = $typeFor{$variant};

my $N = 100;

while ( $N < 8000000 ) {
    my $result;

    # A run that reports a performance of 0.00 (or nothing parseable) is
    # treated as a failed measurement and repeated.
    ATTEMPT:
    for my $attempt ( 1 .. $maxAttempts ) {
        my $output = `likwid-pin -c E:S0:$numCores:1:$SMT -q ./striad $type $N`;
        die "Cannot run likwid-pin: $!\n" if !defined $output;

        next ATTEMPT if $output !~ /([0-9.]+) ([0-9.]+)/;
        next ATTEMPT if $2 eq '0.00';

        $result = $output;
        last ATTEMPT;
    }

    if ( defined $result ) {
        print $result;
    }
    else {
        warn "No valid result for N=$N after $maxAttempts attempts, skipping\n";
    }

    $N = int( $N * 1.2 );
}

View File

@@ -0,0 +1,89 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.treibig@gmail.com
* Copyright (c) 2019 RRZE, University Erlangen-Nuremberg
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* =======================================================================================
*/
#ifdef __linux__
#ifdef _OPENMP
#include <stdlib.h>
#include <stdio.h>
#include <sched.h>
#include <unistd.h>
#include <sys/types.h>
#include <pthread.h>
#include <sys/syscall.h>
#define MAX_NUM_THREADS 128
#define gettid() syscall(SYS_gettid)
/*
 * Return the lowest-numbered CPU contained in cpu_set.
 *
 * The whole set is scanned (CPU_SETSIZE entries), so CPUs with an index of
 * 128 and above are reported correctly as well.
 *
 * Returns -1 if the set is empty.
 */
static int
getProcessorID(cpu_set_t* cpu_set)
{
    for ( int processorId = 0; processorId < CPU_SETSIZE; processorId++ )
    {
        if ( CPU_ISSET(processorId, cpu_set) )
        {
            return processorId;
        }
    }

    return -1;
}
/*
 * Query the affinity mask of the calling thread and return the processor
 * that getProcessorID() selects from it.
 *
 * Returns -1 (after reporting the error on stderr) if the mask cannot be
 * queried.
 */
int
affinity_getProcessorId()
{
    cpu_set_t cpu_set;

    CPU_ZERO(&cpu_set);

    if ( sched_getaffinity(gettid(), sizeof(cpu_set_t), &cpu_set) != 0 )
    {
        perror("sched_getaffinity");
        return -1;
    }

    return getProcessorID(&cpu_set);
}
/*
 * Restrict the calling thread to the single processor processorId.
 *
 * The function has no way to return a status, so a failed request is
 * reported on stderr instead of being dropped silently.
 */
void
affinity_pinThread(int processorId)
{
    cpu_set_t cpuset;
    int status;

    CPU_ZERO(&cpuset);
    CPU_SET(processorId, &cpuset);

    /* pthread functions return the error number directly, errno is not set. */
    status = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);

    if ( status != 0 )
    {
        fprintf(stderr,
                "Error: cannot pin thread to processor %d (error code %d)\n",
                processorId, status);
    }
}
/*
 * Restrict the caller (pid 0 in sched_setaffinity terms) to the single
 * processor processorId.
 *
 * The function has no way to return a status, so a failed request is
 * reported on stderr instead of being dropped silently.
 */
void
affinity_pinProcess(int processorId)
{
    cpu_set_t cpuset;

    CPU_ZERO(&cpuset);
    CPU_SET(processorId, &cpuset);

    if ( sched_setaffinity(0, sizeof(cpu_set_t), &cpuset) != 0 )
    {
        perror("sched_setaffinity");
    }
}
#endif /*_OPENMP*/
#endif /*__linux__*/

View File

@@ -0,0 +1,36 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.treibig@gmail.com
* Copyright (c) 2019 RRZE, University Erlangen-Nuremberg
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* =======================================================================================
*/
#ifndef AFFINITY_H
#define AFFINITY_H
/* Thread and process pinning helpers. */

/* Returns a processor taken from the affinity mask of the calling thread. */
extern int affinity_getProcessorId();
/* Restricts the caller's scheduling affinity to the given processor
 * (implemented with sched_setaffinity on pid 0). */
extern void affinity_pinProcess(int);
/* Restricts the calling thread's affinity to the given processor
 * (implemented with pthread_setaffinity_np on pthread_self()). */
extern void affinity_pinThread(int);
#endif /*AFFINITY_H*/

View File

@@ -0,0 +1,58 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.treibig@gmail.com
* Copyright (c) 2019 RRZE, University Erlangen-Nuremberg
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* =======================================================================================
*/
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
/*
 * Allocate bytesize bytes whose address is a multiple of alignment.
 *
 * alignment  must be a power of two and a multiple of sizeof(void*)
 *            (requirement of posix_memalign)
 * bytesize   number of bytes to allocate
 *
 * Never returns NULL: every failure prints a message to stderr and
 * terminates the program with EXIT_FAILURE. Release the memory with free().
 */
void* allocate (int alignment, size_t bytesize)
{
    int errorCode;
    /* posix_memalign leaves the pointer unspecified on failure, so start from
     * a defined value before it is inspected below. */
    void* ptr = NULL;

    errorCode = posix_memalign(&ptr, alignment, bytesize);

    if (errorCode == EINVAL) {
        fprintf(stderr,
                "Error: Alignment parameter is not a power of two or not a multiple of sizeof(void*)\n");
        exit(EXIT_FAILURE);
    }

    if (errorCode == ENOMEM) {
        fprintf(stderr,
                "Error: Insufficient memory to fulfill the request\n");
        exit(EXIT_FAILURE);
    }

    /* Any other error code, or a NULL result, is a failure as well. */
    if (errorCode != 0 || ptr == NULL) {
        fprintf(stderr, "Error: posix_memalign failed!\n");
        exit(EXIT_FAILURE);
    }

    return ptr;
}

View File

@@ -0,0 +1,33 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.treibig@gmail.com
* Copyright (c) 2019 RRZE, University Erlangen-Nuremberg
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* =======================================================================================
*/
#ifndef __ALLOCATE_H_
#define __ALLOCATE_H_

/* size_t is used in the prototype below; include its definition so this
 * header can be included first in a translation unit. */
#include <stddef.h>

/*
 * Returns bytesize bytes of memory aligned to alignment bytes.
 * The implementation terminates the program instead of returning NULL.
 */
extern void* allocate (int alignment, size_t bytesize);

#endif

View File

@@ -0,0 +1,44 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.treibig@gmail.com
* Copyright (c) 2019 RRZE, University Erlangen-Nuremberg
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* =======================================================================================
*/
#ifndef LIKWID_MARKERS_H
#define LIKWID_MARKERS_H
/* With LIKWID_PERFMON defined the marker macros are taken from <likwid.h>
 * (presumably the LIKWID marker API; confirm against the installed header). */
#ifdef LIKWID_PERFMON
#include <likwid.h>
#else
/* Without LIKWID_PERFMON every marker macro expands to nothing, so
 * instrumented code builds unchanged when LIKWID is not used. */
#define LIKWID_MARKER_INIT
#define LIKWID_MARKER_THREADINIT
#define LIKWID_MARKER_SWITCH
#define LIKWID_MARKER_REGISTER(regionTag)
#define LIKWID_MARKER_START(regionTag)
#define LIKWID_MARKER_STOP(regionTag)
#define LIKWID_MARKER_CLOSE
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
#endif
#endif /*LIKWID_MARKERS_H*/

216
MemoryHierarchy/src/main.c Normal file
View File

@@ -0,0 +1,216 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.treibig@gmail.com
* Copyright (c) 2019 RRZE, University Erlangen-Nuremberg
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* =======================================================================================
*/
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <limits.h>
#include <float.h>
#ifdef _OPENMP
#include <omp.h>
#endif
#include <likwid_markers.h>
#include <timing.h>
#include <allocate.h>
#include <affinity.h>
#define HLINE "----------------------------------------------------------------------------\n"
#ifndef MIN
#define MIN(x,y) ((x)<(y)?(x):(y))
#endif
#ifndef MAX
#define MAX(x,y) ((x)>(y)?(x):(y))
#endif
#ifndef ABS
#define ABS(a) ((a) >= 0 ? (a) : -(a))
#endif
/*
 * Benchmark kernels. Arguments: result vector, three input vectors, vector
 * length, number of repetitions. The return value is the elapsed time in
 * seconds.
 *
 * The input vectors are pointers to const, exactly as in the kernel
 * definitions: declarations of one function that differ in the qualifiers of
 * the pointed-to type are not compatible in C.
 */
extern double striad_seq(double*, const double*, const double*, const double*, int, int);
extern double striad_tp(double*, const double*, const double*, const double*, int, int);
extern double striad_ws(double*, const double*, const double*, const double*, int, int);

/* Common signature used to select one of the kernels at run time. */
typedef double (*testFunc)(double*, const double*, const double*, const double*, int, int);
/*
 * Benchmark driver.
 *
 * Usage: <program> <test type> <N>
 *   test type  0 - sequential, 1 - OpenMP throughput, 2 - OpenMP worksharing
 *   N          vector length in elements (1 .. INT_MAX)
 *
 * The four vectors are allocated and initialised, the repetition count is
 * calibrated, and the selected kernel is then timed NTIMES times. The first
 * of these measurements is ignored for the statistics. Without VERBOSE the
 * output is a single line "<N> <MFlop/s>"; with VERBOSE a detailed table is
 * printed instead.
 */
int main (int argc, char** argv)
{
    size_t bytesPerWord = sizeof(double);
    size_t N;
    int type;
    size_t iter = 1;
    size_t scale = 1;
    double *a, *b, *c, *d;
    double E, S;
    double avgtime, maxtime, mintime;
    double times[NTIMES];
    double dataSize;
    testFunc func;
    char* testname;
    int length;

    if ( argc > 2 ) {
        char* end;
        long size;

        type = atoi(argv[1]);
        size = strtol(argv[2], &end, 10);

        /* The kernels take the length as int and read element N-1, so the
         * length has to be a positive number that fits into an int. */
        if ( end == argv[2] || size < 1 || size > INT_MAX ) {
            fprintf(stderr, "Invalid vector length: %s\n", argv[2]);
            exit(EXIT_FAILURE);
        }

        N = (size_t) size;
    } else {
        printf("Usage: %s <test type> <N>\n",argv[0]);
        printf("Test types: 0 - sequential, 1 - OpenMP throughput, 2 - OpenMP worksharing\n");
        exit(EXIT_SUCCESS);
    }

    length = (int) N;

    LIKWID_MARKER_INIT;

    switch ( type ) {
        case 0:
            func = striad_seq;
            testname = "striad_seq";
            break;
        case 1:
            func = striad_tp;
            testname = "striad_tp";
            /* In throughput mode every thread processes a full vector, so
             * the work is scaled by the number of threads. */
#ifdef _OPENMP
#pragma omp parallel
            {
#pragma omp single
                scale = omp_get_num_threads();

                LIKWID_MARKER_REGISTER("BENCH");
            }
#endif
            break;
        case 2:
            func = striad_ws;
            testname = "striad_ws";
            break;
        default:
            printf("Unknown test type: %d\n", type);
            exit(EXIT_FAILURE);
    }

    a = (double*) allocate( ARRAY_ALIGNMENT, N * bytesPerWord );
    b = (double*) allocate( ARRAY_ALIGNMENT, N * bytesPerWord );
    c = (double*) allocate( ARRAY_ALIGNMENT, N * bytesPerWord );
    d = (double*) allocate( ARRAY_ALIGNMENT, N * bytesPerWord );

#ifdef VERBOSE
    printf(HLINE);
    dataSize = 4.0 * bytesPerWord * N;

    if ( dataSize < 1.0E06 ) {
        printf ("Total allocated datasize: %8.2f KB\n", dataSize * 1.0E-03);
    } else {
        printf ("Total allocated datasize: %8.2f MB\n", dataSize * 1.0E-06);
    }
#endif

    avgtime = 0;
    maxtime = 0;
    mintime = FLT_MAX;

#ifdef VERBOSE
#ifdef _OPENMP
    printf(HLINE);
#pragma omp parallel
    {
        int k = omp_get_num_threads();
        int i = omp_get_thread_num();

#pragma omp single
        printf ("OpenMP enabled, running with %d threads\n", k);

        printf ("\tThread %d running on processor %d\n", i, affinity_getProcessorId());
    }
#endif
#endif

    S = getTimeStamp();
#pragma omp parallel for
    for (int i=0; i<length; i++) {
        a[i] = 2.0;
        b[i] = 1.0;
        c[i] = 0.8;
        d[i] = 1.01;
    }
    E = getTimeStamp();

#ifdef VERBOSE
    printf ("Timer resolution %.2e ", getTimeResolution());
    printf ("Ticks used %.0e\n", (E-S) / getTimeResolution());
#endif

    /*
     * Calibration: raise the repetition count until one kernel call takes
     * longer than 0.1s, aiming at roughly 0.2s per call. The scale factor is
     * derived from the time of the run just measured; a run too short to be
     * measured gets a fixed factor instead of a division by zero. The count
     * is capped at INT_MAX because the kernels take it as int.
     */
    iter = 5;

    for (;;) {
        double elapsed = func(a, b, c, d, length, (int) iter);
        double factor;

        if ( elapsed > 0.1 ) break;

        /* elapsed <= 0.1 guarantees a factor of at least 2. */
        factor = ( elapsed > 0.0 ) ? 0.2 / elapsed : 10.0;

        if ( factor >= (double) INT_MAX / (double) iter ) {
            iter = INT_MAX;
            break;
        }

        iter *= (size_t) factor;
    }

#ifdef VERBOSE
    printf ("Using %zu iterations \n", iter);
#endif

    for ( int k=0; k < NTIMES; k++) {
        times[k] = func(a, b, c, d, length, (int) iter);
    }

    /* times[0] is left out of the statistics. */
    for (int k=1; k<NTIMES; k++) {
        avgtime = avgtime + times[k];
        mintime = MIN(mintime, times[k]);
        maxtime = MAX(maxtime, times[k]);
    }

#ifdef VERBOSE
    printf(HLINE);
    printf("Function Rate(MB/s) Rate(MFlop/s) Avg time Min time Max time\n");
    avgtime = avgtime/(double)(NTIMES-1);
    double bytes = (double) 4.0 * sizeof(double) * N * iter * scale;
    double flops = (double) 2.0 * N * iter * scale;

    printf("%s %11.2f %11.2f %11.4f %11.4f %11.4f\n",
            testname,
            1.0E-06 * bytes/mintime,
            1.0E-06 * flops/mintime,
            avgtime,
            mintime,
            maxtime);
    printf("Flops %e\n", flops);
    printf(HLINE);
#else
    double flops = (double) 2 * N * iter * scale;
    /* N is a size_t and needs %zu. */
    printf("%zu %.2f\n", N, 1.0E-06 * flops/mintime);
#endif

    free(a);
    free(b);
    free(c);
    free(d);

    LIKWID_MARKER_CLOSE;

    return EXIT_SUCCESS;
}

View File

@@ -0,0 +1,57 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.treibig@gmail.com
* Copyright (c) 2019 RRZE, University Erlangen-Nuremberg
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* =======================================================================================
*/
#include <stdio.h>
#include <timing.h>
#include <likwid_markers.h>
/*
 * Sequential kernel: executes a[i] = b[i] + d[i] * c[i] for i in [0, N),
 * repeated iter times.
 *
 * Returns the difference of the two getTimeStamp() readings taken around the
 * repetition loop. The loop is enclosed in the LIKWID marker region "BENCH".
 */
double striad_seq(
double * restrict a,
const double * restrict b,
const double * restrict c,
const double * restrict d,
int N,
int iter
)
{
double S, E;
S = getTimeStamp();
LIKWID_MARKER_START("BENCH");
for(int j = 0; j < iter; j++) {
for (int i=0; i<N; i++) {
a[i] = b[i] + d[i] * c[i];
}
/* Reads the result after every sweep; presumably here to keep the compiler
 * from removing or merging the repeated sweeps - TODO confirm. */
if (a[N-1] > 2000) printf("Ai = %f\n",a[N-1]);
}
LIKWID_MARKER_STOP("BENCH");
E = getTimeStamp();
return E-S;
}

View File

@@ -0,0 +1,62 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.treibig@gmail.com
* Copyright (c) 2019 RRZE, University Erlangen-Nuremberg
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* =======================================================================================
*/
#include <stdio.h>
#include <stdlib.h>
#include <timing.h>
#include <allocate.h>
/*
 * Throughput kernel: every thread of the parallel region allocates a private
 * result vector and executes al[i] = b[i] + d[i] * c[i] for i in [0, N),
 * repeated iter times. The shared input vectors are only read; the result
 * vector a is not used.
 *
 * Returns the time in seconds from the start of the work until all threads
 * have finished.
 */
double striad_tp(
        double * restrict a,
        const double * restrict b,
        const double * restrict c,
        const double * restrict d,
        int N,
        int iter
        )
{
    double S, E;

    (void) a; /* kept only for the common kernel signature */

#pragma omp parallel
    {
        double* al = (double*) allocate( ARRAY_ALIGNMENT, N * sizeof(double));

        /* The implicit barrier at the end of the single construct lets all
         * threads start the work together, after the timestamp was taken. */
#pragma omp single
        S = getTimeStamp();

        for(int j = 0; j < iter; j++) {
            for (int i=0; i<N; i++) {
                al[i] = b[i] + d[i] * c[i];
            }

            if (al[N-1] > 2000) printf("Ai = %f\n",al[N-1]);
        }

        /* Wait for the slowest thread. Without this barrier the first thread
         * to arrive would take the end timestamp while others still work. */
#pragma omp barrier
#pragma omp single
        E = getTimeStamp();

        /* The buffer is private to this call; release it so repeated calls
         * do not leak one vector per thread. */
        free(al);
    }

    return E-S;
}

View File

@@ -0,0 +1,57 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.treibig@gmail.com
* Copyright (c) 2019 RRZE, University Erlangen-Nuremberg
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* =======================================================================================
*/
#include <stdio.h>
#include <timing.h>
/*
 * Worksharing kernel: executes a[i] = b[i] + d[i] * c[i] for i in [0, N),
 * repeated iter times, with the iterations of every sweep distributed over
 * the threads of one parallel region by "omp for".
 *
 * Returns the difference of the two getTimeStamp() readings taken before and
 * after the parallel region, so creating the region is part of the
 * measurement.
 */
double striad_ws(
double * restrict a,
const double * restrict b,
const double * restrict c,
const double * restrict d,
int N,
int iter
)
{
double S, E;
S = getTimeStamp();
#pragma omp parallel
{
/* Every thread runs the repetition loop; only the inner loop is shared. */
for(int j = 0; j < iter; j++) {
#pragma omp for
for (int i=0; i<N; i++) {
a[i] = b[i] + d[i] * c[i];
}
/* Executed by every thread after each sweep; presumably here to keep the
 * compiler from removing or merging the repeated sweeps - TODO confirm. */
if (a[N-1] > 2000) printf("Ai = %f\n",a[N-1]);
}
}
E = getTimeStamp();
return E-S;
}

View File

@@ -0,0 +1,49 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.treibig@gmail.com
* Copyright (c) 2019 RRZE, University Erlangen-Nuremberg
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* =======================================================================================
*/
#include <stdlib.h>
#include <time.h>
/*
 * Read CLOCK_MONOTONIC and return the reading in seconds.
 * Only differences between two readings are meaningful.
 */
double getTimeStamp()
{
    struct timespec now;

    clock_gettime(CLOCK_MONOTONIC, &now);

    /* whole seconds plus the nanosecond part scaled to seconds */
    return (double) now.tv_sec + 1.e-9 * (double) now.tv_nsec;
}
/*
 * Return the resolution of CLOCK_MONOTONIC, as reported by clock_getres,
 * in seconds.
 */
double getTimeResolution()
{
    struct timespec resolution;

    clock_getres(CLOCK_MONOTONIC, &resolution);

    /* whole seconds plus the nanosecond part scaled to seconds */
    return (double) resolution.tv_sec + 1.e-9 * (double) resolution.tv_nsec;
}
/*
 * Alias of getTimeStamp() under a name with a trailing underscore
 * (presumably for callers that expect that symbol form, e.g. Fortran -
 * TODO confirm).
 */
double getTimeStamp_()
{
    double stamp = getTimeStamp();

    return stamp;
}

View File

@@ -0,0 +1,35 @@
/*
* =======================================================================================
*
* Author: Jan Eitzinger (je), jan.treibig@gmail.com
* Copyright (c) 2019 RRZE, University Erlangen-Nuremberg
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* =======================================================================================
*/
#ifndef __TIMING_H_
#define __TIMING_H_
/* Timing helpers based on CLOCK_MONOTONIC. All values are in seconds. */

/* Current clock reading; use the difference of two readings. */
extern double getTimeStamp();
/* Resolution of the clock as reported by clock_getres. */
extern double getTimeResolution();
/* Forwards to getTimeStamp(). */
extern double getTimeStamp_();
#endif

115
README.md
View File

@@ -1,117 +1,8 @@
# The Bandwidth Benchmark
This is a collection of simple streaming kernels for teaching purposes.
It is heavily inspired by John McCalpin's https://www.cs.virginia.edu/stream/ benchmark.
It contains the following streaming kernels with corresponding data access pattern (Notation: S - store, L - load, WA - write allocate). All variables are vectors, s is a scalar:
It consists of two benchmark applications:
* init (S1, WA): Initialize an array: `a = s`. Store only.
* sum (L1): Vector reduction: `s += a`. Load only.
* copy (L1, S1, WA): Classic memcopy: `a = b`.
* update (L1, S1): Update vector: `a = a * scalar`. Also load + store but without write allocate.
* triad (L2, S1, WA): Stream triad: `a = b + c * scalar`.
* daxpy (L2, S1): Daxpy: `a = a + b * scalar`.
* striad (L3, S1, WA): Schoenauer triad: `a = b + c * d`.
* sdaxpy (L3, S1): Schoenauer triad without write allocate: `a = a + b * c`.
As added benefit the code is a blueprint for a minimal benchmarking application with a generic makefile and modules for aligned array allocation, accurate timing and affinity settings. Those components can be used standalone in your own project.
## Build
1. Configure the toolchain and additional options in `config.mk`:
```
# Supported: GCC, CLANG, ICC
TAG ?= GCC
ENABLE_OPENMP ?= false
OPTIONS = -DSIZE=40000000ull
OPTIONS += -DNTIMES=10
OPTIONS += -DARRAY_ALIGNMENT=64
#OPTIONS += -DVERBOSE_AFFINITY
#OPTIONS += -DVERBOSE_DATASIZE
#OPTIONS += -DVERBOSE_TIMER
```
The verbosity options enable detailed output about affinity settings, allocation sizes and timer resolution.
2. Build with:
```
make
```
You can build multiple toolchains in the same directory, but notice that the Makefile is only acting on the one currently set. Intermediate build results are located in the `<TOOLCHAIN>` directory.
To output the executed commands use:
```
make Q=
```
3. Clean up with:
```
make clean
```
to clean intermediate build results.
```
make distclean
```
to clean intermediate build results and binary.
4. (Optional) Generate assembler:
```
make asm
```
The assembler files will also be located in the `<TOOLCHAIN>` directory.
## Usage
To run the benchmark call:
```
./bwBench-<TOOLCHAIN>
```
The benchmark will output the results similar to the stream benchmark. Results are validated.
For threaded execution it is recommended to control thread affinity.
We recommend using `likwid-pin` for benchmarking:
```
likwid-pin -c 0-3 ./bwbench-GCC
```
Example output for threaded execution:
```
-------------------------------------------------------------
[pthread wrapper]
[pthread wrapper] MAIN -> 0
[pthread wrapper] PIN_MASK: 0->1 1->2 2->3
[pthread wrapper] SKIP MASK: 0x0
threadid 140271463495424 -> core 1 - OK
threadid 140271455102720 -> core 2 - OK
threadid 140271446710016 -> core 3 - OK
OpenMP enabled, running with 4 threads
----------------------------------------------------------------------------
Function Rate(MB/s) Rate(MFlop/s) Avg time Min time Max time
Init: 22111.53 - 0.0148 0.0145 0.0165
Sum: 46808.59 46808.59 0.0077 0.0068 0.0140
Copy: 30983.06 - 0.0207 0.0207 0.0208
Update: 43778.69 21889.34 0.0147 0.0146 0.0148
Triad: 34476.64 22984.43 0.0282 0.0278 0.0305
Daxpy: 45908.82 30605.88 0.0214 0.0209 0.0242
STriad: 37502.37 18751.18 0.0349 0.0341 0.0388
SDaxpy: 46822.63 23411.32 0.0281 0.0273 0.0325
----------------------------------------------------------------------------
Solution Validates
```
A perl wrapper script (bench.pl) is also provided to scan ranges of thread counts and determine the absolute highest sustained main memory bandwidth. In order to use it `likwid-pin` has to be in your path. The script has three required and one optional command line arguments:
```
$./bench.pl <executable> <thread count range> <repetitions> [<SMT setting>]
```
Example usage:
```
$./bench.pl ./bwbench-GCC 2-8 6
```
The script will always use physical cores only, where two SMT threads is the default. For different SMT thread counts use the 4th command line argument. Example for a processor without SMT:
```
$./bench.pl ./bwbench-GCC 14-24 10 1
```
* [MainMemory](https://github.com/RRZE-HPC/TheBandwidthBenchmark/wiki/MainMemory)
* [MemoryHierarchy](https://github.com/RRZE-HPC/TheBandwidthBenchmark/wiki/MemoryHierarchy)