Merge branch 'master' of github.com:RRZE-HPC/TheBandwidthBenchmark
This commit is contained in:
@@ -180,7 +180,7 @@ int main (int argc, char** argv)
|
|||||||
for (int j=0; j<NUMBENCH; j++) {
|
for (int j=0; j<NUMBENCH; j++) {
|
||||||
avgtime[j] = avgtime[j]/(double)(NTIMES-1);
|
avgtime[j] = avgtime[j]/(double)(NTIMES-1);
|
||||||
double bytes = (double) benchmarks[j].words * sizeof(double) * N;
|
double bytes = (double) benchmarks[j].words * sizeof(double) * N;
|
||||||
double flops = (double) benchmarks[j].flops * sizeof(double) * N;
|
double flops = (double) benchmarks[j].flops * N;
|
||||||
|
|
||||||
if (flops > 0){
|
if (flops > 0){
|
||||||
printf("%s%11.2f %11.2f %11.4f %11.4f %11.4f\n", benchmarks[j].label,
|
printf("%s%11.2f %11.2f %11.4f %11.4f %11.4f\n", benchmarks[j].label,
|
||||||
45
MemoryHierarchy/Makefile
Normal file
45
MemoryHierarchy/Makefile
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
# Build rules for the striad benchmark.
#   make        - build the $(TARGET) executable from all C files in $(SRC_DIR)
#   make asm    - emit assembly listings for all C files into $(BUILD_DIR)
#   make clean  - remove $(BUILD_DIR) and the executable

#CONFIGURE TOOL CHAIN
CC = gcc
OPENMP = -fopenmp
#CFLAGS = -O3 -xHost -qopt-zmm-usage=high -std=c99 $(OPENMP)
CFLAGS = -O3 -std=c99 $(OPENMP)
LFLAGS = $(OPENMP)
DEFINES = -D_GNU_SOURCE
#DEFINES += -DVERBOSE
#DEFINES += -DLIKWID_PERFMON
DEFINES += -DNTIMES=5
DEFINES += -DARRAY_ALIGNMENT=64

#CONFIGURE BUILD SYSTEM
TARGET = striad
BUILD_DIR = ./build
SRC_DIR = ./src
INCLUDES += -I./src/

#DO NOT EDIT BELOW
VPATH = $(SRC_DIR)
ASM = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.s,$(wildcard $(SRC_DIR)/*.c))
OBJ = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c))
CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(INCLUDES)

${TARGET}: $(OBJ)
	${CC} ${LFLAGS} -o $(TARGET) $(OBJ) $(LIBS)

asm: $(ASM)

# The build directory is an order-only prerequisite (after the '|'): it must
# exist before compiling, but its timestamp, which changes whenever a file is
# written into it, must not trigger rebuilds.
$(BUILD_DIR)/%.o: %.c | $(BUILD_DIR)
	$(CC) -c $(CPPFLAGS) $(CFLAGS) $< -o $@

$(BUILD_DIR)/%.s: %.c | $(BUILD_DIR)
	$(CC) -S $(CPPFLAGS) $(CFLAGS) $< -o $@

# -p: do not fail when the directory already exists.
$(BUILD_DIR):
	@mkdir -p $(BUILD_DIR)

.PHONY: clean asm

clean:
	@echo "===> CLEAN"
	@rm -rf $(BUILD_DIR)
	@rm -f $(TARGET)
|
||||||
|
|
||||||
38
MemoryHierarchy/bench.pl
Executable file
38
MemoryHierarchy/bench.pl
Executable file
@@ -0,0 +1,38 @@
|
|||||||
|
#!/usr/bin/env perl
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
use utf8;
|
||||||
|
|
||||||
|
# Sweep the striad benchmark over a geometric range of array lengths.
#
# Usage: ./bench.pl <numcores> <seq|tp|ws> <SMT>
#
# For every length N (starting at 100 and growing by a factor of 1.2 while
# N < 8000000) the benchmark is started through likwid-pin and the line it
# prints is forwarded to STDOUT unchanged.

my $usage = "Usage: ./bench.pl <numcores> <seq|tp|ws> <SMT>\n";

if ( @ARGV < 3 ) {
    print STDERR $usage;
    exit 1;
}

my ( $numCores, $mode ) = @ARGV;

# A false SMT argument (0 or the empty string) falls back to 2.
my $SMT = $ARGV[2] ? $ARGV[2] : 2;

# Numeric test type that ./striad expects for each symbolic name.
my %typeOf = ( seq => 0, tp => 1, ws => 2 );

if ( !exists $typeOf{$mode} ) {
    print STDERR "Unknown benchmark type '$mode'\n", $usage;
    exit 1;
}
my $type = $typeOf{$mode};

# Both values are interpolated into a shell command line below, so only plain
# non-negative integers are accepted.
for my $value ( $numCores, $SMT ) {
    if ( $value !~ /\A[0-9]+\z/ ) {
        print STDERR "Expected a non-negative integer, got '$value'\n", $usage;
        exit 1;
    }
}

my $N = 100;

while ( $N < 8000000 ) {
    my $result;
    my $performance = '0.00';

    # Repeat the run for as long as the reported rate is exactly '0.00'.
    while ( $performance eq '0.00' ) {
        $result = `likwid-pin -c E:S0:$numCores:1:$SMT -q ./striad $type $N`;

        # The output must contain two space-separated numbers; the second one
        # is the rate. Stop instead of continuing with an undefined value
        # when the benchmark could not be run or printed something else.
        if ( defined $result && $result =~ /([0-9.]+) ([0-9.]+)/ ) {
            $performance = $2;
        } else {
            die "bench.pl: no result line in striad output for N=$N (exit status $?)\n";
        }
    }

    print $result;
    $N = int( $N * 1.2 );
}
|
||||||
89
MemoryHierarchy/src/affinity.c
Normal file
89
MemoryHierarchy/src/affinity.c
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
/*
|
||||||
|
* =======================================================================================
|
||||||
|
*
|
||||||
|
* Author: Jan Eitzinger (je), jan.treibig@gmail.com
|
||||||
|
* Copyright (c) 2019 RRZE, University Erlangen-Nuremberg
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in all
|
||||||
|
* copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*
|
||||||
|
* =======================================================================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef __linux__
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <sched.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <pthread.h>
|
||||||
|
#include <sys/syscall.h>
|
||||||
|
|
||||||
|
#define MAX_NUM_THREADS 128
|
||||||
|
#define gettid() syscall(SYS_gettid)
|
||||||
|
|
||||||
|
static int
|
||||||
|
getProcessorID(cpu_set_t* cpu_set)
|
||||||
|
{
|
||||||
|
int processorId;
|
||||||
|
|
||||||
|
for ( processorId = 0; processorId < MAX_NUM_THREADS; processorId++ )
|
||||||
|
{
|
||||||
|
if ( CPU_ISSET(processorId,cpu_set) )
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return processorId;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Query the affinity mask of the calling thread (identified by its kernel
 * thread id) and return the lowest CPU index contained in it, as determined
 * by getProcessorID(). The result of sched_getaffinity() is not checked; if
 * it fails the mask stays empty.
 */
int
affinity_getProcessorId()
{
    cpu_set_t mask;

    CPU_ZERO(&mask);
    sched_getaffinity(gettid(), sizeof(mask), &mask);

    return getProcessorID(&mask);
}
|
||||||
|
|
||||||
|
/*
 * Restrict the calling thread to the single CPU processorId.
 * The return value of pthread_setaffinity_np() is ignored.
 */
void
affinity_pinThread(int processorId)
{
    cpu_set_t mask;

    /* Build a mask that contains exactly one CPU. */
    CPU_ZERO(&mask);
    CPU_SET(processorId, &mask);

    pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask);
}
|
||||||
|
|
||||||
|
/*
 * Restrict the caller to the single CPU processorId via sched_setaffinity()
 * with pid 0. The return value of sched_setaffinity() is ignored.
 */
void
affinity_pinProcess(int processorId)
{
    cpu_set_t mask;

    /* Build a mask that contains exactly one CPU. */
    CPU_ZERO(&mask);
    CPU_SET(processorId, &mask);

    sched_setaffinity(0, sizeof(mask), &mask);
}
|
||||||
|
#endif /*_OPENMP*/
|
||||||
|
#endif /*__linux__*/
|
||||||
36
MemoryHierarchy/src/affinity.h
Normal file
36
MemoryHierarchy/src/affinity.h
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
/*
|
||||||
|
* =======================================================================================
|
||||||
|
*
|
||||||
|
* Author: Jan Eitzinger (je), jan.treibig@gmail.com
|
||||||
|
* Copyright (c) 2019 RRZE, University Erlangen-Nuremberg
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in all
|
||||||
|
* copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*
|
||||||
|
* =======================================================================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef AFFINITY_H
#define AFFINITY_H

/*
 * Thread/process pinning helpers. The definitions in affinity.c are only
 * compiled when both __linux__ and _OPENMP are defined.
 */

/* Lowest CPU index in the affinity mask of the calling thread.
 * Declared with (void) so that the declaration is a real prototype. */
extern int affinity_getProcessorId(void);

/* Pin the caller to one CPU using sched_setaffinity() with pid 0. */
extern void affinity_pinProcess(int);

/* Pin the calling thread to one CPU using pthread_setaffinity_np(). */
extern void affinity_pinThread(int);

#endif /*AFFINITY_H*/
|
||||||
|
|
||||||
58
MemoryHierarchy/src/allocate.c
Normal file
58
MemoryHierarchy/src/allocate.c
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
/*
|
||||||
|
* =======================================================================================
|
||||||
|
*
|
||||||
|
* Author: Jan Eitzinger (je), jan.treibig@gmail.com
|
||||||
|
* Copyright (c) 2019 RRZE, University Erlangen-Nuremberg
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in all
|
||||||
|
* copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*
|
||||||
|
* =======================================================================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <errno.h>
|
||||||
|
|
||||||
|
/*
 * Allocate bytesize bytes aligned to alignment bytes with posix_memalign().
 *
 * alignment: requested alignment in bytes; posix_memalign() rejects values
 *            that are not a power of two (or not a multiple of
 *            sizeof(void*)) with EINVAL.
 * bytesize:  number of bytes to allocate.
 *
 * Returns the allocated pointer; the caller releases it with free().
 * Any failure is reported on stderr and terminates the program with
 * EXIT_FAILURE, so the function never returns NULL.
 */
void* allocate (int alignment, size_t bytesize)
{
    /* Initialised so that the final check is well defined even when
     * posix_memalign() fails without storing to ptr. */
    void* ptr = NULL;
    int errorCode = posix_memalign(&ptr, alignment, bytesize);

    if (errorCode == EINVAL) {
        fprintf(stderr,
                "Error: Alignment parameter is not a power of two\n");
        exit(EXIT_FAILURE);
    }

    if (errorCode == ENOMEM) {
        fprintf(stderr,
                "Error: Insufficient memory to fulfill the request\n");
        exit(EXIT_FAILURE);
    }

    /* Catches any other error code as well as a NULL result. */
    if (errorCode != 0 || ptr == NULL) {
        fprintf(stderr, "Error: posix_memalign failed!\n");
        exit(EXIT_FAILURE);
    }

    return ptr;
}
|
||||||
33
MemoryHierarchy/src/allocate.h
Normal file
33
MemoryHierarchy/src/allocate.h
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
/*
|
||||||
|
* =======================================================================================
|
||||||
|
*
|
||||||
|
* Author: Jan Eitzinger (je), jan.treibig@gmail.com
|
||||||
|
* Copyright (c) 2019 RRZE, University Erlangen-Nuremberg
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in all
|
||||||
|
* copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*
|
||||||
|
* =======================================================================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __ALLOCATE_H_
#define __ALLOCATE_H_

/* size_t is used in the prototype below; include its definition so that the
 * header does not depend on what the including file pulled in before it. */
#include <stddef.h>

/*
 * Allocate bytesize bytes aligned to alignment bytes. On failure the
 * implementation prints an error and exits, so the result is never NULL.
 */
extern void* allocate (int alignment, size_t bytesize);

#endif
|
||||||
44
MemoryHierarchy/src/likwid_markers.h
Normal file
44
MemoryHierarchy/src/likwid_markers.h
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
/*
|
||||||
|
* =======================================================================================
|
||||||
|
*
|
||||||
|
* Author: Jan Eitzinger (je), jan.treibig@gmail.com
|
||||||
|
* Copyright (c) 2019 RRZE, University Erlangen-Nuremberg
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in all
|
||||||
|
* copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*
|
||||||
|
* =======================================================================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Marker macro front end: uses the real likwid.h when LIKWID_PERFMON is
 * defined and otherwise defines every marker macro as empty. */
#ifndef LIKWID_MARKERS_H
|
||||||
|
#define LIKWID_MARKERS_H
|
||||||
|
|
||||||
|
#ifdef LIKWID_PERFMON
|
||||||
|
#include <likwid.h>
|
||||||
|
#else
|
||||||
|
/* Stubs: each macro below expands to nothing, so marker calls in the sources
 * compile away when LIKWID_PERFMON is not defined. */
#define LIKWID_MARKER_INIT
|
||||||
|
#define LIKWID_MARKER_THREADINIT
|
||||||
|
#define LIKWID_MARKER_SWITCH
|
||||||
|
#define LIKWID_MARKER_REGISTER(regionTag)
|
||||||
|
#define LIKWID_MARKER_START(regionTag)
|
||||||
|
#define LIKWID_MARKER_STOP(regionTag)
|
||||||
|
#define LIKWID_MARKER_CLOSE
|
||||||
|
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
|
||||||
|
#endif /* LIKWID_PERFMON */
|
||||||
|
|
||||||
|
#endif /*LIKWID_MARKERS_H*/
|
||||||
216
MemoryHierarchy/src/main.c
Normal file
216
MemoryHierarchy/src/main.c
Normal file
@@ -0,0 +1,216 @@
|
|||||||
|
/*
|
||||||
|
* =======================================================================================
|
||||||
|
*
|
||||||
|
* Author: Jan Eitzinger (je), jan.treibig@gmail.com
|
||||||
|
* Copyright (c) 2019 RRZE, University Erlangen-Nuremberg
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in all
|
||||||
|
* copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*
|
||||||
|
* =======================================================================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <float.h>
|
||||||
|
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#include <omp.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <likwid_markers.h>
|
||||||
|
#include <timing.h>
|
||||||
|
#include <allocate.h>
|
||||||
|
#include <affinity.h>
|
||||||
|
|
||||||
|
#define HLINE "----------------------------------------------------------------------------\n"
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef MIN
|
||||||
|
#define MIN(x,y) ((x)<(y)?(x):(y))
|
||||||
|
#endif
|
||||||
|
#ifndef MAX
|
||||||
|
#define MAX(x,y) ((x)>(y)?(x):(y))
|
||||||
|
#endif
|
||||||
|
#ifndef ABS
|
||||||
|
#define ABS(a) ((a) >= 0 ? (a) : -(a))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * Benchmark kernels, each defined in its own source file. Arguments are the
 * arrays a, b, c, d, the array length and the repetition count; the return
 * value is the elapsed time as measured with getTimeStamp().
 * The const qualifiers match the definitions: declaring the read-only
 * operands as plain double* would make the declarations incompatible with
 * the functions they refer to.
 */
extern double striad_seq(double*, const double*, const double*, const double*, int, int);
extern double striad_tp(double*, const double*, const double*, const double*, int, int);
extern double striad_ws(double*, const double*, const double*, const double*, int, int);

/* Common signature used to select one of the kernels at run time. */
typedef double (*testFunc)(double*, const double*, const double*, const double*, int, int);
|
||||||
|
|
||||||
|
int main (int argc, char** argv)
|
||||||
|
{
|
||||||
|
size_t bytesPerWord = sizeof(double);
|
||||||
|
size_t N;
|
||||||
|
int type;
|
||||||
|
size_t iter = 1;
|
||||||
|
size_t scale = 1;
|
||||||
|
double *a, *b, *c, *d;
|
||||||
|
double E, S;
|
||||||
|
double avgtime, maxtime, mintime;
|
||||||
|
double times[NTIMES];
|
||||||
|
double dataSize;
|
||||||
|
testFunc func;
|
||||||
|
char* testname;
|
||||||
|
|
||||||
|
|
||||||
|
if ( argc > 2 ) {
|
||||||
|
type = atoi(argv[1]);
|
||||||
|
N = atoi(argv[2]);
|
||||||
|
} else {
|
||||||
|
printf("Usage: %s <test type> <N>\n",argv[0]);
|
||||||
|
printf("Test types: 0 - sequential, 1 - OpenMP throughput, 2 - OpenMP worksharing\n");
|
||||||
|
exit(EXIT_SUCCESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
LIKWID_MARKER_INIT;
|
||||||
|
|
||||||
|
switch ( type ) {
|
||||||
|
case 0:
|
||||||
|
func = striad_seq;
|
||||||
|
testname = "striad_seq";
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
func = striad_tp;
|
||||||
|
testname = "striad_tp";
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
|
#pragma omp single
|
||||||
|
scale = omp_get_num_threads();
|
||||||
|
|
||||||
|
|
||||||
|
LIKWID_MARKER_REGISTER("BENCH");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
func = striad_ws;
|
||||||
|
testname = "striad_ws";
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
printf("Unknown test type: %d\n", type);
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
a = (double*) allocate( ARRAY_ALIGNMENT, N * bytesPerWord );
|
||||||
|
b = (double*) allocate( ARRAY_ALIGNMENT, N * bytesPerWord );
|
||||||
|
c = (double*) allocate( ARRAY_ALIGNMENT, N * bytesPerWord );
|
||||||
|
d = (double*) allocate( ARRAY_ALIGNMENT, N * bytesPerWord );
|
||||||
|
|
||||||
|
#ifdef VERBOSE
|
||||||
|
printf(HLINE);
|
||||||
|
dataSize = 4.0 * bytesPerWord * N;
|
||||||
|
|
||||||
|
if ( dataSize < 1.0E06 ) {
|
||||||
|
printf ("Total allocated datasize: %8.2f KB\n", dataSize * 1.0E-03);
|
||||||
|
} else {
|
||||||
|
printf ("Total allocated datasize: %8.2f MB\n", dataSize * 1.0E-06);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
avgtime = 0;
|
||||||
|
maxtime = 0;
|
||||||
|
mintime = FLT_MAX;
|
||||||
|
|
||||||
|
#ifdef VERBOSE
|
||||||
|
#ifdef _OPENMP
|
||||||
|
printf(HLINE);
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
|
int k = omp_get_num_threads();
|
||||||
|
int i = omp_get_thread_num();
|
||||||
|
|
||||||
|
#pragma omp single
|
||||||
|
printf ("OpenMP enabled, running with %d threads\n", k);
|
||||||
|
|
||||||
|
printf ("\tThread %d running on processor %d\n", i, affinity_getProcessorId());
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
S = getTimeStamp();
|
||||||
|
#pragma omp parallel for
|
||||||
|
for (int i=0; i<N; i++) {
|
||||||
|
a[i] = 2.0;
|
||||||
|
b[i] = 1.0;
|
||||||
|
c[i] = 0.8;
|
||||||
|
d[i] = 1.01;
|
||||||
|
}
|
||||||
|
E = getTimeStamp();
|
||||||
|
#ifdef VERBOSE
|
||||||
|
printf ("Timer resolution %.2e ", getTimeResolution());
|
||||||
|
printf ("Ticks used %.0e\n", (E-S) / getTimeResolution());
|
||||||
|
#endif
|
||||||
|
|
||||||
|
iter = 5;
|
||||||
|
times[0] = 0.0;
|
||||||
|
times[1] = 0.0;
|
||||||
|
|
||||||
|
while ( times[0] < 0.2 ){
|
||||||
|
times[0] = func(a, b, c, d, N, iter);
|
||||||
|
if ( times[0] > 0.1 ) break;
|
||||||
|
double factor = 0.2 / (times[0] - times[1]);
|
||||||
|
iter *= (int) factor;
|
||||||
|
times[1] = times[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef VERBOSE
|
||||||
|
printf ("Using %d iterations \n", iter);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
for ( int k=0; k < NTIMES; k++) {
|
||||||
|
times[k] = func(a, b, c, d, N, iter);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int k=1; k<NTIMES; k++) {
|
||||||
|
avgtime = avgtime + times[k];
|
||||||
|
mintime = MIN(mintime, times[k]);
|
||||||
|
maxtime = MAX(maxtime, times[k]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef VERBOSE
|
||||||
|
printf(HLINE);
|
||||||
|
printf("Function Rate(MB/s) Rate(MFlop/s) Avg time Min time Max time\n");
|
||||||
|
avgtime = avgtime/(double)(NTIMES-1);
|
||||||
|
double bytes = (double) 4.0 * sizeof(double) * N * iter * scale;
|
||||||
|
double flops = (double) 2.0 * N * iter * scale;
|
||||||
|
|
||||||
|
printf("%s %11.2f %11.2f %11.4f %11.4f %11.4f\n",
|
||||||
|
testname,
|
||||||
|
1.0E-06 * bytes/mintime,
|
||||||
|
1.0E-06 * flops/mintime,
|
||||||
|
avgtime,
|
||||||
|
mintime,
|
||||||
|
maxtime);
|
||||||
|
printf("Flops %e\n", flops);
|
||||||
|
printf(HLINE);
|
||||||
|
#else
|
||||||
|
double flops = (double) 2 * N * iter * scale;
|
||||||
|
printf("%d %.2f\n", N, 1.0E-06 * flops/mintime);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
LIKWID_MARKER_CLOSE;
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
57
MemoryHierarchy/src/striad_seq.c
Normal file
57
MemoryHierarchy/src/striad_seq.c
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
/*
|
||||||
|
* =======================================================================================
|
||||||
|
*
|
||||||
|
* Author: Jan Eitzinger (je), jan.treibig@gmail.com
|
||||||
|
* Copyright (c) 2019 RRZE, University Erlangen-Nuremberg
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in all
|
||||||
|
* copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*
|
||||||
|
* =======================================================================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include <timing.h>
|
||||||
|
#include <likwid_markers.h>
|
||||||
|
|
||||||
|
/*
 * Sequential Schoenauer triad: a[i] = b[i] + d[i] * c[i] for i in [0, N),
 * repeated iter times on the calling thread.
 *
 * Returns the time elapsed between the two getTimeStamp() calls that
 * bracket the marker region and the loops.
 */
double striad_seq(
        double * restrict a,
        const double * restrict b,
        const double * restrict c,
        const double * restrict d,
        int N,
        int iter
        )
{
    const double start = getTimeStamp();

    LIKWID_MARKER_START("BENCH");
    for (int rep = 0; rep < iter; rep++) {
        for (int i=0; i<N; i++) {
            a[i] = b[i] + d[i] * c[i];
        }

        /* Reads a result element after every sweep; presumably this keeps
         * the compiler from discarding the repeated sweeps - TODO confirm. */
        if (a[N-1] > 2000) printf("Ai = %f\n",a[N-1]);
    }
    LIKWID_MARKER_STOP("BENCH");

    return getTimeStamp() - start;
}
|
||||||
62
MemoryHierarchy/src/striad_tp.c
Normal file
62
MemoryHierarchy/src/striad_tp.c
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
/*
|
||||||
|
* =======================================================================================
|
||||||
|
*
|
||||||
|
* Author: Jan Eitzinger (je), jan.treibig@gmail.com
|
||||||
|
* Copyright (c) 2019 RRZE, University Erlangen-Nuremberg
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in all
|
||||||
|
* copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*
|
||||||
|
* =======================================================================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdio.h>
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include <timing.h>
|
||||||
|
#include <allocate.h>
|
||||||
|
|
||||||
|
double striad_tp(
|
||||||
|
double * restrict a,
|
||||||
|
const double * restrict b,
|
||||||
|
const double * restrict c,
|
||||||
|
const double * restrict d,
|
||||||
|
int N,
|
||||||
|
int iter
|
||||||
|
)
|
||||||
|
{
|
||||||
|
double S, E;
|
||||||
|
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
|
double* al = (double*) allocate( ARRAY_ALIGNMENT, N * sizeof(double));
|
||||||
|
|
||||||
|
#pragma omp single
|
||||||
|
S = getTimeStamp();
|
||||||
|
for(int j = 0; j < iter; j++) {
|
||||||
|
for (int i=0; i<N; i++) {
|
||||||
|
al[i] = b[i] + d[i] * c[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (al[N-1] > 2000) printf("Ai = %f\n",al[N-1]);
|
||||||
|
}
|
||||||
|
#pragma omp single
|
||||||
|
E = getTimeStamp();
|
||||||
|
}
|
||||||
|
|
||||||
|
return E-S;
|
||||||
|
}
|
||||||
57
MemoryHierarchy/src/striad_ws.c
Normal file
57
MemoryHierarchy/src/striad_ws.c
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
/*
|
||||||
|
* =======================================================================================
|
||||||
|
*
|
||||||
|
* Author: Jan Eitzinger (je), jan.treibig@gmail.com
|
||||||
|
* Copyright (c) 2019 RRZE, University Erlangen-Nuremberg
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in all
|
||||||
|
* copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*
|
||||||
|
* =======================================================================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include <timing.h>
|
||||||
|
|
||||||
|
/*
 * Worksharing variant of the Schoenauer triad: the index range [0, N) of
 * a[i] = b[i] + d[i] * c[i] is distributed over the threads of one parallel
 * region with an OpenMP for construct, repeated iter times.
 *
 * Returns the time elapsed around the whole parallel region.
 */
double striad_ws(
        double * restrict a,
        const double * restrict b,
        const double * restrict c,
        const double * restrict d,
        int N,
        int iter
        )
{
    const double start = getTimeStamp();

#pragma omp parallel
    {
        for (int rep = 0; rep < iter; rep++) {
#pragma omp for
            for (int i=0; i<N; i++) {
                a[i] = b[i] + d[i] * c[i];
            }

            /* Executed by every thread once the worksharing loop is done. */
            if (a[N-1] > 2000) printf("Ai = %f\n",a[N-1]);
        }
    }

    return getTimeStamp() - start;
}
|
||||||
49
MemoryHierarchy/src/timing.c
Normal file
49
MemoryHierarchy/src/timing.c
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
/*
|
||||||
|
* =======================================================================================
|
||||||
|
*
|
||||||
|
* Author: Jan Eitzinger (je), jan.treibig@gmail.com
|
||||||
|
* Copyright (c) 2019 RRZE, University Erlangen-Nuremberg
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in all
|
||||||
|
* copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*
|
||||||
|
* =======================================================================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <time.h>
|
||||||
|
|
||||||
|
double getTimeStamp()
|
||||||
|
{
|
||||||
|
struct timespec ts;
|
||||||
|
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||||
|
return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
|
||||||
|
}
|
||||||
|
|
||||||
|
double getTimeResolution()
|
||||||
|
{
|
||||||
|
struct timespec ts;
|
||||||
|
clock_getres(CLOCK_MONOTONIC, &ts);
|
||||||
|
return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Alias for getTimeStamp() under a name with a trailing underscore;
 * presumably provided for callers that use that symbol naming
 * (e.g. Fortran bindings) -- TODO confirm.
 */
double getTimeStamp_(void)
{
    return getTimeStamp();
}
|
||||||
|
|
||||||
35
MemoryHierarchy/src/timing.h
Normal file
35
MemoryHierarchy/src/timing.h
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
/*
|
||||||
|
* =======================================================================================
|
||||||
|
*
|
||||||
|
* Author: Jan Eitzinger (je), jan.treibig@gmail.com
|
||||||
|
* Copyright (c) 2019 RRZE, University Erlangen-Nuremberg
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in all
|
||||||
|
* copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*
|
||||||
|
* =======================================================================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __TIMING_H_
#define __TIMING_H_

/*
 * Timing helpers. All three are declared with (void) so the compiler
 * rejects calls that pass arguments.
 */

/* Current time stamp in seconds. */
extern double getTimeStamp(void);

/* Resolution of the clock used by getTimeStamp(), in seconds. */
extern double getTimeResolution(void);

/* Same as getTimeStamp(), exported under a trailing-underscore name. */
extern double getTimeStamp_(void);

#endif
|
||||||
115
README.md
115
README.md
@@ -1,117 +1,8 @@
|
|||||||
# The Bandwidth Benchmark
|
# The Bandwidth Benchmark
|
||||||
|
|
||||||
This is a collection of simple streaming kernels for teaching purposes.
|
This is a collection of simple streaming kernels for teaching purposes.
|
||||||
It is heavily inspired by John McCalpin's https://www.cs.virginia.edu/stream/ benchmark.
|
|
||||||
|
|
||||||
It contains the following streaming kernels with corresponding data access pattern (Notation: S - store, L - load, WA - write allocate). All variables are vectors, s is a scalar:
|
It consists of two benchmark applications:
|
||||||
|
|
||||||
* init (S1, WA): Initialize an array: `a = s`. Store only.
|
* [MainMemory](https://github.com/RRZE-HPC/TheBandwidthBenchmark/wiki/MainMemory)
|
||||||
* sum (L1): Vector reduction: `s += a`. Load only.
|
* [MemoryHierarchy](https://github.com/RRZE-HPC/TheBandwidthBenchmark/wiki/MemoryHierarchy)
|
||||||
* copy (L1, S1, WA): Classic memcopy: `a = b`.
|
|
||||||
* update (L1, S1): Update vector: `a = a * scalar`. Also load + store but without write allocate.
|
|
||||||
* triad (L2, S1, WA): Stream triad: `a = b + c * scalar`.
|
|
||||||
* daxpy (L2, S1): Daxpy: `a = a + b * scalar`.
|
|
||||||
* striad (L3, S1, WA): Schoenauer triad: `a = b + c * d`.
|
|
||||||
* sdaxpy (L3, S1): Schoenauer triad without write allocate: `a = a + b * c`.
|
|
||||||
|
|
||||||
As an added benefit, the code is a blueprint for a minimal benchmarking application with a generic makefile and modules for aligned array allocation, accurate timing and affinity settings. Those components can be used standalone in your own project.
|
|
||||||
|
|
||||||
## Build
|
|
||||||
|
|
||||||
1. Configure the toolchain and additional options in `config.mk`:
|
|
||||||
```
|
|
||||||
# Supported: GCC, CLANG, ICC
|
|
||||||
TAG ?= GCC
|
|
||||||
ENABLE_OPENMP ?= false
|
|
||||||
|
|
||||||
OPTIONS = -DSIZE=40000000ull
|
|
||||||
OPTIONS += -DNTIMES=10
|
|
||||||
OPTIONS += -DARRAY_ALIGNMENT=64
|
|
||||||
#OPTIONS += -DVERBOSE_AFFINITY
|
|
||||||
#OPTIONS += -DVERBOSE_DATASIZE
|
|
||||||
#OPTIONS += -DVERBOSE_TIMER
|
|
||||||
```
|
|
||||||
|
|
||||||
The verbosity options enable detailed output about affinity settings, allocation sizes and timer resolution.
|
|
||||||
|
|
||||||
2. Build with:
|
|
||||||
```
|
|
||||||
make
|
|
||||||
```
|
|
||||||
|
|
||||||
You can build multiple toolchains in the same directory, but notice that the Makefile is only acting on the one currently set. Intermediate build results are located in the `<TOOLCHAIN>` directory.
|
|
||||||
|
|
||||||
To output the executed commands use:
|
|
||||||
```
|
|
||||||
make Q=
|
|
||||||
```
|
|
||||||
|
|
||||||
3. Clean up with:
|
|
||||||
```
|
|
||||||
make clean
|
|
||||||
```
|
|
||||||
to clean intermediate build results.
|
|
||||||
|
|
||||||
```
|
|
||||||
make distclean
|
|
||||||
```
|
|
||||||
to clean intermediate build results and binary.
|
|
||||||
|
|
||||||
4. (Optional) Generate assembler:
|
|
||||||
```
|
|
||||||
make asm
|
|
||||||
```
|
|
||||||
The assembler files will also be located in the `<TOOLCHAIN>` directory.
|
|
||||||
|
|
||||||
## Usage
|
|
||||||
|
|
||||||
To run the benchmark call:
|
|
||||||
```
|
|
||||||
./bwBench-<TOOLCHAIN>
|
|
||||||
```
|
|
||||||
|
|
||||||
The benchmark will output the results similar to the stream benchmark. Results are validated.
|
|
||||||
For threaded execution it is recommended to control thread affinity.
|
|
||||||
|
|
||||||
We recommend to use likwid-pin for benchmarking:
|
|
||||||
```
|
|
||||||
likwid-pin -c 0-3 ./bwbench-GCC
|
|
||||||
```
|
|
||||||
|
|
||||||
Example output for threaded execution:
|
|
||||||
```
|
|
||||||
-------------------------------------------------------------
|
|
||||||
[pthread wrapper]
|
|
||||||
[pthread wrapper] MAIN -> 0
|
|
||||||
[pthread wrapper] PIN_MASK: 0->1 1->2 2->3
|
|
||||||
[pthread wrapper] SKIP MASK: 0x0
|
|
||||||
threadid 140271463495424 -> core 1 - OK
|
|
||||||
threadid 140271455102720 -> core 2 - OK
|
|
||||||
threadid 140271446710016 -> core 3 - OK
|
|
||||||
OpenMP enabled, running with 4 threads
|
|
||||||
----------------------------------------------------------------------------
|
|
||||||
Function Rate(MB/s) Rate(MFlop/s) Avg time Min time Max time
|
|
||||||
Init: 22111.53 - 0.0148 0.0145 0.0165
|
|
||||||
Sum: 46808.59 46808.59 0.0077 0.0068 0.0140
|
|
||||||
Copy: 30983.06 - 0.0207 0.0207 0.0208
|
|
||||||
Update: 43778.69 21889.34 0.0147 0.0146 0.0148
|
|
||||||
Triad: 34476.64 22984.43 0.0282 0.0278 0.0305
|
|
||||||
Daxpy: 45908.82 30605.88 0.0214 0.0209 0.0242
|
|
||||||
STriad: 37502.37 18751.18 0.0349 0.0341 0.0388
|
|
||||||
SDaxpy: 46822.63 23411.32 0.0281 0.0273 0.0325
|
|
||||||
----------------------------------------------------------------------------
|
|
||||||
Solution Validates
|
|
||||||
```
|
|
||||||
|
|
||||||
A Perl wrapper script (bench.pl) is also provided to scan ranges of thread counts and determine the absolute highest sustained main memory bandwidth. To use it, `likwid-pin` has to be in your path. The script takes three required and one optional command line arguments:
|
|
||||||
```
|
|
||||||
$./bench.pl <executable> <thread count range> <repetitions> [<SMT setting>]
|
|
||||||
```
|
|
||||||
Example usage:
|
|
||||||
```
|
|
||||||
$./bench.pl ./bwbench-GCC 2-8 6
|
|
||||||
```
|
|
||||||
The script will always use physical cores only, where two SMT threads is the default. For different SMT thread counts use the 4th command line argument. Example for a processor without SMT:
|
|
||||||
```
|
|
||||||
$./bench.pl ./bwbench-GCC 14-24 10 1
|
|
||||||
```
|
|
||||||
|
|||||||
Reference in New Issue
Block a user