Add affinity module.
Enable configuration in config.mk file. Introduce timer resolution report.
This commit is contained in:
4
Makefile
4
Makefile
@@ -10,6 +10,7 @@ Q ?= @
|
|||||||
|
|
||||||
#DO NOT EDIT BELOW
|
#DO NOT EDIT BELOW
|
||||||
include $(MAKE_DIR)/include_$(TAG).mk
|
include $(MAKE_DIR)/include_$(TAG).mk
|
||||||
|
include $(MAKE_DIR)/config.mk
|
||||||
INCLUDES += -I./src/includes
|
INCLUDES += -I./src/includes
|
||||||
|
|
||||||
VPATH = $(SRC_DIR)
|
VPATH = $(SRC_DIR)
|
||||||
@@ -20,7 +21,7 @@ OBJ += $(patsubst $(SRC_DIR)/%.cc, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*
|
|||||||
OBJ += $(patsubst $(SRC_DIR)/%.cpp, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.cpp))
|
OBJ += $(patsubst $(SRC_DIR)/%.cpp, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.cpp))
|
||||||
OBJ += $(patsubst $(SRC_DIR)/%.f90, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.f90))
|
OBJ += $(patsubst $(SRC_DIR)/%.f90, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.f90))
|
||||||
OBJ += $(patsubst $(SRC_DIR)/%.F90, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.F90))
|
OBJ += $(patsubst $(SRC_DIR)/%.F90, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.F90))
|
||||||
CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(INCLUDES)
|
CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(OPTIONS) $(INCLUDES)
|
||||||
|
|
||||||
|
|
||||||
${TARGET}: $(BUILD_DIR) $(OBJ)
|
${TARGET}: $(BUILD_DIR) $(OBJ)
|
||||||
@@ -64,7 +65,6 @@ tags:
|
|||||||
@echo "===> GENERATE TAGS"
|
@echo "===> GENERATE TAGS"
|
||||||
$(Q)ctags -R
|
$(Q)ctags -R
|
||||||
|
|
||||||
|
|
||||||
$(BUILD_DIR):
|
$(BUILD_DIR):
|
||||||
@mkdir $(BUILD_DIR)
|
@mkdir $(BUILD_DIR)
|
||||||
|
|
||||||
|
|||||||
6
config.mk
Normal file
6
config.mk
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
OPTIONS = -DSIZE=40000000ull
|
||||||
|
OPTIONS += -DNTIMES=10
|
||||||
|
OPTIONS += -DARRAY_ALIGNMENT=64
|
||||||
|
#OPTIONS += -DVERBOSE_AFFINITY
|
||||||
|
#OPTIONS += -DVERBOSE_DATASIZE
|
||||||
|
#OPTIONS += -DVERBOSE_TIMER
|
||||||
85
src/affinity.c
Normal file
85
src/affinity.c
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
/*
|
||||||
|
* =======================================================================================
|
||||||
|
*
|
||||||
|
* Author: Jan Eitzinger (je), jan.treibig@gmail.com
|
||||||
|
* Copyright (c) 2019 RRZE, University Erlangen-Nuremberg
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in all
|
||||||
|
* copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*
|
||||||
|
* =======================================================================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <sched.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/syscall.h>
|
||||||
|
#include <pthread.h>
|
||||||
|
|
||||||
|
#define MAX_NUM_THREADS 128
|
||||||
|
#define gettid() syscall(SYS_gettid)
|
||||||
|
|
||||||
|
/*
 * Return the lowest-numbered processor contained in cpu_set.
 *
 * The complete set is scanned (CPU_SETSIZE entries), so processor ids above
 * 127 are found as well.
 *
 * Returns -1 if no processor is set in cpu_set.
 */
static int
getProcessorID(cpu_set_t* cpu_set)
{
    for ( int processorId = 0; processorId < CPU_SETSIZE; processorId++ )
    {
        if ( CPU_ISSET(processorId, cpu_set) )
        {
            return processorId;
        }
    }

    /* empty set: report "not found" instead of an id that looks valid */
    return -1;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Query the affinity mask of the calling thread and return the processor id
 * that getProcessorID() reports for that mask.
 *
 * Returns -1 (after printing a diagnostic to stderr) if the affinity mask
 * cannot be read.
 */
int
affinity_getProcessorId(void)
{
    cpu_set_t cpu_set;

    CPU_ZERO(&cpu_set);

    /* pid 0 addresses the calling thread */
    if ( sched_getaffinity(0, sizeof(cpu_set_t), &cpu_set) != 0 )
    {
        perror("sched_getaffinity");
        return -1;
    }

    return getProcessorID(&cpu_set);
}
|
||||||
|
|
||||||
|
/*
 * Pin the calling thread to a single processor.
 *
 * processorId: id of the processor the thread is restricted to; must be in
 *              the range [0, CPU_SETSIZE).
 *
 * The function does not return a status. If processorId is out of range or
 * the affinity cannot be applied, a diagnostic is written to stderr and the
 * current affinity is left unchanged.
 */
void
affinity_pinThread(int processorId)
{
    cpu_set_t cpuset;
    int err;

    if ( processorId < 0 || processorId >= CPU_SETSIZE )
    {
        fprintf(stderr, "affinity_pinThread: invalid processor id %d\n", processorId);
        return;
    }

    CPU_ZERO(&cpuset);
    CPU_SET(processorId, &cpuset);

    /* pthread_setaffinity_np reports failure via its return value, not errno */
    err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);

    if ( err != 0 )
    {
        fprintf(stderr, "affinity_pinThread: cannot pin to processor %d (error %d)\n",
                processorId, err);
    }
}
|
||||||
|
|
||||||
|
/*
 * Pin the caller to a single processor via sched_setaffinity() with pid 0.
 *
 * processorId: id of the processor to run on; must be in the range
 *              [0, CPU_SETSIZE).
 *
 * The function does not return a status. If processorId is out of range or
 * the affinity cannot be applied, a diagnostic is written to stderr and the
 * current affinity is left unchanged.
 */
void
affinity_pinProcess(int processorId)
{
    cpu_set_t cpuset;

    if ( processorId < 0 || processorId >= CPU_SETSIZE )
    {
        fprintf(stderr, "affinity_pinProcess: invalid processor id %d\n", processorId);
        return;
    }

    CPU_ZERO(&cpuset);
    CPU_SET(processorId, &cpuset);

    /* pid 0 selects the caller */
    if ( sched_setaffinity(0, sizeof(cpu_set_t), &cpuset) != 0 )
    {
        perror("affinity_pinProcess: sched_setaffinity");
    }
}
|
||||||
36
src/includes/affinity.h
Normal file
36
src/includes/affinity.h
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
/*
|
||||||
|
* =======================================================================================
|
||||||
|
*
|
||||||
|
* Author: Jan Eitzinger (je), jan.treibig@gmail.com
|
||||||
|
* Copyright (c) 2019 RRZE, University Erlangen-Nuremberg
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in all
|
||||||
|
* copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*
|
||||||
|
* =======================================================================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef AFFINITY_H
|
||||||
|
#define AFFINITY_H
|
||||||
|
|
||||||
|
/* Returns a processor id taken from the affinity mask of the calling thread. */
extern int affinity_getProcessorId(void);

/* Restricts the caller (sched_setaffinity with pid 0) to processor processorId. */
extern void affinity_pinProcess(int processorId);

/* Restricts the calling thread (pthread_self) to processor processorId. */
extern void affinity_pinThread(int processorId);
|
||||||
|
|
||||||
|
#endif /*AFFINITY_H*/
|
||||||
|
|
||||||
83
src/main.c
83
src/main.c
@@ -35,13 +35,11 @@
|
|||||||
#include <omp.h>
|
#include <omp.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include <timing.h>
|
||||||
#include <allocate.h>
|
#include <allocate.h>
|
||||||
|
#include <affinity.h>
|
||||||
|
|
||||||
#define ARRAY_ALIGNMENT 64
|
#define HLINE "----------------------------------------------------------------------------\n"
|
||||||
#define SIZE 20000000ull
|
|
||||||
#define NTIMES 10
|
|
||||||
|
|
||||||
# define HLINE "-------------------------------------------------------------\n"
|
|
||||||
|
|
||||||
#ifndef MIN
|
#ifndef MIN
|
||||||
#define MIN(x,y) ((x)<(y)?(x):(y))
|
#define MIN(x,y) ((x)<(y)?(x):(y))
|
||||||
@@ -65,6 +63,12 @@ typedef enum benchmark {
|
|||||||
NUMBENCH
|
NUMBENCH
|
||||||
} benchmark;
|
} benchmark;
|
||||||
|
|
||||||
|
/* Static description of one benchmark kernel, used when printing results. */
typedef struct {
    const char* label;  /* name printed in front of the result row (string literal) */
    int words;          /* multiplied by the word size and N to get the data volume */
    int flops;          /* multiplied by N to get the flop count; 0 prints "-" as rate */
} benchmarkType;
|
||||||
|
|
||||||
extern double init(double*, double, int);
|
extern double init(double*, double, int);
|
||||||
extern double sum(double*, int);
|
extern double sum(double*, int);
|
||||||
extern double copy(double*, double*, int);
|
extern double copy(double*, double*, int);
|
||||||
@@ -82,6 +86,7 @@ int main (int argc, char** argv)
|
|||||||
size_t N = SIZE;
|
size_t N = SIZE;
|
||||||
double *a, *b, *c, *d;
|
double *a, *b, *c, *d;
|
||||||
double scalar, tmp;
|
double scalar, tmp;
|
||||||
|
double E, S;
|
||||||
|
|
||||||
double avgtime[NUMBENCH],
|
double avgtime[NUMBENCH],
|
||||||
maxtime[NUMBENCH],
|
maxtime[NUMBENCH],
|
||||||
@@ -89,33 +94,29 @@ int main (int argc, char** argv)
|
|||||||
|
|
||||||
double times[NUMBENCH][NTIMES];
|
double times[NUMBENCH][NTIMES];
|
||||||
|
|
||||||
double bytes[NUMBENCH] = {
|
benchmarkType benchmarks[NUMBENCH] = {
|
||||||
1 * sizeof(double) * N, /* init */
|
{"Init: ", 1, 0},
|
||||||
1 * sizeof(double) * N, /* sum */
|
{"Sum: ", 1, 1},
|
||||||
2 * sizeof(double) * N, /* copy */
|
{"Copy: ", 2, 0},
|
||||||
2 * sizeof(double) * N, /* update */
|
{"Update: ", 2, 1},
|
||||||
3 * sizeof(double) * N, /* triad */
|
{"Triad: ", 3, 2},
|
||||||
3 * sizeof(double) * N, /* daxpy */
|
{"Daxpy: ", 3, 2},
|
||||||
4 * sizeof(double) * N, /* striad */
|
{"STriad: ", 4, 2},
|
||||||
4 * sizeof(double) * N /* sdaxpy */
|
{"SDaxpy: ", 4, 2}
|
||||||
};
|
};
|
||||||
|
|
||||||
char *label[NUMBENCH] = {
|
|
||||||
"Init: ",
|
|
||||||
"Sum: ",
|
|
||||||
"Copy: ",
|
|
||||||
"Update: ",
|
|
||||||
"Triad: ",
|
|
||||||
"Daxpy: ",
|
|
||||||
"STriad: ",
|
|
||||||
"SDaxpy: "};
|
|
||||||
|
|
||||||
a = (double*) allocate( ARRAY_ALIGNMENT, N * bytesPerWord );
|
a = (double*) allocate( ARRAY_ALIGNMENT, N * bytesPerWord );
|
||||||
b = (double*) allocate( ARRAY_ALIGNMENT, N * bytesPerWord );
|
b = (double*) allocate( ARRAY_ALIGNMENT, N * bytesPerWord );
|
||||||
c = (double*) allocate( ARRAY_ALIGNMENT, N * bytesPerWord );
|
c = (double*) allocate( ARRAY_ALIGNMENT, N * bytesPerWord );
|
||||||
d = (double*) allocate( ARRAY_ALIGNMENT, N * bytesPerWord );
|
d = (double*) allocate( ARRAY_ALIGNMENT, N * bytesPerWord );
|
||||||
|
|
||||||
|
printf(HLINE);
|
||||||
|
printf ("Total allocated datasize: %8.2f MB\n", 4.0 * bytesPerWord * N * 1.0E-06);
|
||||||
|
|
||||||
for (int i=0; i<NUMBENCH; i++) {
|
for (int i=0; i<NUMBENCH; i++) {
|
||||||
|
#ifdef VERBOSE_DATASIZE
|
||||||
|
printf ("\t%s: %8.2f MB\n", benchmarks[i].label, benchmarks[i].words * bytesPerWord * N * 1.0E-06);
|
||||||
|
#endif
|
||||||
avgtime[i] = 0;
|
avgtime[i] = 0;
|
||||||
maxtime[i] = 0;
|
maxtime[i] = 0;
|
||||||
mintime[i] = FLT_MAX;
|
mintime[i] = FLT_MAX;
|
||||||
@@ -126,12 +127,18 @@ int main (int argc, char** argv)
|
|||||||
#pragma omp parallel
|
#pragma omp parallel
|
||||||
{
|
{
|
||||||
int k = omp_get_num_threads();
|
int k = omp_get_num_threads();
|
||||||
|
int i = omp_get_thread_num();
|
||||||
|
|
||||||
#pragma omp single
|
#pragma omp single
|
||||||
printf ("OpenMP enabled, running with %d threads\n", k);
|
printf ("OpenMP enabled, running with %d threads\n", k);
|
||||||
|
|
||||||
|
#ifdef VERBOSE_AFFINITY
|
||||||
|
printf ("\tThread %d running on processor %d\n", i, affinity_getProcessorId());
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
S = getTimeStamp();
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for (int i=0; i<N; i++) {
|
for (int i=0; i<N; i++) {
|
||||||
a[i] = 2.0;
|
a[i] = 2.0;
|
||||||
@@ -139,6 +146,11 @@ int main (int argc, char** argv)
|
|||||||
c[i] = 0.5;
|
c[i] = 0.5;
|
||||||
d[i] = 1.0;
|
d[i] = 1.0;
|
||||||
}
|
}
|
||||||
|
E = getTimeStamp();
|
||||||
|
#ifdef VERBOSE_TIMER
|
||||||
|
printf ("Timer resolution %.2e ", getTimeResolution());
|
||||||
|
printf ("Ticks used %.0e\n", (E-S) / getTimeResolution());
|
||||||
|
#endif
|
||||||
|
|
||||||
scalar = 3.0;
|
scalar = 3.0;
|
||||||
|
|
||||||
@@ -164,15 +176,26 @@ int main (int argc, char** argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
printf(HLINE);
|
printf(HLINE);
|
||||||
printf("Function Rate (MB/s) Avg time Min time Max time\n");
|
printf("Function Rate(MB/s) Rate(MFlop/s) Avg time Min time Max time\n");
|
||||||
for (int j=0; j<NUMBENCH; j++) {
|
for (int j=0; j<NUMBENCH; j++) {
|
||||||
avgtime[j] = avgtime[j]/(double)(NTIMES-1);
|
avgtime[j] = avgtime[j]/(double)(NTIMES-1);
|
||||||
|
double bytes = (double) benchmarks[j].words * sizeof(double) * N;
|
||||||
|
/* flop count = flops per iteration * N; unlike bytes it is not scaled by sizeof(double) */
double flops = (double) benchmarks[j].flops * N;
|
||||||
|
|
||||||
printf("%s%11.4f %11.4f %11.4f %11.4f\n", label[j],
|
if (flops > 0){
|
||||||
1.0E-06 * bytes[j]/mintime[j],
|
printf("%s%11.2f %11.2f %11.4f %11.4f %11.4f\n", benchmarks[j].label,
|
||||||
avgtime[j],
|
1.0E-06 * bytes/mintime[j],
|
||||||
mintime[j],
|
1.0E-06 * flops/mintime[j],
|
||||||
maxtime[j]);
|
avgtime[j],
|
||||||
|
mintime[j],
|
||||||
|
maxtime[j]);
|
||||||
|
} else {
|
||||||
|
printf("%s%11.2f - %11.4f %11.4f %11.4f\n", benchmarks[j].label,
|
||||||
|
1.0E-06 * bytes/mintime[j],
|
||||||
|
avgtime[j],
|
||||||
|
mintime[j],
|
||||||
|
maxtime[j]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
printf(HLINE);
|
printf(HLINE);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user