/*
 * =======================================================================================
 *
 * Author: Jan Eitzinger (je), jan.eitzinger@fau.de
 * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * =======================================================================================
 */

/*
 * NOTE(review): this copy of the file appears to be damaged. Every #include
 * below has lost its header name, and several loops further down have lost
 * part of their condition and body. The damaged spots are kept exactly as
 * found and are flagged individually; restore them from a pristine copy
 * before building.
 */
#define _GNU_SOURCE
#include
#include
#include
#include
#include
#include
#ifdef _OPENMP
#include
#endif
#include

/* Number of elements in each of the four arrays allocated in main(). */
#define SIZE 120000000ull
/* Second dimension of the times[][] table declared in main(). */
#define NTIMES 5
/* Alignment in bytes passed to posix_memalign() for every array. */
#define ARRAY_ALIGNMENT 64
/* Separator line printed after the results table. */
#define HLINE "----------------------------------------------------------------------------\n"

/*
 * Function-like helper macros. Each one evaluates its argument(s) more than
 * once, so do not pass expressions with side effects.
 */
#ifndef MIN
#define MIN(x,y) ((x)<(y)?(x):(y))
#endif
#ifndef MAX
#define MAX(x,y) ((x)>(y)?(x):(y))
#endif
#ifndef ABS
#define ABS(a) ((a) >= 0 ? (a) : -(a))
#endif

/*
 * Runs `call` between a LIKWID marker start and stop for the region named by
 * the stringified `tag`, and stores the value of `call` in times[tag][k].
 * The start and the stop are each issued inside their own OpenMP parallel
 * region.
 *
 * `tag` is used both as a string (#tag) and as an array index, so it has to
 * be an identifier that is valid in both roles. The macro also relies on
 * variables named `times` and `k` being in scope where it is expanded. It
 * expands to several statements without a do { } while (0) wrapper.
 */
#define LIKWID_PROFILE(tag,call) \
    _Pragma ("omp parallel") \
    {LIKWID_MARKER_START(#tag);} \
    times[tag][k] = call; \
    _Pragma ("omp parallel") \
    {LIKWID_MARKER_STOP(#tag);}

/*
 * Benchmark identifiers, numbered from 0. NUMBENCH comes last and is used as
 * the array length for the per-benchmark tables in main().
 */
typedef enum benchmark {
    INIT = 0,
    COPY,
    UPDATE,
    TRIAD,
    DAXPY,
    STRIAD,
    SDAXPY,
    NUMBENCH
} benchmark;

/*
 * Descriptor for one benchmark row.
 * label is the text printed at the start of the result row.
 * words and flops are presumably the number of array words moved and the
 * floating point operations per loop iteration; the code that consumes them
 * is missing from this copy -- TODO confirm.
 */
typedef struct {
    char* label;
    int words;
    int flops;
} benchmarkType;

/*
 * Prototypes for the kernels and helpers. check(), getTimeStamp() and the
 * beginning of init() are defined further down; the other definitions are
 * not visible in this copy. The kernels return double; main() stores such a
 * value in times[][] through LIKWID_PROFILE.
 */
extern double init(double*, double, int);
extern double copy(double*, double*, int);
extern double update(double*, double, int);
extern double triad(double*, double*, double*, double, int);
extern double daxpy(double*, double*, double, int);
extern double striad(double*, double*, double*, double*, int);
extern double sdaxpy(double*, double*, double*, int);
extern void check(double*, double*, double*, double*, int);
extern double getTimeStamp();

/*
 * Program entry point.
 *
 * Visible steps: initialise the LIKWID marker API, register one marker region
 * per benchmark name, allocate four arrays of N doubles, print one result row
 * per benchmark, validate the arrays with check(), close the marker API and
 * return EXIT_SUCCESS.
 *
 * argc and argv are not referenced in the visible part of the function.
 */
int main (int argc, char** argv)
{
    size_t bytesPerWord = sizeof(double);
    size_t N = SIZE;
    double *a, *b, *c, *d;
    double scalar, tmp;
    double E, S;
    double avgtime[NUMBENCH], maxtime[NUMBENCH], mintime[NUMBENCH];
    double times[NUMBENCH][NTIMES];

    /* One entry per benchmark, in the same order as the benchmark enum. */
    benchmarkType benchmarks[NUMBENCH] = {
        {"Init: ", 1, 0},
        {"Copy: ", 2, 0},
        {"Update: ", 2, 1},
        {"Triad: ", 3, 2},
        {"Daxpy: ", 3, 2},
        {"STriad: ", 4, 2},
        {"SDaxpy: ", 4, 2}
    };

    LIKWID_MARKER_INIT;

    /* Register every marker region from inside an OpenMP parallel region. */
#pragma omp parallel
    {
        LIKWID_MARKER_REGISTER("INIT");
        LIKWID_MARKER_REGISTER("COPY");
        LIKWID_MARKER_REGISTER("UPDATE");
        LIKWID_MARKER_REGISTER("TRIAD");
        LIKWID_MARKER_REGISTER("DAXPY");
        LIKWID_MARKER_REGISTER("STRIAD");
        LIKWID_MARKER_REGISTER("SDAXPY");
    }

    /*
     * NOTE(review): the return values of posix_memalign() are ignored, so a
     * failed allocation is not detected before a, b, c and d are used. No
     * matching free() is visible before main() returns.
     */
    posix_memalign((void**) &a, ARRAY_ALIGNMENT, N * bytesPerWord );
    posix_memalign((void**) &b, ARRAY_ALIGNMENT, N * bytesPerWord );
    posix_memalign((void**) &c, ARRAY_ALIGNMENT, N * bytesPerWord );
    posix_memalign((void**) &d, ARRAY_ALIGNMENT, N * bytesPerWord );

    /*
     * NOTE(review): source text is missing between `i` and `0){` on the next
     * line. Whatever followed this loop header (nothing of it survives here)
     * and the header of the reporting loop over j, including the definitions
     * of `bytes` and `flops`, are gone. The surviving condition tail is
     * presumably a `flops > 0` test -- TODO confirm.
     *
     * What remains prints one row per benchmark: label, bytes/mintime scaled
     * by 1e-6, then the avg/min/max times. The first branch also prints
     * flops/mintime scaled by 1e-6; the second prints "-" in that column.
     */
    for (int i=0; i 0){
            printf("%s%11.2f %11.2f %11.4f %11.4f %11.4f\n",
                    benchmarks[j].label,
                    1.0E-06 * bytes/mintime[j],
                    1.0E-06 * flops/mintime[j],
                    avgtime[j],
                    mintime[j],
                    maxtime[j]);
        } else {
            printf("%s%11.2f - %11.4f %11.4f %11.4f\n",
                    benchmarks[j].label,
                    1.0E-06 * bytes/mintime[j],
                    avgtime[j],
                    mintime[j],
                    maxtime[j]);
        }
    }

    printf(HLINE);
    /* N is a size_t here and is converted to the int parameter of check(). */
    check(a, b, c, d, N);
    LIKWID_MARKER_CLOSE;

    return EXIT_SUCCESS;
}

/*
 * Validates the four arrays and prints the outcome.
 *
 * The comparisons form an else-if chain, so only the first array that fails
 * (in the order a, b, c, d) is reported. If none fails, "Solution Validates"
 * is printed. For b, c and d the test is |expected - observed| / observed >
 * epsilon, where "observed" is the corresponding *sum variable.
 *
 * a, b, c, d: the arrays to validate.
 * N:          presumably the element count of each array; its use is inside
 *             the missing part -- TODO confirm.
 */
void check(
        double * a,
        double * b,
        double * c,
        double * d,
        int N
        )
{
    double aj, bj, cj, dj, scalar;
    double asum, bsum, csum, dsum;
    double epsilon;

    /* reproduce initialization */
    aj = 2.0;
    bj = 2.0;
    cj = 0.5;
    dj = 1.0;

    /* now execute timing loop */
    scalar = 3.0;

    /*
     * NOTE(review): source text is missing between `k` and `epsilon)` on the
     * next line. The body of the k loop, the code that fills asum, bsum, csum
     * and dsum, the assignment of epsilon and the start of the first
     * comparison (presumably the test for array a, judging by the message it
     * guards) are all gone -- restore from a pristine copy.
     */
    for (int k=0; k epsilon) {
        printf ("Failed Validation on array a[]\n");
        printf (" Expected : %f \n",aj);
        printf (" Observed : %f \n",asum);
    }
    else if (ABS(bj-bsum)/bsum > epsilon) {
        printf ("Failed Validation on array b[]\n");
        printf (" Expected : %f \n",bj);
        printf (" Observed : %f \n",bsum);
    }
    else if (ABS(cj-csum)/csum > epsilon) {
        printf ("Failed Validation on array c[]\n");
        printf (" Expected : %f \n",cj);
        printf (" Observed : %f \n",csum);
    }
    else if (ABS(dj-dsum)/dsum > epsilon) {
        printf ("Failed Validation on array d[]\n");
        printf (" Expected : %f \n",dj);
        printf (" Observed : %f \n",dsum);
    } else {
        printf ("Solution Validates\n");
    }
}

/*
 * Returns the current CLOCK_MONOTONIC reading in seconds as a double:
 * tv_sec plus tv_nsec scaled by 1e-9.
 *
 * The return value of clock_gettime() is not checked.
 */
double getTimeStamp()
{
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
}

/*
 * Init kernel. Only the start of the definition is present: it takes a
 * timestamp into S and opens a statically scheduled OpenMP parallel-for over
 * i.
 *
 * NOTE(review): the loop condition, the loop body and the rest of the
 * function are cut off in this copy.
 */
double init(
        double * restrict a,
        double scalar,
        int N
        )
{
    double S, E;

    S = getTimeStamp();
#pragma omp parallel for schedule(static)
    for (int i=0; i