diff --git a/util/golang/bwBench.go b/util/golang/bwBench.go new file mode 100644 index 0000000..9bd8608 --- /dev/null +++ b/util/golang/bwBench.go @@ -0,0 +1,266 @@ +/* + * ======================================================================================= + * + * Author: Jan Eitzinger (je), jan.eitzinger@fau.de + * Copyright (c) 2020 RRZE, University Erlangen-Nuremberg + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * ======================================================================================= + */ +package main + +import ( + "flag" + "fmt" + "math" + "sync" + "time" +) + +type bench func(int, *sync.WaitGroup) + +type benchmark struct { + label string + words float64 + flops float64 + fct bench +} + +func Min(x, y int) int { + if x < y { + return x + } + return y +} + +func getChunk(N int, tid int, numThreads int) (is, ie int) { + cs := N / numThreads + is = tid * cs + ie = Min(N, is+cs) + return +} + +func main() { + const NTIMES int = 4 + var N int = 40000000 + var scalar float64 = 3.0 + a := make([]float64, N) + b := make([]float64, N) + c := make([]float64, N) + d := make([]float64, N) + + numThreads := flag.Int("nt", 4, "Number of threads") + flag.Parse() + + for i := 0; i < N; i++ { + a[i] = 2.0 + b[i] = 2.0 + c[i] = 0.5 + d[i] = 1.0 + } + + benchmarks := [...]benchmark{ + {label: "Init", words: 1, flops: 0, + fct: func(threadId int, wg *sync.WaitGroup) { + defer wg.Done() + is, ie := getChunk(N, threadId, *numThreads) + for i := is; i < ie; i++ { + b[i] = scalar + } + }}, + {label: "Copy", words: 2, flops: 0, + fct: func(threadId int, wg *sync.WaitGroup) { + defer wg.Done() + is, ie := getChunk(N, threadId, *numThreads) + for i := is; i < ie; i++ { + c[i] = a[i] + } + }}, + {label: "Update", words: 2, flops: 1, + fct: func(threadId int, wg *sync.WaitGroup) { + defer wg.Done() + is, ie := getChunk(N, threadId, *numThreads) + for i := is; i < ie; i++ { + a[i] = a[i] * scalar + } + }}, + {label: "Triad", words: 3, flops: 2, + fct: func(threadId int, wg *sync.WaitGroup) { + defer wg.Done() + is, ie := getChunk(N, threadId, *numThreads) + for i := is; i < ie; i++ { + a[i] = b[i] + scalar*c[i] + } + }}, + {label: "Daxpy", words: 3, flops: 2, + fct: func(threadId int, wg *sync.WaitGroup) { + defer wg.Done() + is, ie := getChunk(N, threadId, *numThreads) + for i := is; i < ie; i++ { + a[i] = a[i] + scalar*b[i] + } + }}, + {label: "STriad", words: 4, flops: 2, + fct: func(threadId int, wg *sync.WaitGroup) { + defer wg.Done() + is, ie := getChunk(N, threadId, *numThreads) + for i := is; i < ie; i++ { + a[i] = b[i] + d[i]*c[i] + } + }}, + {label: "SDaxpy", words: 4, flops: 2, + fct: func(threadId int, wg *sync.WaitGroup) { + defer wg.Done() + is, ie := getChunk(N, threadId, *numThreads) + for i := is; i < ie; i++ { + a[i] = a[i] + b[i]*c[i] + } + }}} + + var min, max, avg [len(benchmarks)]float64 + var times [len(benchmarks)][NTIMES]float64 + + for i := 0; i < len(benchmarks); i++ { + avg[i], max[i] = 0.0, 0.0 + min[i] = math.MaxFloat64 + } + + for k := 0; k < NTIMES; k++ { + for j := 0; j < len(benchmarks); j++ { + times[j][k] = execBench(*numThreads, benchmarks[j].fct) + } + } + + for j := 0; j < len(benchmarks); j++ { + for k := 0; k < NTIMES; k++ { + avg[j] = avg[j] + times[j][k] + min[j] = math.Min(min[j], times[j][k]) + max[j] = math.Max(max[j], times[j][k]) + } + } + + fmt.Println("----------------------------------------------------------------------------") + fmt.Printf("Function Rate(MB/s) Rate(MFlop/s) Avg time Min time Max time\n") + for j := 0; j < len(benchmarks); j++ { + avg[j] = avg[j] / float64(NTIMES-1) + bytes := benchmarks[j].words * 8.0 * float64(N) + flops := benchmarks[j].flops * float64(N) + + if flops > 0 { + fmt.Printf("%s%11.2f %11.2f %11.4f %11.4f %11.4f\n", benchmarks[j].label, + 1.0E-06*bytes/min[j], 1.0E-06*flops/min[j], + avg[j], min[j], max[j]) + } else { + fmt.Printf("%s%11.2f - %11.4f %11.4f %11.4f\n", benchmarks[j].label, + 1.0E-06*bytes/min[j], avg[j], min[j], max[j]) + } + } + fmt.Println("----------------------------------------------------------------------------") + + check(a, b, c, d, N, NTIMES) +} + +func check( + a []float64, + b []float64, + c []float64, + d []float64, + N int, NTIMES int) { + var aj, bj, cj, dj, scalar float64 + var asum, bsum, csum, dsum float64 + var epsilon float64 + + /* reproduce initialization */ + aj = 2.0 + bj = 2.0 + cj = 0.5 + dj = 1.0 + + /* now execute timing loop */ + scalar = 3.0 + + for k := 0; k < NTIMES; k++ { + bj = scalar + cj = aj + aj = aj * scalar + aj = bj + scalar*cj + aj = aj + scalar*bj + aj = bj + cj*dj + aj = aj + bj*cj + } + + aj = aj * float64(N) + bj = bj * float64(N) + cj = cj * float64(N) + dj = dj * float64(N) + + asum = 0.0 + bsum = 0.0 + csum = 0.0 + dsum = 0.0 + + for i := 0; i < N; i++ { + asum += a[i] + bsum += b[i] + csum += c[i] + dsum += d[i] + } + + epsilon = 1.e-8 + + if math.Abs(aj-asum)/asum > epsilon { + fmt.Printf("Failed Validation on array a[]\n") + fmt.Printf(" Expected : %f \n", aj) + fmt.Printf(" Observed : %f \n", asum) + } else if math.Abs(bj-bsum)/bsum > epsilon { + fmt.Printf("Failed Validation on array b[]\n") + fmt.Printf(" Expected : %f \n", bj) + fmt.Printf(" Observed : %f \n", bsum) + } else if math.Abs(cj-csum)/csum > epsilon { + fmt.Printf("Failed Validation on array c[]\n") + fmt.Printf(" Expected : %f \n", cj) + fmt.Printf(" Observed : %f \n", csum) + } else if math.Abs(dj-dsum)/dsum > epsilon { + fmt.Printf("Failed Validation on array d[]\n") + fmt.Printf(" Expected : %f \n", dj) + fmt.Printf(" Observed : %f \n", dsum) + } else { + fmt.Printf("Solution Validates\n") + } + +} + +func execBench( + numThreads int, + fnc bench) float64 { + + var wg sync.WaitGroup + wg.Add(numThreads) + + S := time.Now() + + for id := 0; id < numThreads; id++ { + go fnc(id, &wg) + } + wg.Wait() + + E := time.Now() + return E.Sub(S).Seconds() +}