From ef9e672eb9d6bbc4aee7bb3c5c8bad03c5d796a2 Mon Sep 17 00:00:00 2001 From: Jason Lowe-Power Date: Mon, 1 Mar 2021 14:51:15 -0800 Subject: [PATCH 1/6] python: Add search functions to pystats groups This change adds three functions, a `children` function which will iterate through all of the children of a group based (optionally) on some predicate. Then, it implements a `find` function and a `find_re` function using the `children` function. The `find` function allows users to match statistics or groups within a group. For instance, you might want to find all of the groups within the system which have the name "cpu{i}". This is useful for aggregating statistic values across multiple components. Example: total_instructions = sum([cpu.exec_context.thread_0.numInsts.value for cpu in simstat.system.find('cpu')]) The find function matches based on substring. If the name given to the find function is a substring of the stat name or the group name, the stat/group will be returned. The `find_re` function is the same as find, but matches a regular expression instead of a simple substring match. Note: this was originally reviewed on https://gem5-review.googlesource.com/c/public/gem5/+/41603 but was rebased incorrectly before merging. This change fixes the rebase and adds back the children() and find_re() functions. 
Change-Id: Idaa1e9efc56fd26de3285d3fa505087ddd78ac8a Signed-off-by: Jason Lowe-Power Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42014 Maintainer: Jason Lowe-Power Tested-by: kokoro Reviewed-by: Andreas Sandberg --- src/python/m5/ext/pystats/group.py | 68 +++++++++++++++++++++++++----- 1 file changed, 58 insertions(+), 10 deletions(-) diff --git a/src/python/m5/ext/pystats/group.py b/src/python/m5/ext/pystats/group.py index 10887e20a1..bde1c40ca2 100644 --- a/src/python/m5/ext/pystats/group.py +++ b/src/python/m5/ext/pystats/group.py @@ -24,7 +24,8 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -from typing import Dict, Iterator, List, Optional, Union +import re +from typing import Callable, Dict, Iterator, List, Optional, Union from .jsonserializable import JsonSerializable from .statistic import Scalar, Statistic @@ -53,25 +54,72 @@ class Group(JsonSerializable): for key,value in kwargs.items(): setattr(self, key, value) + def children(self, predicate: Optional[Callable[[str], bool]] = None + ) -> Iterator[Union["Group", Statistic]]: + """ Iterate through all of the children, optionally with a predicate + + ``` + >>> system.children(lambda _name: 'cpu' in name) + [cpu0, cpu1, cpu2] + ``` + + :param: predicate(str) -> bool: Optional. Each child's name is passed + to this function. If it returns true, then the child is + yielded. Otherwise, the child is skipped. + If not provided then all children are returned. + """ + for attr in self.__dict__: + # Check the provided predicate. If not a match, skip this child + if predicate and not predicate(attr): continue + obj = getattr(self, attr) + if isinstance(obj, Group) or isinstance(obj, Statistic): + yield obj + def find(self, name: str) -> Iterator[Union["Group", Statistic]]: """ Find all stats that match the name + This function searches all of the "children" in this group. 
It yields the set of attributes (children) that have the `name` as a substring. The order of the objects returned by the generator is arbitrary. + ``` - system.find('cpu') -> [cpu0, cpu1, cpu2, cpu3, other_cpu, ...] + >>> system.find('cpu') + [cpu0, cpu1, cpu2, cpu3, other_cpu, ...] ``` + This is useful for performing aggregates over substats. For instance: + ``` - total_instruuctions = sum([cpu.exec_context.thread_0.numInsts.value - for cpu in simstat.system.find('cpu')]) + >>> total_instructions = sum([cpu.exec_context.thread_0.numInsts.value + for cpu in simstat.system.find('cpu')]) + 100000 ``` + + :param: name: The name to search for """ - for attr in self.__dict__: - if name in attr: - obj = getattr(self, attr) - if isinstance(obj, Group) or isinstance(obj, Statistic): - yield obj + yield from self.children(lambda _name: _name in name) + + def find_re(self, regex: Union[str, re.Pattern] + ) -> Iterator[Union["Group", Statistic]]: + """ Find all stats that match the name + + This function searches all of the "children" in this group. It yields + the set of attributes (children) that have the `name` mathing the + regex provided. The order of the objects returned by the generator is + arbitrary. + + ``` + >>> system.find_re('cpu[0-9]') + [cpu0, cpu1, cpu2] + ``` + Note: The above will not match `cpu_other`. + + :param: regex: The regular expression used to search. 
Can be a + precompiled regex or a string in regex format + """ + if isinstance(regex, str): + regex = re.compile(regex) + yield from self.children(lambda _name: regex.search(_name)) class Vector(Group): """ @@ -86,4 +134,4 @@ class Vector(Group): type="Vector", time_conversion=None, **scalar_map, - ) \ No newline at end of file + ) From c47920d81c4c29e8c868c005ff4330c46becbad5 Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Wed, 3 Mar 2021 11:38:06 +0000 Subject: [PATCH 2/6] arch-arm: Fix atomics permission checks in TLB For stage 2 translations, atomic accesses were not checking the access permission bits in the page table descriptors, and were instead wrongly using the nature of the request itself (r/w booleans). Change-Id: I27fbc95f04ea659e77ad5a3afb551873c9c971f0 Signed-off-by: Giacomo Travaglini Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42073 Reviewed-by: Jason Lowe-Power Reviewed-by: Richard Cooper Reviewed-by: Bobby R. Bruce Maintainer: Jason Lowe-Power Maintainer: Bobby R. 
Bruce Tested-by: kokoro --- src/arch/arm/tlb.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/arch/arm/tlb.cc b/src/arch/arm/tlb.cc index fd72d258b9..8e5b3ca9b8 100644 --- a/src/arch/arm/tlb.cc +++ b/src/arch/arm/tlb.cc @@ -872,8 +872,7 @@ TLB::checkPermissions64(TlbEntry *te, const RequestPtr &req, Mode mode, // sctlr.wxn overrides the xn bit grant = !wxn && !xn; } else if (is_atomic) { - grant = r && w; - grant_read = r; + grant = hap; } else if (is_write) { grant = hap & 0x2; } else { // is_read From f5383a573354f8299249116a489fd5aee09af68f Mon Sep 17 00:00:00 2001 From: Kyle Roarty Date: Wed, 17 Feb 2021 16:52:40 -0600 Subject: [PATCH 3/6] gpu-compute: Fix accidental execution when stopped at barrier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Due to the compute unit pipeline being executed in reverse order, there exists a scenario where a compute unit will execute an extra instruction when it's supposed to be stopped at a barrier. It occurs as follows: * The ScheduleStage sets a barrier instruction ready to execute. * The ScoreboardCheckStage adds another instruction to the readyList. This is where the barrier is checked, but because the barrier isn't executing yet, the instruction can be passed along to ScheduleStage * The barrier executes, and stalls * The ScheduleStage sees that there's a new instruction and schedules it to be executed. * Only now will the ScoreboardCheckStage realize a barrier is active and stall accordingly * The subsequent instruction executes This patch sets the wavefront status to be S_BARRIER in ScheduleStage instead of in the barrier instruction execution in order to have ScoreboardCheckStage realize that we're going to execute a barrier, preventing it from marking another instruction as ready. 
Change-Id: Ib683e2c68f361d7ee60a3beaf53b4b6c888c9f8d Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/41573 Reviewed-by: Matt Sinclair Reviewed-by: Alexandru Duțu Maintainer: Matt Sinclair Tested-by: kokoro --- src/arch/gcn3/insts/instructions.cc | 2 -- src/gpu-compute/schedule_stage.cc | 3 +++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/arch/gcn3/insts/instructions.cc b/src/arch/gcn3/insts/instructions.cc index 29de1a8a95..bde87efeea 100644 --- a/src/arch/gcn3/insts/instructions.cc +++ b/src/arch/gcn3/insts/instructions.cc @@ -4114,8 +4114,6 @@ namespace Gcn3ISA if (wf->hasBarrier()) { int bar_id = wf->barrierId(); - assert(wf->getStatus() != Wavefront::S_BARRIER); - wf->setStatus(Wavefront::S_BARRIER); cu->incNumAtBarrier(bar_id); DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - Stalling at " "barrier Id%d. %d waves now at barrier, %d waves " diff --git a/src/gpu-compute/schedule_stage.cc b/src/gpu-compute/schedule_stage.cc index 8a2ea18294..ace6d0c3f5 100644 --- a/src/gpu-compute/schedule_stage.cc +++ b/src/gpu-compute/schedule_stage.cc @@ -314,6 +314,9 @@ ScheduleStage::addToSchList(int exeType, const GPUDynInstPtr &gpu_dyn_inst) computeUnit.insertInPipeMap(wf); wavesInSch.emplace(wf->wfDynId); schList.at(exeType).push_back(std::make_pair(gpu_dyn_inst, RFBUSY)); + if (wf->isOldestInstBarrier() && wf->hasBarrier()) { + wf->setStatus(Wavefront::S_BARRIER); + } if (wf->isOldestInstWaitcnt()) { wf->setStatus(Wavefront::S_WAITCNT); } From 3acc6af5c20ffbaf7f5dea5d9c1cabe9ec500edc Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Fri, 5 Mar 2021 14:00:40 +0000 Subject: [PATCH 4/6] configs: NVM missing the xor_low_bit argument in create_mem_intf Change-Id: Ie197cec1eaa82ca61a6bbb82c33307a16d779dbd Signed-off-by: Giacomo Travaglini Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42321 Reviewed-by: Daniel Carvalho Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Tested-by: kokoro --- 
configs/common/MemConfig.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/common/MemConfig.py b/configs/common/MemConfig.py index 6e78be5291..b8907c0ac7 100644 --- a/configs/common/MemConfig.py +++ b/configs/common/MemConfig.py @@ -200,7 +200,7 @@ def config_mem(options, system): if opt_mem_type and (not opt_nvm_type or range_iter % 2 != 0): # Create the DRAM interface dram_intf = create_mem_intf(intf, r, i, nbr_mem_ctrls, - intlv_bits, intlv_size, opt_xor_low_bit) + intlv_bits, intlv_size, opt_xor_low_bit) # Set the number of ranks based on the command-line # options if it was explicitly set @@ -241,7 +241,7 @@ def config_mem(options, system): elif opt_nvm_type and (not opt_mem_type or range_iter % 2 == 0): nvm_intf = create_mem_intf(n_intf, r, i, nbr_mem_ctrls, - intlv_bits, intlv_size) + intlv_bits, intlv_size, opt_xor_low_bit) # Set the number of ranks based on the command-line # options if it was explicitly set if issubclass(n_intf, m5.objects.NVMInterface) and \ From ee1837d313c8b9fc81977e7daab6bf351d2ed884 Mon Sep 17 00:00:00 2001 From: Yu-hsin Wang Date: Mon, 8 Mar 2021 13:33:57 +0800 Subject: [PATCH 5/6] system-arm: update armv8 cpu-release-addr In the cl below, cpu-release-addr is changed to 0x87fffff8. https://gem5-review.googlesource.com/c/public/gem5/+/35076 By fixing cpu-release-addr, we are able to bring up multi-core platform. 
Change-Id: I48bb678f67b677e9fc0136c378407e06ce7a46f4 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42484 Reviewed-by: Giacomo Travaglini Maintainer: Giacomo Travaglini Tested-by: kokoro --- system/arm/dt/armv8.dts | 2 +- system/arm/dt/armv8_big_little.dts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/system/arm/dt/armv8.dts b/system/arm/dt/armv8.dts index 42c6ef654f..6ea7820e8d 100644 --- a/system/arm/dt/armv8.dts +++ b/system/arm/dt/armv8.dts @@ -38,7 +38,7 @@ compatible = "gem5,armv8", "arm,armv8"; \ reg = < n >; \ enable-method = "spin-table"; \ - cpu-release-addr = <0 0x8000fff8>; \ + cpu-release-addr = <0 0x87fffff8>; \ }; / { diff --git a/system/arm/dt/armv8_big_little.dts b/system/arm/dt/armv8_big_little.dts index 9d1e7acfd9..39d6d9b17f 100644 --- a/system/arm/dt/armv8_big_little.dts +++ b/system/arm/dt/armv8_big_little.dts @@ -46,7 +46,7 @@ compatible = "gem5,armv8", "arm,armv8"; \ reg = < ## id >; \ enable-method = "spin-table"; \ - cpu-release-addr = <0 0x8000fff8>; \ + cpu-release-addr = <0 0x87fffff8>; \ }; From 89ec39a1479bf532a5fc2ef7b2f9a4afdfde075f Mon Sep 17 00:00:00 2001 From: Jason Lowe-Power Date: Tue, 9 Mar 2021 11:12:21 -0800 Subject: [PATCH 6/6] python: Use Pattern from typing Python <3.7 must import Pattern from typing not re Change-Id: I56acb466aef40b7188e4cd58d3ba5d0b0cc9b090 Signed-off-by: Jason Lowe-Power Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42643 Reviewed-by: Bobby R. Bruce Maintainer: Bobby R. Bruce Tested-by: kokoro --- src/python/m5/ext/pystats/group.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/python/m5/ext/pystats/group.py b/src/python/m5/ext/pystats/group.py index bde1c40ca2..22d11b2702 100644 --- a/src/python/m5/ext/pystats/group.py +++ b/src/python/m5/ext/pystats/group.py @@ -25,7 +25,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import re -from typing import Callable, Dict, Iterator, List, Optional, Union +from typing import Callable, Dict, Iterator, List, Optional, Pattern, Union from .jsonserializable import JsonSerializable from .statistic import Scalar, Statistic @@ -99,7 +99,7 @@ class Group(JsonSerializable): """ yield from self.children(lambda _name: _name in name) - def find_re(self, regex: Union[str, re.Pattern] + def find_re(self, regex: Union[str, Pattern] ) -> Iterator[Union["Group", Statistic]]: """ Find all stats that match the name