x86 isa: This patch attempts an implementation of mwait.

Mwait works as follows:
1. A cpu monitors an address of interest (monitor instruction).
2. A cpu calls mwait - this loads the cache line into that cpu's cache.
3. The cpu goes to sleep.
4. When another processor requests write permission for the line, it is evicted from the sleeping cpu's cache. This eviction is forwarded to the sleeping cpu, which then wakes up.

Committed by: Nilay Vaish <nilay@cs.wisc.edu>
2014-11-06 05:42:22 -06:00
parent 3947f88d0f
commit bf80734b2c
26 changed files with 381 additions and 16 deletions
--- a/configs/ruby/MESI_Three_Level.py
+++ b/configs/ruby/MESI_Three_Level.py
@@ -34,6 +34,7 @@ import m5
 from m5.objects import *
 from m5.defines import buildEnv
 from Ruby import create_topology
+from Ruby import send_evicts

 #
 # Note: the L1 Cache latency is only used by the sequencer on fast path hits
@@ -101,7 +102,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):

            l0_cntrl = L0Cache_Controller(version = i*num_cpus_per_cluster + j,
                          Icache = l0i_cache, Dcache = l0d_cache,
-                          send_evictions = (options.cpu_type == "detailed"),
+                          send_evictions = send_evicts(options),
                          clk_domain=system.cpu[i].clk_domain,
                          ruby_system = ruby_system)

--- a/configs/ruby/MESI_Two_Level.py
+++ b/configs/ruby/MESI_Two_Level.py
@@ -32,6 +32,7 @@ import m5
 from m5.objects import *
 from m5.defines import buildEnv
 from Ruby import create_topology
+from Ruby import send_evicts

 #
 # Note: the L1 Cache latency is only used by the sequencer on fast path hits
@@ -91,8 +92,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
                                      L1Icache = l1i_cache,
                                      L1Dcache = l1d_cache,
                                      l2_select_num_bits = l2_bits,
-                                      send_evictions = (
-                                          options.cpu_type == "detailed"),
+                                      send_evictions = send_evicts(options),
                                      prefetcher = prefetcher,
                                      ruby_system = ruby_system,
                                      clk_domain=system.cpu[i].clk_domain,
--- a/configs/ruby/MI_example.py
+++ b/configs/ruby/MI_example.py
@@ -32,6 +32,7 @@ import m5
 from m5.objects import *
 from m5.defines import buildEnv
 from Ruby import create_topology
+from Ruby import send_evicts

 #
 # Note: the cache latency is only used by the sequencer on fast path hits
@@ -79,8 +80,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
        #
        l1_cntrl = L1Cache_Controller(version = i,
                                      cacheMemory = cache,
-                                      send_evictions = (
-                                          options.cpu_type == "detailed"),
+                                      send_evictions = send_evicts(options),
                                      transitions_per_cycle = options.ports,
                                      clk_domain=system.cpu[i].clk_domain,
                                      ruby_system = ruby_system)
--- a/configs/ruby/MOESI_CMP_directory.py
+++ b/configs/ruby/MOESI_CMP_directory.py
@@ -32,6 +32,7 @@ import m5
 from m5.objects import *
 from m5.defines import buildEnv
 from Ruby import create_topology
+from Ruby import send_evicts

 #
 # Note: the L1 Cache latency is only used by the sequencer on fast path hits
@@ -89,8 +90,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
                                      L1Icache = l1i_cache,
                                      L1Dcache = l1d_cache,
                                      l2_select_num_bits = l2_bits,
-                                      send_evictions = (
-                                          options.cpu_type == "detailed"),
+                                      send_evictions = send_evicts(options),
                                      transitions_per_cycle = options.ports,
                                      clk_domain=system.cpu[i].clk_domain,
                                      ruby_system = ruby_system)
--- a/configs/ruby/MOESI_CMP_token.py
+++ b/configs/ruby/MOESI_CMP_token.py
@@ -32,6 +32,7 @@ import m5
 from m5.objects import *
 from m5.defines import buildEnv
 from Ruby import create_topology
+from Ruby import send_evicts

 #
 # Note: the L1 Cache latency is only used by the sequencer on fast path hits
@@ -109,8 +110,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
                                        not options.disable_dyn_timeouts,
                                      no_mig_atomic = not \
                                        options.allow_atomic_migration,
-                                      send_evictions = (
-                                          options.cpu_type == "detailed"),
+                                      send_evictions = send_evicts(options),
                                      transitions_per_cycle = options.ports,
                                      clk_domain=system.cpu[i].clk_domain,
                                      ruby_system = ruby_system)
--- a/configs/ruby/MOESI_hammer.py
+++ b/configs/ruby/MOESI_hammer.py
@@ -32,6 +32,7 @@ import m5
 from m5.objects import *
 from m5.defines import buildEnv
 from Ruby import create_topology
+from Ruby import send_evicts

 #
 # Note: the L1 Cache latency is only used by the sequencer on fast path hits
@@ -102,8 +103,7 @@ def create_system(options, full_system, system, dma_ports, ruby_system):
                                      L2cache = l2_cache,
                                      no_mig_atomic = not \
                                        options.allow_atomic_migration,
-                                      send_evictions = (
-                                          options.cpu_type == "detailed"),
+                                      send_evictions = send_evicts(options),
                                      transitions_per_cycle = options.ports,
                                      clk_domain=system.cpu[i].clk_domain,
                                      ruby_system = ruby_system)
--- a/configs/ruby/Ruby.py
+++ b/configs/ruby/Ruby.py
@@ -233,6 +233,14 @@ def create_system(options, full_system, system, piobus = None, dma_ports = []):
    ruby.num_of_sequencers = len(cpu_sequencers)
    ruby.random_seed    = options.random_seed

+def send_evicts(options):
+    # Returns True when evictions must be forwarded to the CPU. Two cases:
+    # 1. The O3 ("detailed") model must keep its LSQ coherent with the caches
+    # 2. The x86 mwait instruction is built on top of coherence invalidations
+    if options.cpu_type == "detailed" or buildEnv['TARGET_ISA'] == 'x86':
+        return True
+    return False
+
    # Create a backing copy of physical memory in case required
    if options.access_backing_store:
        ruby.phys_mem = SimpleMemory(range=AddrRange(options.mem_size),