diff --git a/src/python/m5/simulate.py b/src/python/m5/simulate.py
index 66e6a08d07..b5b8c78782 100644
--- a/src/python/m5/simulate.py
+++ b/src/python/m5/simulate.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2012,2019 ARM Limited
+# Copyright (c) 2012, 2019, 2021 Arm Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -348,6 +348,9 @@ def fork(simout="%(parent)s.f%(fork_seq)i"):
 
     drain()
 
+    # Terminate helper threads that service parallel event queues.
+    _m5.event.terminateEventQueueThreads()
+
     try:
         pid = os.fork()
     except OSError as e:
diff --git a/src/python/pybind11/event.cc b/src/python/pybind11/event.cc
index aefe50a221..7a02221611 100644
--- a/src/python/pybind11/event.cc
+++ b/src/python/pybind11/event.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited
+ * Copyright (c) 2017, 2021 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -107,6 +107,7 @@ pybind_init_event(py::module_ &m_native)
 
     m.def("simulate", &simulate,
           py::arg("ticks") = MaxTick);
+    m.def("terminateEventQueueThreads", &terminateEventQueueThreads);
     m.def("exitSimLoop", &exitSimLoop);
     m.def("getEventQueue", []() { return curEventQueue(); },
           py::return_value_policy::reference);
diff --git a/src/sim/simulate.cc b/src/sim/simulate.cc
index 4a008696b0..ec46cbfcda 100644
--- a/src/sim/simulate.cc
+++ b/src/sim/simulate.cc
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2021 Arm Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2006 The Regents of The University of Michigan
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * Copyright (c) 2013 Mark D. Hill and David A. Wood
@@ -30,6 +42,7 @@
 
 #include "sim/simulate.hh"
 
+#include <atomic>
 #include <mutex>
 #include <thread>
 
@@ -45,35 +58,140 @@
 namespace gem5
 {
 
-//! Mutex for handling async events.
-std::mutex asyncEventMutex;
-
-//! Global barrier for synchronizing threads entering/exiting the
-//! simulation loop.
-Barrier *threadBarrier;
-
 //! forward declaration
 Event *doSimLoop(EventQueue *);
 
-/**
- * The main function for all subordinate threads (i.e., all threads
- * other than the main thread).  These threads start by waiting on
- * threadBarrier.  Once all threads have arrived at threadBarrier,
- * they enter the simulation loop concurrently.  When they exit the
- * loop, they return to waiting on threadBarrier.  This process is
- * repeated until the simulation terminates.
- */
-static void
-thread_loop(EventQueue *queue)
-{
-    while (true) {
-        threadBarrier->wait();
-        doSimLoop(queue);
-    }
-}
-
 GlobalSimLoopExitEvent *simulate_limit_event = nullptr;
 
+/**
+ * Owner of the helper threads that service event queues 1..N-1.
+ *
+ * Helper threads are started lazily by runUntilLocalExit() and
+ * joined by terminateThreads(). Between simulation runs they block
+ * on a shared barrier until the main thread arrives at it.
+ */
+class SimulatorThreads
+{
+  public:
+    SimulatorThreads() = delete;
+    SimulatorThreads(const SimulatorThreads &) = delete;
+    SimulatorThreads &operator=(const SimulatorThreads &) = delete;
+
+    SimulatorThreads(uint32_t num_queues)
+        : terminate(false),
+          numQueues(num_queues),
+          barrier(num_queues)
+    {
+        threads.reserve(num_queues);
+    }
+
+    ~SimulatorThreads()
+    {
+        // This should only happen after exit has been
+        // called. Subordinate event queues should normally (assuming
+        // exit is called from Python) be waiting on the barrier when
+        // this happens.
+        //
+        // N.B.: Not terminating here would make it impossible to
+        // safely destroy the barrier.
+        terminateThreads();
+    }
+
+    void runUntilLocalExit()
+    {
+        assert(!terminate);
+
+        // Start subordinate threads if needed.
+        if (threads.empty()) {
+            // the main thread (the one running Python) handles queue 0,
+            // so we only need to allocate new threads for queues 1..N-1.
+            // We'll call these the "subordinate" threads.
+            for (uint32_t i = 1; i < numQueues; i++) {
+                threads.emplace_back(
+                    [this](EventQueue *eq) {
+                        thread_main(eq);
+                    }, mainEventQueue[i]);
+            }
+        }
+
+        // This method is called from the main thread. All subordinate
+        // threads should be waiting on the barrier when the function
+        // is called. The arrival of the main thread here will satisfy
+        // the barrier and start another iteration in the thread loop.
+        barrier.wait();
+    }
+
+    void
+    terminateThreads()
+    {
+        assert(!terminate);
+
+        // Nothing to do if no helper threads are running (single
+        // event queue, never started, or already terminated). Waiting
+        // on the barrier in that case would block forever when
+        // numQueues > 1 as no other thread would ever arrive.
+        if (threads.empty())
+            return;
+
+        /* This function should only be called when the simulator is
+         * handling a global exit event (typically from Python). This
+         * means that the helper threads will be waiting on the
+         * barrier. Tell the helper threads to exit and release them from
+         * their barrier. */
+        terminate = true;
+        barrier.wait();
+
+        /* Wait for all of the threads to terminate */
+        for (auto &t : threads) {
+            t.join();
+        }
+
+        terminate = false;
+        threads.clear();
+    }
+
+  protected:
+    /**
+     * The main function for all subordinate threads (i.e., all threads
+     * other than the main thread). These threads start by waiting on
+     * the barrier. Once all threads have arrived at the barrier, they
+     * enter the simulation loop concurrently. When they exit the
+     * loop, they return to waiting on the barrier. This process is
+     * repeated until terminateThreads() requests termination.
+     */
+    void
+    thread_main(EventQueue *queue)
+    {
+        /* Wait for all initialisation to complete */
+        barrier.wait();
+
+        while (!terminate) {
+            doSimLoop(queue);
+            barrier.wait();
+        }
+    }
+
+    std::atomic<bool> terminate;
+    uint32_t numQueues;
+    std::vector<std::thread> threads;
+    Barrier barrier;
+};
+
+static std::unique_ptr<SimulatorThreads> simulatorThreads;
+
+/** Deleter that deschedules a global event before deleting it. */
+struct DescheduleDeleter
+{
+    void operator()(BaseGlobalEvent *event)
+    {
+        if (!event)
+            return;
+
+        event->deschedule();
+        delete event;
+    }
+};
+
 /** Simulate for num_cycles additional cycles.  If num_cycles is -1
  * (the default), do not limit simulation; some other event must
  * terminate the loop.  Exported to Python.
@@ -82,75 +200,61 @@ GlobalSimLoopExitEvent *simulate_limit_event = nullptr;
 GlobalSimLoopExitEvent *
 simulate(Tick num_cycles)
 {
-    // The first time simulate() is called from the Python code, we need to
-    // create a thread for each of event queues referenced by the
-    // instantiated sim objects.
-    static bool threads_initialized = false;
-    static std::vector<std::thread *> threads;
-
-    if (!threads_initialized) {
-        threadBarrier = new Barrier(numMainEventQueues);
-
-        // the main thread (the one we're currently running on)
-        // handles queue 0, so we only need to allocate new threads
-        // for queues 1..N-1.  We'll call these the "subordinate" threads.
-        for (uint32_t i = 1; i < numMainEventQueues; i++) {
-            threads.push_back(new std::thread(thread_loop, mainEventQueue[i]));
-        }
-
-        threads_initialized = true;
-        simulate_limit_event =
-            new GlobalSimLoopExitEvent(mainEventQueue[0]->getCurTick(),
-                                       "simulate() limit reached", 0);
-    }
+    std::unique_ptr<GlobalSyncEvent, DescheduleDeleter> quantum_event;
+    const Tick exit_tick = num_cycles < MaxTick - curTick() ?
+        curTick() + num_cycles : MaxTick;
 
     inform("Entering event queue @ %d.  Starting simulation...\n", curTick());
 
-    if (num_cycles < MaxTick - curTick())
-        num_cycles = curTick() + num_cycles;
-    else // counter would roll over or be set to MaxTick anyhow
-        num_cycles = MaxTick;
+    if (!simulatorThreads)
+        simulatorThreads.reset(new SimulatorThreads(numMainEventQueues));
 
-    simulate_limit_event->reschedule(num_cycles);
+    if (!simulate_limit_event) {
+        simulate_limit_event = new GlobalSimLoopExitEvent(
+            mainEventQueue[0]->getCurTick(),
+            "simulate() limit reached", 0);
+    }
+    simulate_limit_event->reschedule(exit_tick);
 
-    GlobalSyncEvent *quantum_event = NULL;
     if (numMainEventQueues > 1) {
-        if (simQuantum == 0) {
-            fatal("Quantum for multi-eventq simulation not specified");
-        }
+        fatal_if(simQuantum == 0,
+                 "Quantum for multi-eventq simulation not specified");
 
-        quantum_event = new GlobalSyncEvent(curTick() + simQuantum, simQuantum,
-                            EventBase::Progress_Event_Pri, 0);
+        quantum_event.reset(
+            new GlobalSyncEvent(curTick() + simQuantum, simQuantum,
+                                EventBase::Progress_Event_Pri, 0));
 
         inParallelMode = true;
     }
 
-    // all subordinate (created) threads should be waiting on the
-    // barrier; the arrival of the main thread here will satisfy the
-    // barrier, and all threads will enter doSimLoop in parallel
-    threadBarrier->wait();
+    simulatorThreads->runUntilLocalExit();
     Event *local_event = doSimLoop(mainEventQueue[0]);
-    assert(local_event != NULL);
+    assert(local_event);
 
     inParallelMode = false;
 
     // locate the global exit event and return it to Python
     BaseGlobalEvent *global_event = local_event->globalEvent();
-    assert(global_event != NULL);
+    assert(global_event);
 
     GlobalSimLoopExitEvent *global_exit_event =
         dynamic_cast<GlobalSimLoopExitEvent *>(global_event);
-    assert(global_exit_event != NULL);
-
-    //! Delete the simulation quantum event.
-    if (quantum_event != NULL) {
-        quantum_event->deschedule();
-        delete quantum_event;
-    }
+    assert(global_exit_event);
 
     return global_exit_event;
 }
 
+void
+terminateEventQueueThreads()
+{
+    // The thread pool is created lazily by simulate(), so there is
+    // nothing to terminate if simulate() has not been called yet
+    // (e.g., m5.fork() before the first call to simulate()).
+    if (simulatorThreads)
+        simulatorThreads->terminateThreads();
+}
+
+
 /**
  * Test and clear the global async_event flag, such that each time the
  * flag is cleared, only one thread returns true (and thus is assigned
@@ -159,15 +263,17 @@ simulate(Tick num_cycles)
 static bool
 testAndClearAsyncEvent()
 {
+    static std::mutex mutex;
+
     bool was_set = false;
-    asyncEventMutex.lock();
+    mutex.lock();
 
     if (async_event) {
         was_set = true;
         async_event = false;
     }
 
-    asyncEventMutex.unlock();
+    mutex.unlock();
     return was_set;
 }
 
diff --git a/src/sim/simulate.hh b/src/sim/simulate.hh
index 0817bbde1a..5ef499541f 100644
--- a/src/sim/simulate.hh
+++ b/src/sim/simulate.hh
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2021 Arm Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2006 The Regents of The University of Michigan
  * All rights reserved.
  *
@@ -34,6 +46,17 @@ namespace gem5
 class GlobalSimLoopExitEvent;
 
 GlobalSimLoopExitEvent *simulate(Tick num_cycles = MaxTick);
+
+/**
+ * Terminate helper threads when running in parallel mode.
+ *
+ * This is a no-op if no helper threads are currently running.
+ *
+ * @pre Simulator must have returned from simulate() to service a
+ * GlobalExitEvent prior to calling this function.
+ */
+void terminateEventQueueThreads();
+
 extern GlobalSimLoopExitEvent *simulate_limit_event;
 
 } // namespace gem5