python,stdlib: Add multiprocessing module

This changeset replicates some of the multiprocessing module
implementation from the python standard library in gem5. The goal of
this and following changesets is to enable users to use a *single* set
of python scripts to run and analyze a suite of gem5 simulations.

We must reimplement some of the multiprocessing module because it is not
flexible enough to allow for customized command line parameters to the
"python" executable (gem5 in our case). To get around this, I extended
the Process and context objects to be gem5 specific.

The next step is to wrap the Process and Pool types with gem5-specific
versions that will improve their usability for our needs. With this
changeset, these objects are usable, but it will require significant
user effort to reach the goal of running/analyzing many different gem5
simulations.

There are some limitations:
- The pool will only work if the max tasks per child is 1
- The functions that are executed must come from another module

As an example, the following code should work after applying this
change.

test.py:
```python
from gem5.utils.multiprocessing import Process, Pool
from sim import info, run_sim
if __name__ == '__m5_main__' or __name__ == '__main__':
    info('main line')
    p1 = Process(target=run_sim, args=('bob',))
    p2 = Process(target=run_sim, args=('jane',))
    p1.start()
    p2.start()
    p2.join()
    p1.join()
    with Pool(processes=4, maxtasksperchild=1) as pool:
        pool.map(run_sim, range(10))
```

sim.py:
```
import os
def info(title):
    print(title)
    print('module name:', __name__)
    print('parent process:', os.getppid())
    print('process id:', os.getpid())
def run_sim(name):
    info('function g')
    from gem5.prebuilt.demo.x86_demo_board import X86DemoBoard
    from gem5.resources.resource import Resource
    from gem5.simulate.simulator import Simulator
    board = X86DemoBoard()
    board.set_kernel_disk_workload(
        kernel=Resource("x86-linux-kernel-5.4.49"),
        disk_image=Resource("x86-ubuntu-18.04-img"),
    )
    simulator = Simulator(board=board)
    simulator.run(max_ticks=10000000)
```

Change-Id: I4348ebaa75d006949ec96e732f5dc2a5173c6048
Signed-off-by: Jason Lowe-Power <jason@lowepower.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/63432
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
This commit is contained in:
Jason Lowe-Power
2022-09-09 13:01:51 -07:00
committed by Bobby Bruce
parent aead8fb0fd
commit c6918c8f74
6 changed files with 383 additions and 0 deletions

View File

@@ -241,6 +241,14 @@ PySource('gem5.utils', 'gem5/utils/__init__.py')
PySource('gem5.utils', 'gem5/utils/filelock.py')
PySource('gem5.utils', 'gem5/utils/override.py')
PySource('gem5.utils', 'gem5/utils/requires.py')
# gem5's wrapper around the python multiprocessing module.
PySource('gem5.utils.multiprocessing',
'gem5/utils/multiprocessing/__init__.py')
PySource('gem5.utils.multiprocessing',
'gem5/utils/multiprocessing/_command_line.py')
PySource('gem5.utils.multiprocessing',
'gem5/utils/multiprocessing/context.py')
PySource('gem5.utils.multiprocessing',
'gem5/utils/multiprocessing/popen_spawn_gem5.py')
PySource('', 'importer.py')
PySource('m5', 'm5/__init__.py')

View File

@@ -0,0 +1,71 @@
# gem5's wrapper around python multiprocessing
This module wraps python's multiprocessing module so that it works with gem5.
The multiprocessing module creates new python processes, but there is no way to customize the way these processes are created.
This wrapper extends the python multiprocessing to support passing new arguments to the python (or gem5 in this case) executable when a new process is created.
This code replicates some of the multiprocessing module implementation from the python standard library in gem5.
The goal of this code is to enable users to use a *single* set of python scripts to run and analyze a suite of gem5 simulations.
We must reimplement some of the multiprocessing module because it is not flexible enough to allow for customized command line parameters to the "python" executable (gem5 in our case).
To get around this, I extended the Process and context objects to be gem5 specific.
The next step is to wrap the Process and Pool types with gem5-specific versions that will improve their usability for our needs.
With this changeset, these objects are usable, but it will require significant user effort to reach the goal of running/analyzing many different gem5 simulations.
## Example use
test.py:
```python
from gem5.utils.multiprocessing import Process, Pool
from sim import info, run_sim
if __name__ == '__m5_main__' or __name__ == '__main__':
    info('main line')
    p1 = Process(target=run_sim, args=('bob',))
    p2 = Process(target=run_sim, args=('jane',))
    p1.start()
    p2.start()
    p2.join()
    p1.join()
    with Pool(processes=4, maxtasksperchild=1) as pool:
        pool.map(run_sim, range(10))
```
sim.py:
```python
import os
def info(title):
    print(title)
    print('module name:', __name__)
    print('parent process:', os.getppid())
    print('process id:', os.getpid())
def run_sim(name):
    info('function g')
    from gem5.prebuilt.demo.x86_demo_board import X86DemoBoard
    from gem5.resources.resource import Resource
    from gem5.simulate.simulator import Simulator
    board = X86DemoBoard()
    board.set_kernel_disk_workload(
        kernel=Resource("x86-linux-kernel-5.4.49"),
        disk_image=Resource("x86-ubuntu-18.04-img"),
    )
    simulator = Simulator(board=board)
    simulator.run(max_ticks=10000000)
```
Then, you can run `gem5 test.py`.
This will execute `run_sim` 12 times.
The first two will run in parallel, then the last 10 will run in parallel with up to 4 running at once.
## Limitations
- This only supports the spawn context. This is important because we need a fresh gem5 process for every subprocess.
- When using `Pool`, the `maxtasksperchild` must be 1.
- Process synchronization (queues, pipes, etc.) hasn't been tested
- Functions that are used to execute in the subprocess must be imported from another module. In other words, we cannot pickle functions in the main/runner module.
## Implementation notes
- The `_start_method` must be `None` for the `Spawn_gem5Process` class. Otherwise, in `_bootstrap` in the `BaseProcess` it will try to force the `_start_method` to be gem5-specific, which the `multiprocessing` module doesn't understand.

View File

@@ -0,0 +1,33 @@
# Copyright (c) 2022 The Regents of The University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from .context import Process
from .context import gem5Context
# `Pool` is the `Pool` method bound to a gem5Context instance created here,
# so users can call `Pool(...)` directly from this package.
Pool = gem5Context().Pool
# Names exported by `from gem5.utils.multiprocessing import *`.
__all__ = ["Process", "Pool"]

View File

@@ -0,0 +1,101 @@
# Copyright (c) 2022 The Regents of The University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
This file contains extensions of the multiprocessing module to be used with gem5.
Specifically, it contains the code to produce the command line for spawned processes.
Some code inspired by the Python standard library implementation of the
multiprocessing module (i.e., cpython/Lib/multiprocessing/).
"""
import sys
from multiprocessing import spawn, util
def _gem5_args_for_multiprocessing(name):
    """
    Build the gem5 command line options for a spawned subprocess.

    The options are derived from ``m5.options`` of the current process.

    :param name: Name of the subprocess. It is appended to the current
        output directory to form the subprocess's ``--outdir``.
    :returns: A list of gem5 command line option strings.
    :raises Exception: If any option that is disallowed with
        multiprocessing is set. The message lists the offending
        ``m5.options`` attribute names.
    """
    from m5 import options

    # Options that are disallowed with multiprocessing
    # (attribute names on `m5.options`).
    disallowed = (
        "build_info",
        "copyright",
        "readme",
        "interactive",
        "pdb",
        "verbose",
        "debug_break",
        "debug_help",
        "debug_flags",
        "debug_start",
        "debug_end",
        "debug_ignore",
        "list_sim_objects",
    )
    # Collect every offending option so the error tells the user exactly
    # what to remove instead of sending them to read this file.
    offending = [opt for opt in disallowed if getattr(options, opt)]
    if offending:
        raise Exception(
            "Disallowed option for multiprocessing: "
            f"{', '.join(offending)}. See {__file__} for details."
        )

    # Options not forwarded:
    # --allow-remote-connections, --listener-mode, --dump-config, --json-config
    # --dot-config, --dot-dvfs-config, --debug-file, --remote-gdb-port, -c

    arguments = [
        f"--outdir={options.outdir}/{name}",
        f"--stdout-file={options.stdout_file}",
        f"--stderr-file={options.stderr_file}",
        f"--stats-file={options.stats_file}",
    ]
    # The remaining options are only forwarded when set in this process.
    if options.redirect_stdout:
        arguments.append("--redirect-stdout")
    if options.redirect_stderr:
        arguments.append("--redirect-stderr")
    if options.silent_redirect:
        arguments.append("--silent-redirect")
    if options.path:
        arguments.append(f"--path={':'.join(options.path)}")
    if options.quiet:
        arguments.append("--quiet")

    return arguments
def get_command_line(name, **kwds):
    """
    Return the prefix of the command line used to spawn a child process.

    :param name: Name of the child process; passed on to
        ``_gem5_args_for_multiprocessing`` when not running frozen.
    :param kwds: Keyword arguments rendered as ``key=value`` (using
        ``repr`` for the value) for the child's ``spawn_main`` call.
    :returns: The command line as a list of strings.
    """
    rendered_kwds = ["%s=%r" % pair for pair in kwds.items()]

    # Frozen executables take the fork marker and arguments directly.
    if getattr(sys, "frozen", False):
        return [sys.executable, "--multiprocessing-fork", *rendered_kwds]

    prog = "from multiprocessing.spawn import spawn_main; spawn_main(%s)" % (
        ", ".join(rendered_kwds)
    )
    # Interpreter flags first, followed by the gem5-specific options.
    opts = util._args_from_interpreter_flags()
    opts.extend(_gem5_args_for_multiprocessing(name))
    exe = spawn.get_executable()
    return [exe, *opts, "-c", prog, "--multiprocessing-fork"]

View File

@@ -0,0 +1,75 @@
# Copyright (c) 2022 The Regents of The University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
This file contains extensions of the multiprocessing module to be used with gem5
Some code inspired by the Python standard library implementation of the
multiprocessing module (i.e., cpython/Lib/multiprocessing/).
"""
from multiprocessing import context, process
from multiprocessing.context import DefaultContext
class Spawn_gem5Process(process.BaseProcess):
    """
    Process type whose child is started by the gem5-specific ``Popen``.
    """

    # The `_start_method` must be `None` for the `Spawn_gem5Process` class.
    # Otherwise, in `_bootstrap` in the `BaseProcess` it will try to force the
    # `_start_method` to be gem5-specific, which the `multiprocessing` module
    # doesn't understand.
    _start_method = None

    @staticmethod
    def _Popen(process_obj):
        """Create the gem5 ``Popen`` object for ``process_obj``."""
        # Imported on use, as in the original implementation.
        from .popen_spawn_gem5 import Popen as Gem5Popen

        return Gem5Popen(process_obj)
class Process(process.BaseProcess):
    """
    User-facing process type that starts its child through whatever
    ``Process`` class the module's default context provides.
    """

    _start_method = None

    @staticmethod
    def _Popen(process_obj):
        """Delegate to the ``_Popen`` of the default context's Process."""
        active_context = _default_context.get_context()
        return active_context.Process._Popen(process_obj)
class gem5Context(context.BaseContext):
    """
    Multiprocessing context named "spawn_gem5" that uses
    ``Spawn_gem5Process`` as its process type.
    """

    _name = "spawn_gem5"
    Process = Spawn_gem5Process

    def get_context(self, method=None):
        """
        Return the context registered under ``method``.

        :param method: Name of the context to look up, or ``None`` to get
            this context itself.
        :returns: The matching context object.
        :raises ValueError: If no context is registered for ``method``.
        """
        if method is None:
            return self

        try:
            found = _concrete_contexts[method]
        except KeyError:
            raise ValueError("cannot find context for %r" % method) from None
        else:
            found._check_available()
            return found
# Contexts that `gem5Context.get_context` can look up by name; "spawn_gem5"
# is the only entry.
_concrete_contexts = {"spawn_gem5": gem5Context()}
# Default context wrapping the "spawn_gem5" context; `Process._Popen` goes
# through it to find the concrete Process class.
_default_context = DefaultContext(_concrete_contexts["spawn_gem5"])

View File

@@ -0,0 +1,95 @@
# Copyright (c) 2022 The Regents of The University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
This file contains extensions of the multiprocessing module to be used with gem5.
Specifically, it contains the code to spawn a new gem5 process with Popen.
Some code is from the Python standard library implementation of the
multiprocessing module (i.e., cpython/Lib/multiprocessing/).
"""
import io
import os
from multiprocessing.context import reduction, set_spawning_popen
from multiprocessing import popen_spawn_posix
from multiprocessing import spawn
from multiprocessing import util
from ._command_line import get_command_line
__all__ = ["Popen"]
class Popen(popen_spawn_posix.Popen):
    """
    Start a child process for a multiprocessing process object, building
    the child's command line with gem5's ``get_command_line``.
    """

    method = "spawn_gem5"

    def __init__(self, process_obj):
        super().__init__(process_obj)

    # Copyright (c) 2001-2022 Python Software Foundation; All Rights Reserved
    # from cpython/Lib/multiprocessing/popen_spawn_posix.py
    def _launch(self, process_obj):
        """
        Spawn the child process and send it the pickled process object.

        Sets ``self.pid``, ``self.sentinel`` and ``self.finalizer``.
        """
        from multiprocessing import resource_tracker

        tracker_fd = resource_tracker.getfd()
        self._fds.append(tracker_fd)

        # Serialize the preparation data and the process object while this
        # Popen is registered as the spawning one.
        prep_data = spawn.get_preparation_data(process_obj._name)
        payload = io.BytesIO()
        set_spawning_popen(self)
        try:
            for item in (prep_data, process_obj):
                reduction.dump(item, payload)
        finally:
            set_spawning_popen(None)

        parent_r = child_w = child_r = parent_w = None
        try:
            parent_r, child_w = os.pipe()
            child_r, parent_w = os.pipe()
            # Note: This next line is the only modification
            cmd = get_command_line(
                tracker_fd=tracker_fd,
                pipe_handle=child_r,
                name=process_obj.name,
            )
            self._fds.extend([child_r, child_w])
            executable = spawn.get_executable()
            self.pid = util.spawnv_passfds(executable, cmd, self._fds)
            self.sentinel = parent_r
            with open(parent_w, "wb", closefd=False) as pipe_to_child:
                pipe_to_child.write(payload.getbuffer())
        finally:
            # Hand the parent's pipe ends that were opened to a finalizer.
            parent_fds = [
                fd for fd in (parent_r, parent_w) if fd is not None
            ]
            self.finalizer = util.Finalize(self, util.close_fds, parent_fds)
            # The child's pipe ends are closed in this process right away.
            for fd in (child_r, child_w):
                if fd is not None:
                    os.close(fd)