From c634b23305ed4e26ca0efa41b859ca7e3da11ee6 Mon Sep 17 00:00:00 2001 From: Luming Wang Date: Tue, 23 May 2023 22:32:12 +0800 Subject: [PATCH] sim,python: follow the new CPython startup sequence Currently, gem5 suffers from several bugs related to the Python interpreter's locale encoding. gem5 will crash when the working directory contains non-ASCII characters. The reason is that Python 3.8+ introduces a new interpreter startup sequence [1]. The startup sequence consists of three phases: 1. Python core runtime preinitialization 2. Python core runtime initialization 3. Main interpreter configuration Phase 1 determines the encodings used for system interfaces. However, gem5 doesn't preinitialize the Python interpreter. Thus, the locale settings do not take effect. This patch preinitializes the interpreter for Python 3.8+ [2]. Also, this patch avoids the use of `Py_SetProgramName`, which has been deprecated since Python 3.11 [3]. [1] https://peps.python.org/pep-0432/ [2] https://peps.python.org/pep-0587/ [3] https://docs.python.org/3/c-api/init.html#c.Py_SetProgramName Change-Id: I08a2ec6ab2b39a95ab194909932c8fc578c745ce Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70898 Tested-by: kokoro Reviewed-by: Bobby Bruce Maintainer: Bobby Bruce Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Reviewed-by: Roger Chang --- src/python/gem5py.cc | 15 +++++++++++++++ src/sim/main.cc | 18 ++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/src/python/gem5py.cc b/src/python/gem5py.cc index f2d87596f0..37ddee2e7c 100644 --- a/src/python/gem5py.cc +++ b/src/python/gem5py.cc @@ -51,6 +51,21 @@ namespace py = pybind11; int main(int argc, const char **argv) { +#if PY_VERSION_HEX >= 0x03080000 + // Preinitialize Python for Python 3.8+ + // This ensures that the locale configuration takes effect + PyStatus status; + PyPreConfig preconfig; + PyPreConfig_InitPythonConfig(&preconfig); + + preconfig.utf8_mode = 1; + + status = Py_PreInitialize(&preconfig); + if 
(PyStatus_Exception(status)) { + Py_ExitStatusException(status); + } +#endif + py::scoped_interpreter guard; // Embedded python doesn't set up sys.argv, so we'll do that ourselves. diff --git a/src/sim/main.cc b/src/sim/main.cc index 81a691d15d..1c42891816 100644 --- a/src/sim/main.cc +++ b/src/sim/main.cc @@ -50,6 +50,7 @@ main(int argc, char **argv) // Initialize gem5 special signal handling. initSignals(); +#if PY_VERSION_HEX < 0x03080000 // Convert argv[0] to a wchar_t string, using python's locale and cleanup // functions. std::unique_ptr program( @@ -59,6 +60,23 @@ main(int argc, char **argv) // This can help python find libraries at run time relative to this binary. // It's probably not necessary, but is mostly harmless and might be useful. Py_SetProgramName(program.get()); +#else + // Preinitialize Python for Python 3.8+ + // This ensures that the locale configuration takes effect + PyStatus status; + + PyConfig config; + PyConfig_InitPythonConfig(&config); + + /* Set the program name. Implicitly preinitialize Python. */ + status = PyConfig_SetBytesString(&config, &config.program_name, + argv[0]); + if (PyStatus_Exception(status)) { + PyConfig_Clear(&config); + Py_ExitStatusException(status); + return 1; + } +#endif py::scoped_interpreter guard(true, argc, argv);