Thermal simulation period adjustment mechanism implemented.
This commit is contained in:
@@ -1,8 +1,33 @@
|
||||
<power_thresholds>
|
||||
<cpu id="0" value="1.0" />
|
||||
<cpu id="1" value="2.0" />
|
||||
<gpu id="0" value="2.0" />
|
||||
<dram id="2" value="5.0" />
|
||||
<dram id="3" value="1.0" />
|
||||
<!-- TODO Maybe change this file format -->
|
||||
<CPUs id="0" value="1.0" />
|
||||
<GPU id="1" value="1.0" />
|
||||
<BASEBAND1 id="2" value="1.0" />
|
||||
<BASEBAND2 id="3" value="1.0" />
|
||||
<LLCACHE id="4" value="1.0" />
|
||||
<DRAMCTRL1 id="5" value="1.0" />
|
||||
<DRAMCTRL2 id="6" value="1.0" />
|
||||
<TSVs id="7" value="1.0" />
|
||||
<ACELLERATORS id="8" value="1.0" />
|
||||
<C0 id="9" value="1.0" />
|
||||
<C1 id="10" value="1.0" />
|
||||
<C2 id="11" value="1.0" />
|
||||
<C3 id="12" value="1.0" />
|
||||
<TSVs id="13" value="1.0" />
|
||||
<C0 id="14" value="1.0" />
|
||||
<C1 id="15" value="1.0" />
|
||||
<C2 id="16" value="1.0" />
|
||||
<C3 id="17" value="1.0" />
|
||||
<TSVs id="18" value="1.0" />
|
||||
<C0 id="19" value="1.0" />
|
||||
<C1 id="20" value="1.0" />
|
||||
<C2 id="21" value="1.0" />
|
||||
<C3 id="22" value="1.0" />
|
||||
<TSVs id="23" value="1.0" />
|
||||
<C0 id="24" value="1.0" />
|
||||
<C1 id="25" value="1.0" />
|
||||
<C2 id="26" value="1.0" />
|
||||
<C3 id="27" value="1.0" />
|
||||
<TSVs id="28" value="1.0" />
|
||||
</power_thresholds>
|
||||
|
||||
|
||||
@@ -156,7 +156,7 @@ void Configuration::setParameter(std::string name, std::string value)
|
||||
else if (name == "StaticTemperatureDefaultValue")
|
||||
temperatureSim.StaticTemperatureDefaultValue = string2int(value);
|
||||
else if (name == "DynTemperatureSimPeriod")
|
||||
temperatureSim.DynTemperatureSimPeriod = string2int(value);
|
||||
temperatureSim.DynTemperatureSimPeriod = std::stod(value.c_str());
|
||||
else if (name == "DynTemperatureSimUnit")
|
||||
temperatureSim.DynTemperatureSimUnit = string2TimeUnit(value);
|
||||
else if (name == "PowerThresholdsFile") {
|
||||
|
||||
@@ -51,15 +51,13 @@ struct TemperatureSimConfig {
|
||||
int StaticTemperatureDefaultValue;
|
||||
|
||||
// Dynamic Temperature Simulation parameters
|
||||
unsigned int DynTemperatureSimPeriod;
|
||||
double DynTemperatureSimPeriod;
|
||||
enum sc_time_unit DynTemperatureSimUnit;
|
||||
std::string IceServerIp;
|
||||
unsigned int IceServerPort;
|
||||
|
||||
std::string powerThresholdsFile;
|
||||
std::map<int, float> cpuPowerThresholds;
|
||||
std::map<int, float> gpuPowerThresholds;
|
||||
std::map<int, float> dramPowerThresholds;
|
||||
std::vector<float> powerThresholds;
|
||||
|
||||
void parsePowerThresholdsFile()
|
||||
{
|
||||
@@ -68,9 +66,9 @@ struct TemperatureSimConfig {
|
||||
// Load the XML file into memory and parse it
|
||||
tinyxml2::XMLDocument xml;
|
||||
loadXML(powerThresholdsFile, xml);
|
||||
tinyxml2::XMLElement *powerThresholds = xml.FirstChildElement("power_thresholds");
|
||||
tinyxml2::XMLElement *powThrElem = xml.FirstChildElement("power_thresholds");
|
||||
|
||||
if (powerThresholds == NULL) {
|
||||
if (powThrElem == NULL) {
|
||||
// Invalid file
|
||||
std::string errormsg = "Invalid Power Thresholds File " + powerThresholdsFile;
|
||||
printDebugMessage(errormsg);
|
||||
@@ -78,24 +76,10 @@ struct TemperatureSimConfig {
|
||||
throw;
|
||||
}
|
||||
|
||||
for (tinyxml2::XMLElement *e = powerThresholds->FirstChildElement(); e != NULL; e = e->NextSiblingElement()) {
|
||||
|
||||
std::string id_str = e->Attribute("id");
|
||||
for (tinyxml2::XMLElement *e = powThrElem->FirstChildElement(); e != NULL; e = e->NextSiblingElement()) {
|
||||
std::string thr_str = e->Attribute("value");
|
||||
|
||||
int id = std::stoi(id_str);
|
||||
float thr = std::stof(thr_str);
|
||||
|
||||
std::string name = e->Name();
|
||||
if (name == "cpu") {
|
||||
cpuPowerThresholds.insert(std::map<int, float>::value_type(id, thr));
|
||||
} else if (name == "gpu") {
|
||||
gpuPowerThresholds.insert(std::map<int, float>::value_type(id, thr));
|
||||
} else if (name == "dram") {
|
||||
dramPowerThresholds.insert(std::map<int, float>::value_type(id, thr));
|
||||
} else {
|
||||
printDebugMessage("Unknown element ignored: " + name);
|
||||
}
|
||||
powerThresholds.push_back(thr);
|
||||
}
|
||||
|
||||
showTemperatureSimConfig();
|
||||
@@ -103,12 +87,10 @@ struct TemperatureSimConfig {
|
||||
|
||||
void showTemperatureSimConfig()
|
||||
{
|
||||
for (auto e : cpuPowerThresholds)
|
||||
printDebugMessage("CPU[" + std::to_string(e.first) + "] threshold: " + std::to_string(e.second));
|
||||
for (auto e : gpuPowerThresholds)
|
||||
printDebugMessage("GPU[" + std::to_string(e.first) + "] threshold: " + std::to_string(e.second));
|
||||
for (auto e : dramPowerThresholds)
|
||||
printDebugMessage("DRAM[" + std::to_string(e.first) + "] threshold: " + std::to_string(e.second));
|
||||
int i = 0;
|
||||
for (auto e : powerThresholds) {
|
||||
printDebugMessage("powerThreshold[" + std::to_string(i++) + "]: " + std::to_string(e));
|
||||
}
|
||||
}
|
||||
|
||||
void printDebugMessage(std::string message)
|
||||
|
||||
@@ -495,7 +495,7 @@ unsigned int errorModel::getBit(int row, int column, int byteInColumn, int bitIn
|
||||
|
||||
double errorModel::getTemperature()
|
||||
{
|
||||
double temperature = TemperatureController::getInstance().getTemperature();
|
||||
double temperature = TemperatureController::getInstance().getTemperature(0, 0);
|
||||
return temperature;
|
||||
}
|
||||
|
||||
|
||||
@@ -34,77 +34,104 @@
|
||||
* Matthias Jung
|
||||
*/
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include "TemperatureController.h"
|
||||
#include "../controller/core/configuration/Configuration.h"
|
||||
|
||||
void TemperatureController::setTemperature(double t)
|
||||
double TemperatureController::getTemperature(int deviceId, float currentPower)
|
||||
{
|
||||
// XXX
|
||||
// mutex for dynamic temperature simulation
|
||||
temperature = t;
|
||||
if (dynamicTempSimEnabled == true) {
|
||||
// XXX do I need a mutex here?
|
||||
currentPowerValues.at(deviceId) = currentPower;
|
||||
return temperatureValues.at(deviceId);
|
||||
} else {
|
||||
printDebugMessage("Temperature is " + std::to_string(staticTemperature));
|
||||
return staticTemperature;
|
||||
}
|
||||
}
|
||||
|
||||
double TemperatureController::getTemperature()
|
||||
void TemperatureController::updateTemperatures()
|
||||
{
|
||||
printDebugMessage("Temperature is " + std::to_string(temperature));
|
||||
return temperature;
|
||||
thermalSimulation->sendPowerValues(&currentPowerValues);
|
||||
thermalSimulation->simulate();
|
||||
thermalSimulation->getTemperature(temperaturesBuffer, TDICE_OUTPUT_INSTANT_SLOT, TDICE_OUTPUT_TYPE_TCELL, TDICE_OUTPUT_QUANTITY_NONE);
|
||||
// save values just obtained for posterior use
|
||||
temperatureValues = temperaturesBuffer;
|
||||
// clear the buffer, otherwise it will grow every request
|
||||
temperaturesBuffer.clear();
|
||||
}
|
||||
|
||||
double TemperatureController::adjustThermalSimPeriod()
|
||||
{
|
||||
// Temperature Simulation Period Dynamic Adjustment
|
||||
//
|
||||
// 1. Adjustment is requierd when:
|
||||
//
|
||||
// 1.1. The power dissipation of one or more devices change considerably
|
||||
// (reaches the configured threshold for that device in any direction,
|
||||
// i.e. increases or decreases substantially) during the current
|
||||
// simulaiton period.
|
||||
//
|
||||
// 1.1.1. The simulation period will be reduced by a factor of 'n' so the
|
||||
// simulation occurs 'n' times more often.
|
||||
//
|
||||
// 1.1.2. The step 1.1.1 will be repeated until the point that there are
|
||||
// no sustantial changes in power dissipation between two consecutive
|
||||
// executions of the thermal simulation, i.e. all changes for all devices
|
||||
// are less than the configured threshold.
|
||||
//
|
||||
// 1.2. The current simulation period differs from the target period
|
||||
// defined in the configuration by the user.
|
||||
//
|
||||
// 1.2.1 The situation period will be kept for a number of simulation
|
||||
// executions 'ne' and after ne' the period will be increased again in
|
||||
// steps of 'n/2' until it achieves the desired value given by
|
||||
// configuration or the described in 1.1 occurs.
|
||||
|
||||
bool decreaseSimPeriod = false;
|
||||
|
||||
for (unsigned i = 0; i < currentPowerValues.size(); i++) {
|
||||
if (std::abs(lastPowerValues.at(i) - currentPowerValues.at(i)) > powerThresholds.at(i)) {
|
||||
cyclesSinceLastPeriodAdjust = 0;
|
||||
decreaseSimPeriod = true;
|
||||
}
|
||||
}
|
||||
|
||||
lastPowerValues = currentPowerValues;
|
||||
|
||||
if (decreaseSimPeriod == true) {
|
||||
period = period / periodAdjustFactor;
|
||||
printDebugMessage("Thermal Simulation period reduced to " + std::to_string(period) + ". Target is " + std::to_string(targetPeriod));
|
||||
} else {
|
||||
if (period != targetPeriod) {
|
||||
cyclesSinceLastPeriodAdjust++;
|
||||
if (cyclesSinceLastPeriodAdjust >= nPowStableCyclesToIncreasePeriod) {
|
||||
cyclesSinceLastPeriodAdjust = 0;
|
||||
period = period * (periodAdjustFactor / 2);
|
||||
printDebugMessage("Thermal Simulation period increased to " + std::to_string(period) + ". Target is " + std::to_string(targetPeriod));
|
||||
if (period > targetPeriod)
|
||||
period = targetPeriod;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return period;
|
||||
}
|
||||
|
||||
void TemperatureController::temperatureThread()
|
||||
{
|
||||
unsigned int period = Configuration::getInstance().temperatureSim.DynTemperatureSimPeriod;
|
||||
enum sc_time_unit t_unit = Configuration::getInstance().temperatureSim.DynTemperatureSimUnit;
|
||||
|
||||
std::vector<float> powerValues = {
|
||||
0, // 1: CPUs
|
||||
0, // 2: GPU
|
||||
1, // 3: BASEBAND1
|
||||
0, // 4: BASEBAND2
|
||||
0, // 5: LLCACHE
|
||||
0, // 6: DRAMCTRL1
|
||||
0, // 7: DRAMCTRL2
|
||||
0, // 8: TSVs
|
||||
0, // 9: ACELLERATORS
|
||||
1, //10: C0
|
||||
0, //11: C1
|
||||
0, //12: C2
|
||||
0, //13: C3
|
||||
0, //14: TSVs
|
||||
0, //10: C0
|
||||
0, //11: C1
|
||||
0, //12: C2
|
||||
0, //13: C3
|
||||
0, //14: TSVs
|
||||
0, //10: C0
|
||||
0, //11: C1
|
||||
0, //12: C2
|
||||
0, //13: C3
|
||||
0, //14: TSVs
|
||||
0, //10: C0
|
||||
0, //11: C1
|
||||
0, //12: C2
|
||||
0, //13: C3
|
||||
0 //14: TSVs
|
||||
};
|
||||
|
||||
while (true) {
|
||||
std::vector<float> temperatureValues;
|
||||
updateTemperatures();
|
||||
double p = adjustThermalSimPeriod();
|
||||
|
||||
thermalSimulation->sendPowerValues(&powerValues);
|
||||
int i = 0;
|
||||
for (auto t : temperatureValues) {
|
||||
printDebugMessage("Temperature[" + std::to_string(i++) + "] is " + std::to_string(t));
|
||||
}
|
||||
printDebugMessage("Thermal simulation period is " + std::to_string(p));
|
||||
|
||||
thermalSimulation->simulate();
|
||||
|
||||
thermalSimulation->getTemperature(temperatureValues, TDICE_OUTPUT_INSTANT_SLOT, TDICE_OUTPUT_TYPE_TCELL, TDICE_OUTPUT_QUANTITY_NONE);
|
||||
|
||||
for (auto t : temperatureValues)
|
||||
printDebugMessage("Temperature is " + std::to_string(t));
|
||||
|
||||
// TODO
|
||||
// store temperatures
|
||||
// evaluate thresholds
|
||||
// take a look in all the available getTemperature() options
|
||||
|
||||
wait(sc_time(period, t_unit));
|
||||
wait(sc_time(p, t_unit));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -62,32 +62,85 @@ public:
|
||||
|
||||
std::string ip = Configuration::getInstance().temperatureSim.IceServerIp;
|
||||
unsigned int port = Configuration::getInstance().temperatureSim.IceServerPort;
|
||||
thermalSimulation = new IceWrapper(ip, port);
|
||||
|
||||
printDebugMessage("Dynamic temperature simulation. Server @ " + ip + ":" + std::to_string(port));
|
||||
|
||||
thermalSimulation = new IceWrapper(ip, port);
|
||||
// get from config the initial power dissipation value
|
||||
currentPowerValues = {
|
||||
0, // 1: CPUs
|
||||
0, // 2: GPU
|
||||
1, // 3: BASEBAND1
|
||||
0, // 4: BASEBAND2
|
||||
0, // 5: LLCACHE
|
||||
0, // 6: DRAMCTRL1
|
||||
0, // 7: DRAMCTRL2
|
||||
0, // 8: TSVs
|
||||
0, // 9: ACELLERATORS
|
||||
1, //10: C0
|
||||
0, //11: C1
|
||||
0, //12: C2
|
||||
0, //13: C3
|
||||
0, //14: TSVs
|
||||
0, //10: C0
|
||||
0, //11: C1
|
||||
0, //12: C2
|
||||
0, //13: C3
|
||||
0, //14: TSVs
|
||||
0, //10: C0
|
||||
0, //11: C1
|
||||
0, //12: C2
|
||||
0, //13: C3
|
||||
0, //14: TSVs
|
||||
0, //10: C0
|
||||
0, //11: C1
|
||||
0, //12: C2
|
||||
0, //13: C3
|
||||
0 //14: TSVs
|
||||
};
|
||||
|
||||
lastPowerValues = currentPowerValues;
|
||||
|
||||
powerThresholds = Configuration::getInstance().temperatureSim.powerThresholds;
|
||||
periodAdjustFactor = 10;
|
||||
nPowStableCyclesToIncreasePeriod = 5;
|
||||
cyclesSinceLastPeriodAdjust = 0;
|
||||
|
||||
targetPeriod = Configuration::getInstance().temperatureSim.DynTemperatureSimPeriod;
|
||||
period = targetPeriod;
|
||||
t_unit = Configuration::getInstance().temperatureSim.DynTemperatureSimUnit;
|
||||
SC_THREAD(temperatureThread);
|
||||
|
||||
} else {
|
||||
double temperature = Configuration::getInstance().temperatureSim.StaticTemperatureDefaultValue;
|
||||
setTemperature(temperature);
|
||||
printDebugMessage("Static temperature simulation. Temperature set to " + std::to_string(temperature));
|
||||
staticTemperature = Configuration::getInstance().temperatureSim.StaticTemperatureDefaultValue;
|
||||
printDebugMessage("Static temperature simulation. Temperature set to " + std::to_string(staticTemperature));
|
||||
}
|
||||
}
|
||||
|
||||
// TODO
|
||||
// Implement many signatures for getTemperature():
|
||||
// - by (x,y,z) coordinates
|
||||
// - device average temperature
|
||||
double getTemperature();
|
||||
|
||||
double getTemperature(int deviceId, float currentPower);
|
||||
|
||||
private:
|
||||
IceWrapper *thermalSimulation;
|
||||
double temperature;
|
||||
double staticTemperature;
|
||||
|
||||
bool dynamicTempSimEnabled;
|
||||
|
||||
void setTemperature(double t);
|
||||
IceWrapper *thermalSimulation;
|
||||
std::vector<float> temperaturesBuffer;
|
||||
std::vector<float> temperatureValues;
|
||||
|
||||
std::vector<float> currentPowerValues;
|
||||
std::vector<float> lastPowerValues;
|
||||
std::vector<float> powerThresholds;
|
||||
|
||||
double targetPeriod;
|
||||
double period;
|
||||
enum sc_time_unit t_unit;
|
||||
void temperatureThread();
|
||||
void updateTemperatures();
|
||||
double adjustThermalSimPeriod();
|
||||
unsigned int periodAdjustFactor;
|
||||
unsigned int cyclesSinceLastPeriodAdjust;
|
||||
unsigned int nPowStableCyclesToIncreasePeriod;
|
||||
|
||||
void printDebugMessage(std::string message);
|
||||
};
|
||||
|
||||
@@ -249,7 +249,7 @@ Below are listed the configuration sections and configuration fields.
|
||||
- **Temperature Simulator Configuration**
|
||||
- *StaticTemperatureDefaultValue* (int)
|
||||
- Temperature value for simulations with static temperature
|
||||
- *DynTemperatureSimPeriod* (unsigned int)
|
||||
- *DynTemperatureSimPeriod* (double)
|
||||
- Period of the dynamic temperature simulation
|
||||
- *DynTemperatureSimUnit* (string)
|
||||
- "s": seconds
|
||||
|
||||
Reference in New Issue
Block a user