diff --git a/src/dev/amdgpu/amdgpu_device.cc b/src/dev/amdgpu/amdgpu_device.cc index b25ffbf79f..48f450c2b2 100644 --- a/src/dev/amdgpu/amdgpu_device.cc +++ b/src/dev/amdgpu/amdgpu_device.cc @@ -678,10 +678,13 @@ AMDGPUDevice::serialize(CheckpointOut &cp) const uint64_t regs_size = regs.size(); uint64_t doorbells_size = doorbells.size(); uint64_t sdma_engs_size = sdmaEngs.size(); + uint64_t used_vmid_map_size = usedVMIDs.size(); SERIALIZE_SCALAR(regs_size); SERIALIZE_SCALAR(doorbells_size); SERIALIZE_SCALAR(sdma_engs_size); + // Save the number of vmids used + SERIALIZE_SCALAR(used_vmid_map_size); // Make a c-style array of the regs to serialize uint32_t reg_addrs[regs_size]; @@ -690,6 +693,9 @@ AMDGPUDevice::serialize(CheckpointOut &cp) const QueueType doorbells_queues[doorbells_size]; uint32_t sdma_engs_offset[sdma_engs_size]; int sdma_engs[sdma_engs_size]; + int used_vmids[used_vmid_map_size]; + int used_queue_id_sizes[used_vmid_map_size]; + std::vector used_vmid_sets; int idx = 0; for (auto & it : regs) { @@ -712,6 +718,20 @@ AMDGPUDevice::serialize(CheckpointOut &cp) const ++idx; } + idx = 0; + for (auto & it : usedVMIDs) { + used_vmids[idx] = it.first; + used_queue_id_sizes[idx] = it.second.size(); + std::vector set_vector(it.second.begin(), it.second.end()); + used_vmid_sets.insert(used_vmid_sets.end(), + set_vector.begin(), set_vector.end()); + ++idx; + } + + int num_queue_id = used_vmid_sets.size(); + int* vmid_array = new int[num_queue_id]; + std::copy(used_vmid_sets.begin(), used_vmid_sets.end(), vmid_array); + SERIALIZE_ARRAY(reg_addrs, sizeof(reg_addrs)/sizeof(reg_addrs[0])); SERIALIZE_ARRAY(reg_values, sizeof(reg_values)/sizeof(reg_values[0])); SERIALIZE_ARRAY(doorbells_offset, sizeof(doorbells_offset)/ @@ -721,10 +741,21 @@ AMDGPUDevice::serialize(CheckpointOut &cp) const SERIALIZE_ARRAY(sdma_engs_offset, sizeof(sdma_engs_offset)/ sizeof(sdma_engs_offset[0])); SERIALIZE_ARRAY(sdma_engs, sizeof(sdma_engs)/sizeof(sdma_engs[0])); + // Save the vmids used in an array + SERIALIZE_ARRAY(used_vmids, sizeof(used_vmids)/sizeof(used_vmids[0])); + // Save the size of the set of queue ids mapped to each vmid + SERIALIZE_ARRAY(used_queue_id_sizes, + sizeof(used_queue_id_sizes)/sizeof(used_queue_id_sizes[0])); + // Save all the queue ids used for all the vmids + SERIALIZE_ARRAY(vmid_array, num_queue_id); + // Save the total number of queue idsused + SERIALIZE_SCALAR(num_queue_id); // Serialize the device memory deviceMem.serializeSection(cp, "deviceMem"); gpuvm.serializeSection(cp, "GPUVM"); + + delete[] vmid_array; } void @@ -736,10 +767,13 @@ AMDGPUDevice::unserialize(CheckpointIn &cp) uint64_t regs_size = 0; uint64_t doorbells_size = 0; uint64_t sdma_engs_size = 0; + uint64_t used_vmid_map_size = 0; UNSERIALIZE_SCALAR(regs_size); UNSERIALIZE_SCALAR(doorbells_size); UNSERIALIZE_SCALAR(sdma_engs_size); + UNSERIALIZE_SCALAR(used_vmid_map_size); + if (regs_size > 0) { uint32_t reg_addrs[regs_size]; @@ -786,6 +820,33 @@ AMDGPUDevice::unserialize(CheckpointIn &cp) } } + if (used_vmid_map_size > 0) { + int used_vmids[used_vmid_map_size]; + int used_queue_id_sizes[used_vmid_map_size]; + int num_queue_id = 0; + std::vector used_vmid_sets; + // Extract the total number of queue ids used + UNSERIALIZE_SCALAR(num_queue_id); + int* vmid_array = new int[num_queue_id]; + // Extract the number of vmids used + UNSERIALIZE_ARRAY(used_vmids, used_vmid_map_size); + // Extract the size of the queue id set for each vmid + UNSERIALIZE_ARRAY(used_queue_id_sizes, used_vmid_map_size); + // Extract all the queue ids used + UNSERIALIZE_ARRAY(vmid_array, num_queue_id); + // Populate the usedVMIDs map with the queue ids per vm + int idx = 0; + for (int it = 0; it < used_vmid_map_size; it++) { + int vmid = used_vmids[it]; + int vmid_set_size = used_queue_id_sizes[it]; + for (int j = 0; j < vmid_set_size; j++) { + usedVMIDs[vmid].insert(vmid_array[idx + j]); + } + idx += vmid_set_size; + } + delete[] vmid_array; + } + // Unserialize the device memory deviceMem.unserializeSection(cp, "deviceMem"); gpuvm.unserializeSection(cp, "GPUVM"); diff --git a/src/dev/amdgpu/pm4_packet_processor.cc b/src/dev/amdgpu/pm4_packet_processor.cc index 1e889799ef..352af400b0 100644 --- a/src/dev/amdgpu/pm4_packet_processor.cc +++ b/src/dev/amdgpu/pm4_packet_processor.cc @@ -1197,8 +1197,6 @@ PM4PacketProcessor::unserialize(CheckpointIn &cp) PM4MapQueues* pkt = new PM4MapQueues; memset(pkt, 0, sizeof(PM4MapQueues)); newQueue(mqd, offset[i], pkt, id[i]); - PM4Queue *new_q = queuesMap[offset[i]]; - gpuDevice->insertQId(gpuDevice->lastVMID(), new_q->id()); if (ib[i]) { queues[id[i]]->wptr(ib_wptr[i]);