Staging buffer
概念
- 配置兩種記憶體空間:CPU-to-GPU、及GPU only,第一個透過map方式把data上傳到GPU,接著用buffer copy的方式把它轉到GPU local的記憶體上。
- VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT : 僅GPU能用,CPU無法access。
- 原理:先從CPU copy到GPU,再從GPU copy到GPU local location
- buffer copy command 需要 transfer queue (VK_QUEUE_TRANSFER_BIT),但VK_QUEUE_GRAPHICS_BIT 與VK_QUEUE_COMPUTE_BIT 已支援VK_QUEUE_TRANSFER_BIT 操作,因此無須額外增加queue family。
Without Staging buffer
void ltn::Model::createVBO() { VkDeviceSize bufferSize = sizeof(vertices[0]) * vertices.size(); createBuffer( coreInstance.get_device(), coreInstance.get_physical_device(), bufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, vertexBuffer, vertexBufferMemory); //copy data void* data; vkMapMemory(coreInstance.get_device(), vertexBufferMemory, 0, bufferSize, 0, &data); memcpy(data, vertices.data(), (size_t)bufferSize); vkUnmapMemory(coreInstance.get_device(), vertexBufferMemory); } |
With Staging buffer
void ltn::Model::createVBO() { VkDeviceSize bufferSize = sizeof(vertices[0]) * vertices.size(); VkBuffer stagingBuffer; VkDeviceMemory stagingBufferMemory; createBuffer( coreInstance.get_device(), coreInstance.get_physical_device(), bufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, stagingBuffer, stagingBufferMemory); //Copy to Staging buffer void* data; vkMapMemory(coreInstance.get_device(), stagingBufferMemory, 0, bufferSize, 0, &data); memcpy(data, vertices.data(), (size_t)bufferSize); vkUnmapMemory(coreInstance.get_device(), stagingBufferMemory); // Allocate a memory on GPU that CPU can not access createBuffer( coreInstance.get_device(), coreInstance.get_physical_device(), bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, vertexBuffer, vertexBufferMemory); copyBuffer( coreInstance.get_device(), coreInstance.get_queuefailmy_indexs()->graphic_queuefamily_index.value(), coreInstance.graphic_queue(), stagingBuffer, vertexBuffer, bufferSize); vkDestroyBuffer(coreInstance.get_device(), stagingBuffer, nullptr); vkFreeMemory(coreInstance.get_device(), stagingBufferMemory, nullptr); } |
注意事項:
Index Buffer
// Index data for the indexed draw: six 16-bit indices, (0, 1, 2) followed by
// (2, 3, 0). The element type matches the VK_INDEX_TYPE_UINT16 used when the
// index buffer is bound.
const std::vector<uint16_t> indices{0, 1, 2, 2, 3, 0};
void ltn::Model::createIndexBuffer() { VkDeviceSize bufferSize = sizeof(indices[0]) * indices.size(); VkBuffer stagingBuffer; VkDeviceMemory stagingBufferMemory; createBuffer(coreInstance.get_device(),coreInstance.get_physical_device(), bufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, stagingBuffer, stagingBufferMemory); void* data; vkMapMemory(coreInstance.get_device(), stagingBufferMemory, 0, bufferSize, 0, &data); memcpy(data, indices.data(), (size_t)bufferSize); vkUnmapMemory(coreInstance.get_device(), stagingBufferMemory); //GPU local buffer createBuffer(coreInstance.get_device(), coreInstance.get_physical_device(), bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, indexBuffer, indexBufferMemory); //Copy from staging buffer copyBuffer( coreInstance.get_device(), coreInstance.get_queuefailmy_indexs()->graphic_queuefamily_index.value(), coreInstance.graphic_queue(),stagingBuffer, indexBuffer, bufferSize); vkDestroyBuffer(coreInstance.get_device(), stagingBuffer, nullptr); vkFreeMemory(coreInstance.get_device(), stagingBufferMemory, nullptr); } |
//vertex VkBuffer vertexBuffers[] = { vertexBuffer }; VkDeviceSize offsets[] = { 0 }; vkCmdBindVertexBuffers(cmdbuffer, 0, 1, vertexBuffers, offsets); //index vkCmdBindIndexBuffer(cmdbuffer, indexBuffer, 0, VK_INDEX_TYPE_UINT16); vkCmdDrawIndexed(cmdbuffer, static_cast<uint32_t>(indices.size()), 1, 0, 0, 0); |
備註
其他: Copy Buffer
static void copyBuffer( VkDevice device , uint32_t graphics_queue_family , VkQueue graphics_queue , VkBuffer srcBuffer, VkBuffer dstBuffer, VkDeviceSize size) { // Create temp command pool VkCommandPool commandPool; VkCommandPoolCreateInfo poolInfo{}; poolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; poolInfo.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT; poolInfo.queueFamilyIndex = graphics_queue_family; if (vkCreateCommandPool(device, &poolInfo, nullptr, &commandPool) != VK_SUCCESS) { throw std::runtime_error("failed to create graphics command pool!"); } // Copy buffer VkCommandBufferAllocateInfo allocInfo{}; allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; allocInfo.commandPool = commandPool; allocInfo.commandBufferCount = 1; VkCommandBuffer commandBuffer; vkAllocateCommandBuffers(device, &allocInfo, &commandBuffer); VkCommandBufferBeginInfo beginInfo{}; beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; // this cmdbuf only used once vkBeginCommandBuffer(commandBuffer, &beginInfo); VkBufferCopy copyRegion{}; copyRegion.srcOffset = 0; // Optional copyRegion.dstOffset = 0; // Optional copyRegion.size = size; vkCmdCopyBuffer(commandBuffer, srcBuffer, dstBuffer, 1, ?Region); vkEndCommandBuffer(commandBuffer); VkSubmitInfo submitInfo{}; submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; submitInfo.commandBufferCount = 1; submitInfo.pCommandBuffers = &commandBuffer; vkQueueSubmit(graphics_queue, 1, &submitInfo, VK_NULL_HANDLE); vkQueueWaitIdle(graphics_queue); // wait for the transfer queue to become idle vkFreeCommandBuffers(device, commandPool, 1, &commandBuffer); vkDestroyCommandPool(device , commandPool,nullptr); } |
後記: 趕快學完,趕快回去做老本行(?)