ETH官方钱包

前往
大廳
主題 達人專欄

Vulkan 學習筆記 Index buffer與staging buffer

%%鼠 拒收病婿 | 2024-12-25 20:18:49 | 巴幣 2028 | 人氣 589

上一篇是建立vertex buffer,這一篇來建立index buffer,並把buffer轉移到GPU local的記憶體上。

Staging buffer

Vulkan需要Staging buffer的主要原因是為了提高圖形數據的傳輸效率。當圖形數據從CPU傳輸到GPU時,Staging buffer作為中介,可以減少直接傳輸的頻繁性和延遲。這樣可以更有效地利用系統的資源,提高整體性能。

概念

  • 配置兩種記憶體空間:CPU-to-GPU、及GPU only,第一個透過map方式把data上傳到GPU,接著用buffer copy的方式把它轉到GPU local的記憶體上。
  • VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT  : 僅GPU能用,CPU無法access。
  • 原理:先從CPU copy到GPU,再從GPU copy到GPU local location
    • buffer copy command 需要 transfer queue (VK_QUEUE_TRANSFER_BIT),但VK_QUEUE_GRAPHICS_BIT 與VK_QUEUE_COMPUTE_BIT 已支援VK_QUEUE_TRANSFER_BIT 操作,因此無須額外增加queue family。


Without Staging buffer


void ltn::Model::createVBO()
{
    VkDeviceSize bufferSize = sizeof(vertices[0]) * vertices.size();

    createBuffer(
        coreInstance.get_device(),
        coreInstance.get_physical_device(),
        bufferSize,
        VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, vertexBuffer,
        vertexBufferMemory);


    //copy data
    void* data;
    vkMapMemory(coreInstance.get_device(), vertexBufferMemory, 0, bufferSize, 0, &data);
    memcpy(data, vertices.data(), (size_t)bufferSize);
    vkUnmapMemory(coreInstance.get_device(), vertexBufferMemory);
}

With Staging buffer

程式碼會變長一點

void ltn::Model::createVBO()
{
    VkDeviceSize bufferSize = sizeof(vertices[0]) * vertices.size();

    VkBuffer stagingBuffer;
    VkDeviceMemory stagingBufferMemory;
    createBuffer(
        coreInstance.get_device(),
        coreInstance.get_physical_device(),
        bufferSize,
        VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
        stagingBuffer, stagingBufferMemory);

    //Copy to Staging buffer
    void* data;
    vkMapMemory(coreInstance.get_device(), stagingBufferMemory, 0, bufferSize, 0, &data);
    memcpy(data, vertices.data(), (size_t)bufferSize);
    vkUnmapMemory(coreInstance.get_device(), stagingBufferMemory);

    // Allocate a memory on GPU that CPU can not access
    createBuffer(
        coreInstance.get_device(),
        coreInstance.get_physical_device(),
        bufferSize,
        VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
        VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
        vertexBuffer,
        vertexBufferMemory);

    copyBuffer(
        coreInstance.get_device(),
        coreInstance.get_queuefailmy_indexs()->graphic_queuefamily_index.value(),
        coreInstance.graphic_queue(),
        stagingBuffer,
        vertexBuffer,
        bufferSize);

    vkDestroyBuffer(coreInstance.get_device(), stagingBuffer, nullptr);
    vkFreeMemory(coreInstance.get_device(), stagingBufferMemory, nullptr);
}


注意事項:

實務上不會每組vertex都各自配置一塊記憶體,因為能同時存在的記憶體配置數量( maxMemoryAllocationCount ) 可能低至4096個。應該搭配VulkanMemoryAllocator,使用offset欄位將資料pack在一起。



Index Buffer


Hard-code的index資料:
// Hard-coded index data: six 16-bit indices, written as two groups of three,
// that reference vertices 0..3.
const std::vector<uint16_t> indices{
    0, 1, 2,
    2, 3, 0,
};

一樣透過staging buffer的方式搬到GPU

void ltn::Model::createIndexBuffer()
{
    VkDeviceSize bufferSize = sizeof(indices[0]) * indices.size();

    VkBuffer stagingBuffer;
    VkDeviceMemory stagingBufferMemory;
    createBuffer(coreInstance.get_device(),coreInstance.get_physical_device(),
        bufferSize,
        VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
        stagingBuffer, stagingBufferMemory);

    void* data;
    vkMapMemory(coreInstance.get_device(), stagingBufferMemory, 0, bufferSize, 0, &data);
    memcpy(data, indices.data(), (size_t)bufferSize);
    vkUnmapMemory(coreInstance.get_device(), stagingBufferMemory);

    //GPU local buffer
    createBuffer(coreInstance.get_device(), coreInstance.get_physical_device(),
        bufferSize,
        VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
        VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, indexBuffer, indexBufferMemory);

    //Copy from staging buffer
    copyBuffer(
        coreInstance.get_device(),
        coreInstance.get_queuefailmy_indexs()->graphic_queuefamily_index.value(),
        coreInstance.graphic_queue(),stagingBuffer, indexBuffer, bufferSize);

    vkDestroyBuffer(coreInstance.get_device(), stagingBuffer, nullptr);
    vkFreeMemory(coreInstance.get_device(), stagingBufferMemory, nullptr);
}

每次draw時,bind vertex與index buffer,並改使用vkCmdDrawIndexed
// Vertex input: bind `vertexBuffer` as the single vertex buffer, starting at
// binding 0 with a byte offset of 0.
VkBuffer vertexBuffers[] = { vertexBuffer };
VkDeviceSize offsets[] = { 0 };
vkCmdBindVertexBuffers(cmdbuffer, 0, 1, vertexBuffers, offsets);

// Index input: bind `indexBuffer` from offset 0. The index type must match the
// element type of `indices` (uint16_t -> VK_INDEX_TYPE_UINT16).
vkCmdBindIndexBuffer(cmdbuffer, indexBuffer, 0, VK_INDEX_TYPE_UINT16);
// Indexed draw: indices.size() indices, 1 instance, first index 0,
// vertex offset 0, first instance 0.
vkCmdDrawIndexed(cmdbuffer, static_cast<uint32_t>(indices.size()), 1, 0, 0, 0);

備註

index 可以是uint16_t 或 uint32_t ,看index需要的數量。


其他: Copy Buffer

概念: 建立臨時的command pool與command buffer,錄製一個"vkCmdCopyBuffer"指令,然後submit到支援transfer的queue (graphic或compute都可以)

static void copyBuffer( VkDevice device , uint32_t graphics_queue_family , VkQueue graphics_queue ,  VkBuffer srcBuffer, VkBuffer dstBuffer, VkDeviceSize size) {
    // Create temp command pool
    VkCommandPool commandPool;
    VkCommandPoolCreateInfo poolInfo{};
    poolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
    poolInfo.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
    poolInfo.queueFamilyIndex = graphics_queue_family;
    

    if (vkCreateCommandPool(device, &poolInfo, nullptr, &commandPool) != VK_SUCCESS) {
        throw std::runtime_error("failed to create graphics command pool!");
    }
    
    // Copy buffer
    VkCommandBufferAllocateInfo allocInfo{};
    allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
    allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
    allocInfo.commandPool = commandPool;
    allocInfo.commandBufferCount = 1;

    VkCommandBuffer commandBuffer;
    vkAllocateCommandBuffers(device, &allocInfo, &commandBuffer);

    VkCommandBufferBeginInfo beginInfo{};
    beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
    beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;  // this cmdbuf only used once
    
    vkBeginCommandBuffer(commandBuffer, &beginInfo);
    VkBufferCopy copyRegion{};
    copyRegion.srcOffset = 0; // Optional
    copyRegion.dstOffset = 0; // Optional
    copyRegion.size = size;
    vkCmdCopyBuffer(commandBuffer, srcBuffer, dstBuffer, 1, ?Region);
    vkEndCommandBuffer(commandBuffer);

    VkSubmitInfo submitInfo{};
    submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
    submitInfo.commandBufferCount = 1;
    submitInfo.pCommandBuffers = &commandBuffer;

    vkQueueSubmit(graphics_queue, 1, &submitInfo, VK_NULL_HANDLE);
    vkQueueWaitIdle(graphics_queue); // wait for the transfer queue to become idle
    vkFreeCommandBuffers(device, commandPool, 1, &commandBuffer);
    vkDestroyCommandPool(device , commandPool,nullptr);

}




後記: 趕快學完,趕快回去做老本行(?)

送禮物贊助創作者 !
0
留言

創作回應

qsnz
加油 Vulkan是好東西,只管玩遊戲的我覺得用Vulkan API的遊戲資源利用率比較好 同樣耗能下畫面呈現也比較優秀,臺灣蠻少人會去研究/學這塊
2024-12-26 16:45:58
%%鼠 拒收病婿
感謝。
Vulkan程式如果沒有管理跟優化好,可能性能會比Opengl還慘。這大概要花好幾年去精通了
2025-01-08 18:49:03

相關創作

更多創作