主題達人專欄

Vulkan 學習筆記 Index buffer與staging buffer

%%鼠拒收病婿 | 2024-12-25 20:18:49 | 巴幣 2028 | 人氣 589

上一篇是建立vertex buffer，這一篇要來建立index buffer，並把buffer轉移到GPU local的記憶體上。

Staging buffer

Vulkan需要Staging buffer的主要原因是為了提高圖形數據的傳輸效率。當圖形數據從CPU傳輸到GPU時，Staging buffer作為中介，可以減少直接傳輸的頻繁性和延遲。這樣可以更有效地利用系統的資源，提高整體性能。

概念

配置兩種記憶體空間：CPU-to-GPU、及GPU only，第一個透過map方式把data上傳到GPU，接著用buffer copy的方式把它轉到GPU local的記憶體上。
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT : 僅GPU能用，CPU無法access。
原理：先從CPU copy到GPU，再從GPU copy到GPU local location
- buffer copy command 需要 transfer queue (VK_QUEUE_TRANSFER_BIT)，但VK_QUEUE_GRAPHICS_BIT 與VK_QUEUE_COMPUTE_BIT 已支援VK_QUEUE_TRANSFER_BIT 操作，因此無須額外增加queue family。

Without Staging buffer

void ltn::Model::createVBO()
{
    VkDeviceSize bufferSize = sizeof(vertices[0]) * vertices.size();

    createBuffer(
        coreInstance.get_device(),
        coreInstance.get_physical_device(),
        bufferSize,
        VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, vertexBuffer,
        vertexBufferMemory);

    //copy data
    void* data;
    vkMapMemory(coreInstance.get_device(), vertexBufferMemory, 0, bufferSize, 0, &data);
    memcpy(data, vertices.data(), (size_t)bufferSize);
    vkUnmapMemory(coreInstance.get_device(), vertexBufferMemory);
}

With Staging buffer

程式碼會變長一點

void ltn::Model::createVBO()
{
    VkDeviceSize bufferSize = sizeof(vertices[0]) * vertices.size();

    VkBuffer stagingBuffer;
    VkDeviceMemory stagingBufferMemory;
    createBuffer(
        coreInstance.get_device(),
        coreInstance.get_physical_device(),
        bufferSize,
        VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
        stagingBuffer, stagingBufferMemory);

    //Copy to Staging buffer
    void* data;
    vkMapMemory(coreInstance.get_device(), stagingBufferMemory, 0, bufferSize, 0, &data);
    memcpy(data, vertices.data(), (size_t)bufferSize);
    vkUnmapMemory(coreInstance.get_device(), stagingBufferMemory);

    // Allocate a memory on GPU that CPU can not access
    createBuffer(
        coreInstance.get_device(),
        coreInstance.get_physical_device(),
        bufferSize,
        VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
        VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
        vertexBuffer,
        vertexBufferMemory);

    copyBuffer(
        coreInstance.get_device(),
        coreInstance.get_queuefailmy_indexs()->graphic_queuefamily_index.value(),
        coreInstance.graphic_queue(),
        stagingBuffer,
        vertexBuffer,
        bufferSize);

    vkDestroyBuffer(coreInstance.get_device(), stagingBuffer, nullptr);
    vkFreeMemory(coreInstance.get_device(), stagingBufferMemory, nullptr);
}

注意事項:

實務上不會每組vertex都建立buffer並各自配置記憶體，因為可同時存在的記憶體配置數量 ( maxMemoryAllocationCount ) 上限可能低至4096個。應該搭配VulkanMemoryAllocator，使用offset欄位將資料pack在一起。

Index Buffer

Hard-code的index資料:

// Index list for two triangles that share vertices 0 and 2.
const std::vector<uint16_t> indices{
    0, 1, 2,  // first triangle
    2, 3, 0,  // second triangle
};

一樣透過staging buffer的方式搬到GPU

void ltn::Model::createIndexBuffer()
{
    VkDeviceSize bufferSize = sizeof(indices[0]) * indices.size();

    VkBuffer stagingBuffer;
    VkDeviceMemory stagingBufferMemory;
    createBuffer(coreInstance.get_device(),coreInstance.get_physical_device(),
        bufferSize,
        VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
        stagingBuffer, stagingBufferMemory);

    void* data;
    vkMapMemory(coreInstance.get_device(), stagingBufferMemory, 0, bufferSize, 0, &data);
    memcpy(data, indices.data(), (size_t)bufferSize);
    vkUnmapMemory(coreInstance.get_device(), stagingBufferMemory);

    //GPU local buffer
    createBuffer(coreInstance.get_device(), coreInstance.get_physical_device(),
        bufferSize,
        VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
        VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, indexBuffer, indexBufferMemory);

    //Copy from staging buffer
    copyBuffer(
        coreInstance.get_device(),
        coreInstance.get_queuefailmy_indexs()->graphic_queuefamily_index.value(),
        coreInstance.graphic_queue(),stagingBuffer, indexBuffer, bufferSize);

    vkDestroyBuffer(coreInstance.get_device(), stagingBuffer, nullptr);
    vkFreeMemory(coreInstance.get_device(), stagingBufferMemory, nullptr);
}

每次draw時，bind vertex與index buffer，並改使用vkCmdDrawIndexed

//vertex
VkBuffer vertexBuffers[] = { vertexBuffer };
VkDeviceSize offsets[] = { 0 };
vkCmdBindVertexBuffers(cmdbuffer, 0, 1, vertexBuffers, offsets);

//index
vkCmdBindIndexBuffer(cmdbuffer, indexBuffer, 0, VK_INDEX_TYPE_UINT16);
vkCmdDrawIndexed(cmdbuffer, static_cast<uint32_t>(indices.size()), 1, 0, 0, 0);

備註

index 可以是uint16_t 或 uint32_t ，看index需要的數量。

其他: Copy Buffer

概念: 建立臨時的command pool與command buffer，錄製一個"vkCmdCopyBuffer"指令，然後submit到支援transfer的queue (graphics或compute queue都隱含支援transfer)

/// Copies `size` bytes from the start of `srcBuffer` to the start of `dstBuffer`
/// with a one-shot command buffer, and blocks until the queue is idle again.
///
/// @param device                 Logical device used for every Vulkan call here.
/// @param graphics_queue_family  Queue family index the temporary command pool is created for.
/// @param graphics_queue         Queue the copy command is submitted to.
/// @param srcBuffer              Source of the copy.
/// @param dstBuffer              Destination of the copy.
/// @param size                   Number of bytes to copy; 0 makes the call a no-op.
/// @throws std::runtime_error    If any Vulkan call involved in the copy fails.
static void copyBuffer( VkDevice device , uint32_t graphics_queue_family , VkQueue graphics_queue ,  VkBuffer srcBuffer, VkBuffer dstBuffer, VkDeviceSize size) {
    // VkBufferCopy::size must be greater than zero, so there is nothing to record.
    if (size == 0) {
        return;
    }

    // Create temp command pool
    VkCommandPoolCreateInfo poolInfo{};
    poolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
    poolInfo.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
    poolInfo.queueFamilyIndex = graphics_queue_family;

    VkCommandPool commandPool = VK_NULL_HANDLE;
    if (vkCreateCommandPool(device, &poolInfo, nullptr, &commandPool) != VK_SUCCESS) {
        throw std::runtime_error("failed to create graphics command pool!");
    }

    // Releases the temporary pool before reporting a failure. Destroying a
    // command pool also frees every command buffer allocated from it.
    const auto fail = [&](const char* message) {
        vkDestroyCommandPool(device, commandPool, nullptr);
        throw std::runtime_error(message);
    };

    // Allocate the single primary command buffer that records the copy.
    VkCommandBufferAllocateInfo allocInfo{};
    allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
    allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
    allocInfo.commandPool = commandPool;
    allocInfo.commandBufferCount = 1;

    VkCommandBuffer commandBuffer = VK_NULL_HANDLE;
    if (vkAllocateCommandBuffers(device, &allocInfo, &commandBuffer) != VK_SUCCESS) {
        fail("failed to allocate copy command buffer!");
    }

    VkCommandBufferBeginInfo beginInfo{};
    beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
    beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;  // this cmdbuf is only used once

    if (vkBeginCommandBuffer(commandBuffer, &beginInfo) != VK_SUCCESS) {
        fail("failed to begin copy command buffer!");
    }

    VkBufferCopy copyRegion{};
    copyRegion.srcOffset = 0; // Optional
    copyRegion.dstOffset = 0; // Optional
    copyRegion.size = size;
    vkCmdCopyBuffer(commandBuffer, srcBuffer, dstBuffer, 1, &copyRegion);

    if (vkEndCommandBuffer(commandBuffer) != VK_SUCCESS) {
        fail("failed to record copy command buffer!");
    }

    VkSubmitInfo submitInfo{};
    submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
    submitInfo.commandBufferCount = 1;
    submitInfo.pCommandBuffers = &commandBuffer;

    if (vkQueueSubmit(graphics_queue, 1, &submitInfo, VK_NULL_HANDLE) != VK_SUCCESS) {
        fail("failed to submit copy command buffer!");
    }

    // Wait for the queue to become idle so the copy has completed on return.
    const VkResult waitResult = vkQueueWaitIdle(graphics_queue);

    vkFreeCommandBuffers(device, commandPool, 1, &commandBuffer);
    vkDestroyCommandPool(device , commandPool,nullptr);

    if (waitResult != VK_SUCCESS) {
        throw std::runtime_error("failed to wait for buffer copy to finish!");
    }
}

Github code : https://github.com/Lontoone/LearnVulkan/releases/tag/simple_ubo

後記: 趕快學完，趕快回去做老本行(?)