Przeglądaj źródła

GPU: Query UnrestrictedBufferTextureCopyPitchSupported to avoid D3D12 realignment copies

(cherry picked from commit 742a6fd092836796d26a33590ef57d852142bf14)
Caleb Cornett 1 miesiąc temu
rodzic
commit
de5e0f1784
2 zmienionych plików z 52 dodań i 22 usunięć
  1. 6 5
      include/SDL3/SDL_gpu.h
  2. 46 17
      src/gpu/d3d12/SDL_gpu_d3d12.c

+ 6 - 5
include/SDL3/SDL_gpu.h

@@ -1398,11 +1398,12 @@ typedef struct SDL_GPUViewport
  * SDL_DownloadFromGPUTexture are used as default values respectively and data
  * is considered to be tightly packed.
  *
- * **WARNING**: Direct3D 12 requires texture data row pitch to be 256 byte
- * aligned, and offsets to be aligned to 512 bytes. If they are not, SDL will
- * make a temporary copy of the data that is properly aligned, but this adds
- * overhead to the transfer process. Apps can avoid this by aligning their
- * data appropriately, or using a different GPU backend than Direct3D 12.
+ * **WARNING**: On some older/integrated hardware, Direct3D 12 requires texture
+ * data row pitch to be 256 byte aligned, and offsets to be aligned to 512 bytes.
+ * If they are not, SDL will make a temporary copy of the data that is properly
+ * aligned, but this adds overhead to the transfer process. Apps can avoid this
+ * by aligning their data appropriately, or using a different GPU backend than
+ * Direct3D 12.
  *
  * \since This struct is available since SDL 3.2.0.
  *

+ 46 - 17
src/gpu/d3d12/SDL_gpu_d3d12.c

@@ -897,6 +897,7 @@ struct D3D12Renderer
 
     bool debug_mode;
     bool GPUUploadHeapSupported;
+    bool UnrestrictedBufferTextureCopyPitchSupported;
     // FIXME: these might not be necessary since we're not using custom heaps
     bool UMA;
     bool UMACacheCoherent;
@@ -5914,6 +5915,7 @@ static void D3D12_UploadToTexture(
     bool cycle)
 {
     D3D12CommandBuffer *d3d12CommandBuffer = (D3D12CommandBuffer *)commandBuffer;
+    D3D12Renderer *renderer = (D3D12Renderer *)d3d12CommandBuffer->renderer;
     D3D12BufferContainer *transferBufferContainer = (D3D12BufferContainer *)source->transfer_buffer;
     D3D12Buffer *temporaryBuffer = NULL;
     D3D12_TEXTURE_COPY_LOCATION sourceLocation;
@@ -5941,11 +5943,12 @@ static void D3D12_UploadToTexture(
         cycle,
         D3D12_RESOURCE_STATE_COPY_DEST);
 
-    /* D3D12 requires texture data row pitch to be 256 byte aligned, which is obviously insane.
-     * Instead of exposing that restriction to the client, which is a huge rake to step on,
-     * and a restriction that no other backend requires, we're going to copy data to a temporary buffer,
-     * copy THAT data to the texture, and then get rid of the temporary buffer ASAP.
-     * If we're lucky and the row pitch and depth pitch are already aligned, we can skip all of that.
+    /* Unless the UnrestrictedBufferTextureCopyPitchSupported feature is supported, D3D12 requires
+     * texture data row pitch to be 256 byte aligned, which is obviously insane. Instead of exposing
+     * that restriction to the client, which is a huge rake to step on, and a restriction that no
+     * other backend requires, we're going to copy data to a temporary buffer, copy THAT data to the
+     * texture, and then get rid of the temporary buffer ASAP. If we're lucky and the row pitch and
+     * depth pitch are already aligned, we can skip all of that.
      *
      * Without that feature, D3D12 also requires offsets to be 512 byte aligned. We'll fix that for the client and warn them as well.
      *
@@ -5967,10 +5970,16 @@ static void D3D12_UploadToTexture(
 
     bytesPerSlice = rowsPerSlice * rowPitch;
 
-    alignedRowPitch = (destination->w + (blockWidth - 1)) / blockWidth * blockSize;
-    alignedRowPitch = D3D12_INTERNAL_Align(alignedRowPitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
-    needsRealignment = rowsPerSlice != destination->h || rowPitch != alignedRowPitch;
-    needsPlacementCopy = source->offset % D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT != 0;
+    if (renderer->UnrestrictedBufferTextureCopyPitchSupported) {
+        alignedRowPitch = rowPitch;
+        needsRealignment = false;
+        needsPlacementCopy = false;
+    } else {
+        alignedRowPitch = (destination->w + (blockWidth - 1)) / blockWidth * blockSize;
+        alignedRowPitch = D3D12_INTERNAL_Align(alignedRowPitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
+        needsRealignment = rowsPerSlice != destination->h || rowPitch != alignedRowPitch;
+        needsPlacementCopy = source->offset % D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT != 0;
+    }
 
     alignedBytesPerSlice = alignedRowPitch * destination->h;
 
@@ -6249,6 +6258,7 @@ static void D3D12_DownloadFromTexture(
     const SDL_GPUTextureTransferInfo *destination)
 {
     D3D12CommandBuffer *d3d12CommandBuffer = (D3D12CommandBuffer *)commandBuffer;
+    D3D12Renderer *renderer = d3d12CommandBuffer->renderer;
     D3D12_TEXTURE_COPY_LOCATION sourceLocation;
     D3D12_TEXTURE_COPY_LOCATION destinationLocation;
     Uint32 pixelsPerRow = destination->pixels_per_row;
@@ -6266,11 +6276,12 @@ static void D3D12_DownloadFromTexture(
     D3D12BufferContainer *destinationContainer = (D3D12BufferContainer *)destination->transfer_buffer;
     D3D12Buffer *destinationBuffer = destinationContainer->activeBuffer;
 
-    /* D3D12 requires texture data row pitch to be 256 byte aligned, which is obviously insane.
-     * Instead of exposing that restriction to the client, which is a huge rake to step on,
-     * and a restriction that no other backend requires, we're going to copy data to a temporary buffer,
-     * copy THAT data to the texture, and then get rid of the temporary buffer ASAP.
-     * If we're lucky and the row pitch and depth pitch are already aligned, we can skip all of that.
+    /* Unless the UnrestrictedBufferTextureCopyPitchSupported feature is supported, D3D12 requires
+     * texture data row pitch to be 256 byte aligned, which is obviously insane. Instead of exposing
+     * that restriction to the client, which is a huge rake to step on, and a restriction that no
+     * other backend requires, we're going to copy the texture data to a temporary buffer, copy THAT
+     * data to the transfer buffer, and then get rid of the temporary buffer ASAP. If we're lucky and
+     * the row pitch and depth pitch are already aligned, we can skip all of that.
      *
      * Without that feature, D3D12 also requires offsets to be 512 byte aligned. We'll fix that for the client and warn them as well.
      *
@@ -6290,9 +6301,15 @@ static void D3D12_DownloadFromTexture(
         rowsPerSlice = source->h;
     }
 
-    alignedRowPitch = D3D12_INTERNAL_Align(rowPitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
-    needsRealignment = rowsPerSlice != source->h || rowPitch != alignedRowPitch;
-    needsPlacementCopy = destination->offset % D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT != 0;
+    if (renderer->UnrestrictedBufferTextureCopyPitchSupported) {
+        alignedRowPitch = rowPitch;
+        needsRealignment = false;
+        needsPlacementCopy = false;
+    } else {
+        alignedRowPitch = D3D12_INTERNAL_Align(rowPitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
+        needsRealignment = rowsPerSlice != source->h || rowPitch != alignedRowPitch;
+        needsPlacementCopy = destination->offset % D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT != 0;
+    }
 
     sourceLocation.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
     sourceLocation.SubresourceIndex = sourceSubresource->index;
@@ -9149,6 +9166,18 @@ static SDL_GPUDevice *D3D12_CreateDevice(bool debugMode, bool preferLowPower, SD
     }
 #endif
 
+    // Check for unrestricted texture-buffer copy pitch support
+    D3D12_FEATURE_DATA_D3D12_OPTIONS13 options13;
+    res = ID3D12Device_CheckFeatureSupport(
+        renderer->device,
+        D3D12_FEATURE_D3D12_OPTIONS13,
+        &options13,
+        sizeof(options13));
+
+    if (SUCCEEDED(res)) {
+        renderer->UnrestrictedBufferTextureCopyPitchSupported = options13.UnrestrictedBufferTextureCopyPitchSupported;
+    }
+
     // Create command queue
 #if (defined(SDL_PLATFORM_XBOXONE) || defined(SDL_PLATFORM_XBOXSERIES))
     if (s_CommandQueue != NULL) {