Skip to content
This repository was archived by the owner on Sep 15, 2025. It is now read-only.

Commit 31f6a70

Browse files
committed
Update pal from commit fa251280
Updates to ExecuteIndirect on Gfx9 Update submodule address-lib Update submodule devdriver Remove supportReleaseAcquireInterface from DeviceProperties CacheSyncOps related change in gfx9 HWL expose AmdgpuCsCtxOverridePriority GPUProfiler Forward the SQTT control flags Add supportMixedSignIntDot to DeviceProperties Update meshDispatchDimsReg by Task Shader in RPM GenerateCmdDisptachTaskMesh on Gfx11 Put Task Shader AceChunks into CmdIf for Conditional Rendering [CodeQL] textwrite template update & rpm regen Add function GenLogFilename Improve the refresh rate precision Wide char path fixes The CmdBufferLogger output for CmdResolveImage omitted the destination mip level in its output Comparison between uint8 and uint32 AutoBuffer warning removal Refactor Pal::Queue::Destroy() to account for pre-destroy submissions Optimize barrier with layout blt transition StringTableTraceSource private to protected Remove engineType from ReleaseMemGeneric meshDispatchDimsReg in Mesh Shader in Gfx11 Add all of the parameters of CmdFillMemory to the logger output Remove unused gfxiplevel parameters in CalcScratchMemSize functions CopyMemToImg8x is broken Add support for batch RenderOp submission Unify WaitIdle timeouts Convert an assert to static_assert and fix the assert Add nullptr check for pQueueSemaphore to avoid unexpected crash Add GpaSession flags for TTRACE_EXEC Double destroy cause possible segfault when using RDP Convert a few CmdBarrier() calls to CmdReleaseThenAcquire() Initial userEntries should be marked as 'not mapped' instead of '0' Allow null layout info in ImgBarrier GpuProfiler tweaks and logging Update submodule SwWarDetection Change GpaSampleConfig::timing::preSample/postSample from HwPipePoint to PipelineStageFlag
1 parent eca6b99 commit 31f6a70

File tree

107 files changed

+47891
-46813
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

107 files changed

+47891
-46813
lines changed

doc/process/palCodingStandards.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -166,8 +166,6 @@ General
166166
* In VS Code you could also enable trailing whitespace trimming by pressing Ctrl + Shift + P and then searching for Trim Trailing Whitespace.
167167
* For Visual Studio check out Trailing Whitespace Visualizer extension (found on the Visual Studio marketplace).
168168
169-
- AI generated code **must** not be added to the PAL code base.
170-
171169
General Language Restrictions
172170
-----------------------------
173171

inc/core/palCmdBuffer.h

Lines changed: 26 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -374,13 +374,12 @@ enum CacheCoherencyUsageFlags : uint32
374374
CoherMemory = 0x00020000, ///< Data read or written directly from/to memory
375375
CoherSampleRate = 0x00040000, ///< CmdBindSampleRateImage() source.
376376
CoherPresent = 0x00080000, ///< Source of present.
377-
CoherCp = CoherTimestamp, ///< HW Command Processor (CP) encompassing the front - end command
378-
/// processing of any queue, including SDMA.
377+
CoherCp = 0x00200000, ///< HW Command Processor (CP) encompassing the front-end command processing of any queue, including SDMA.
378+
CoherAllUsages = 0x003FFFFF, ///< Mask with bits 0 through 21 set, i.e. every single-bit usage flag up to and including CoherCp.
379+
379380
CoherShader = CoherShaderRead | CoherShaderWrite,
380381
CoherCopy = CoherCopySrc | CoherCopyDst,
381382
CoherResolve = CoherResolveSrc | CoherResolveDst,
382-
383-
CoherAllUsages = 0x000FFFFF,
384383
};
385384

386385
/// Bitmask values for the flags parameter of ICmdBuffer::CmdClearColorImage().
@@ -1077,13 +1076,15 @@ struct ImgBarrier
10771076
/// engines up to this point. These masks imply the previous compression state. No
10781077
/// usage flags should ever be set in oldLayout.usages that correspond to usages
10791078
/// that are not supported by the engine that is performing the transition. The
1080-
/// engine type performing the transition must be set in oldLayout.engines.
1079+
/// engine type performing the transition must be set in oldLayout.engines. Can set
1080+
/// both oldLayout and newLayout to zero value for no layout transition case.
10811081
ImageLayout newLayout; ///< Specifies the upcoming image layout based on bitmasks of allowed operations and
10821082
/// engines after this point. These masks imply the upcoming compression state.
10831083
/// A difference between oldLayoutUsageMask and newLayoutUsageMask may result
10841084
/// in a decompression. PAL's implementation will ensure the results of any layout
10851085
/// operations are consistent with the requested availability and visibility
1086-
/// operations.
1086+
/// operations. Can set both oldLayout and newLayout to zero value for no layout
1087+
/// transition case.
10871088

10881089
/// Specifies a custom sample pattern over a 2x2 pixel quad. The position for each sample is specified on a grid
10891090
/// where the pixel center is <0,0>, the top left corner of the pixel is <-8,-8>, and <7,7> is the maximum valid
@@ -2865,9 +2866,6 @@ class ICmdBuffer : public IDestroyable
28652866
/// CmdAcquire() call is expected to wait on one or a list of such synchronization tokens and perform any necessary
28662867
/// visibility operations and/or layout transitions that could not be predicted at release-time.
28672868
///
2868-
/// @note Not all hardware can support the acquire/release mechanism with good performance. This call is only
2869-
/// valid if supportReleaseAcquireInterface is set in the GFXIP properties section of @ref DeviceProperties.
2870-
///
28712869
/// @param [in] releaseInfo Describes the synchronization scope, availability operations, and required layout
28722870
/// transitions.
28732871
/// @returns Synchronization token for the release operation. Pass this token to CmdAcquire to confirm completion.
@@ -2881,9 +2879,6 @@ class ICmdBuffer : public IDestroyable
28812879
/// Performs the acquire portion of an acquire/release-based barrier. This acquires a set of resources for a new
28822880
/// set of usages, assuming CmdRelease() was called to release access for the resource's past usage.
28832881
///
2884-
/// @note Not all hardware can support the acquire/release mechanism with good performance. This call is only
2885-
/// valid if supportReleaseAcquireInterface is set in the GFXIP properties section of @ref DeviceProperties.
2886-
///
28872882
/// Conceptually, this method will:
28882883
/// - Ensure all specified resources are visible in memory. The visibility operation will invalidate all
28892884
/// relevant caches above the last-level-cache.
@@ -2919,9 +2914,6 @@ class ICmdBuffer : public IDestroyable
29192914
/// CmdAcquireEvent() call is expected to wait on this event and perform any necessary visibility operations and/or
29202915
/// layout transitions that could not be predicted at release-time.
29212916
///
2922-
/// @note Not all hardware can support the acquire/release mechanism with good performance. This call is only
2923-
/// valid if supportReleaseAcquireInterface is set in the GFXIP properties section of @ref DeviceProperties.
2924-
///
29252917
/// @param [in] releaseInfo Describes the synchronization scope, availability operations, and required layout
29262918
/// transitions.
29272919
/// @param [in] pGpuEvent Event to be signaled once the release has completed. Must be a valid (non-null) GPU
@@ -2941,9 +2933,6 @@ class ICmdBuffer : public IDestroyable
29412933
/// relevant caches above the last-level-cache.
29422934
/// - Perform any requested layout transitions.
29432935
///
2944-
/// @note Not all hardware can support the acquire/release mechanism with good performance. This call is only
2945-
/// valid if supportReleaseAcquireInterface is set in the GFXIP properties section of @ref DeviceProperties.
2946-
///
29472936
/// @param [in] acquireInfo Describes the synchronization scope, visibility operations, and the required layout
29482937
/// transitions.
29492938
/// @param [in] gpuEventCount Number of entries in pGpuEvents.
@@ -2963,9 +2952,6 @@ class ICmdBuffer : public IDestroyable
29632952
///
29642953
/// Effectively equivalent to @ref ICmdBuffer::CmdBarrier.
29652954
///
2966-
/// @note Not all hardware can support the acquire/release mechanism with good performance. This call is only
2967-
/// valid if supportReleaseAcquireInterface is set in the GFXIP properties section of @ref DeviceProperties.
2968-
///
29692955
/// @param [in] barrierInfo Describes the synchronization scopes, availability/visibility operations, and the
29702956
/// required layout transitions.
29712957
virtual void CmdReleaseThenAcquire(
@@ -3370,6 +3356,11 @@ class ICmdBuffer : public IDestroyable
33703356
/// The source and destination images must be of the same type (1D, 2D or 3D), or optionally 2D and 3D with the
33713357
/// number of slices matching the depth. MSAA source and destination images must have the same number of samples.
33723358
///
3359+
/// Each region must satisfy these restrictions.
3360+
/// - srcOffset >= 0 and dstOffset >= 0
3361+
/// - srcOffset + extent <= srcSubres's extent
3362+
/// - dstOffset + extent <= dstSubres's extent
3363+
///
33733364
/// Images copied via this function must have x/y/z offsets and width/height/depth extents aligned to the minimum
33743365
/// tiled copy alignment specified in @ref DeviceProperties for the engine this function is executed on. Note that
33753366
/// the DMA engine supports tiled copies regardless of the alignment; the reported minimum tiled copy alignments
@@ -3416,6 +3407,8 @@ class ICmdBuffer : public IDestroyable
34163407
/// The source memory offset has to be aligned to the smaller of the copied texel size or 4 bytes. A destination
34173408
/// subresource cannot be present more than once per CmdCopyMemoryToImage() call.
34183409
///
3410+
/// Each region's imageOffset must be >= 0 and imageOffset + imageExtent must be <= imageSubres's extent.
3411+
///
34193412
/// This function requires use of the following barrier flags:
34203413
/// - PipelineStage: @ref PipelineStageBlt
34213414
/// - CacheCoherency: @ref CoherCopySrc for the source and @ref CoherCopyDst for the destination.
@@ -3445,6 +3438,8 @@ class ICmdBuffer : public IDestroyable
34453438
/// The destination memory offset has to be aligned to the smaller of the copied texel size or 4 bytes. A
34463439
/// destination region cannot be present more than once per CmdCopyImageToMemory() call.
34473440
///
3441+
/// Each region's imageOffset must be >= 0 and imageOffset + imageExtent must be <= imageSubres's extent.
3442+
///
34483443
/// This function requires use of the following barrier flags:
34493444
/// - PipelineStage: @ref PipelineStageBlt
34503445
/// - CacheCoherency: @ref CoherCopySrc for the source and @ref CoherCopyDst for the destination.
@@ -3478,6 +3473,8 @@ class ICmdBuffer : public IDestroyable
34783473
/// The source memory offset has to be aligned to the smaller of the copied texel size or 4 bytes. A destination
34793474
/// subresource cannot be present more than once per CmdCopyMemoryToTiledImage() call.
34803475
///
3476+
/// Each region's imageOffset must be >= 0 and imageOffset + imageExtent must be <= imageSubres's extent.
3477+
///
34813478
/// This function requires use of the following barrier flags:
34823479
/// - PipelineStage: @ref PipelineStageBlt
34833480
/// - CacheCoherency: @ref CoherCopySrc for the source and @ref CoherCopyDst for the destination.
@@ -3511,6 +3508,8 @@ class ICmdBuffer : public IDestroyable
35113508
/// The destination memory offset has to be aligned to the smaller of the copied texel size or 4 bytes. A
35123509
/// destination region cannot be present more than once per CmdCopyTiledImageToMemory() call.
35133510
///
3511+
/// Each region's imageOffset must be >= 0 and imageOffset + imageExtent must be <= imageSubres's extent.
3512+
///
35143513
/// This function requires use of the following barrier flags:
35153514
/// - PipelineStage: @ref PipelineStageBlt
35163515
/// - CacheCoherency: @ref CoherCopySrc for the source and @ref CoherCopyDst for the destination.
@@ -4033,6 +4032,7 @@ class ICmdBuffer : public IDestroyable
40334032
const IGpuEvent& gpuEvent,
40344033
uint32 stageMask) = 0;
40354034

4035+
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 900
40364036
/// Puts the specified GPU event into the _set_ state when all previous GPU work reaches the specified point in the
40374037
/// pipeline.
40384038
///
@@ -4062,6 +4062,7 @@ class ICmdBuffer : public IDestroyable
40624062
const IGpuEvent& gpuEvent,
40634063
HwPipePoint resetPoint)
40644064
{ CmdResetEvent(gpuEvent, HwPipePointToStage[resetPoint]); }
4065+
#endif
40654066

40664067
/// Predicate the subsequent jobs in the command buffer if the event is set.
40674068
///
@@ -4202,6 +4203,7 @@ class ICmdBuffer : public IDestroyable
42024203
ImmediateDataWidth dataSize,
42034204
gpusize address) = 0;
42044205

4206+
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 900
42054207
/// Writes a HwPipePostPrefetch or HwPipeBottom timestamp to the specified memory location.
42064208
///
42074209
/// The timestamp data is a 64-bit value that increments once per clock. timestampFrequency in DeviceProperties
@@ -4249,6 +4251,7 @@ class ICmdBuffer : public IDestroyable
42494251
ImmediateDataWidth dataSize,
42504252
gpusize address)
42514253
{ CmdWriteImmediate(HwPipePointToStage[pipePoint], data, dataSize, address); }
4254+
#endif
42524255

42534256
/// Loads the current stream-out buffer-filled-sizes stored on the GPU from memory, typically from a target of a
42544257
/// prior CmdSaveBufferFilledSizes() call.
@@ -4911,6 +4914,7 @@ class ICmdBuffer : public IDestroyable
49114914
/// For non-top-layer objects, this will point to the layer above the current object.
49124915
void* m_pClientData;
49134916

4917+
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 900
49144918
/// @internal Some back-compat glue for some of the HwPipePoint interfaces in this file.
49154919
static constexpr uint32 HwPipePointToStage[] =
49164920
{
@@ -4928,6 +4932,7 @@ class ICmdBuffer : public IDestroyable
49284932
PipelineStageBlt, // HwPipePostBlt = 0x6
49294933
PipelineStageBottomOfPipe, // HwPipeBottom = 0x7
49304934
};
4935+
#endif
49314936
};
49324937

49334938
} // Pal

inc/core/palDevice.h

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1119,7 +1119,9 @@ struct DeviceProperties
11191119
uint32 reserved744 : 1;
11201120
/// Set if the queue supports additional split barrier feature on top of basic acquire/release
11211121
/// interface support. This provides CmdAcquire() and CmdRelease() to implement split barriers.
1122+
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 893
11221123
/// Note: supportReleaseAcquireInterface is a prerequisite to supportSplitReleaseAcquire.
1124+
#endif
11231125
uint32 supportSplitReleaseAcquire : 1;
11241126

11251127
/// Reserved for future use.
@@ -1366,11 +1368,15 @@ struct DeviceProperties
13661368
/// timestamps will increase monotonically across
13671369
/// command buffer submissions.
13681370
uint64 support1xMsaaSampleLocations : 1; ///< HW supports 1xMSAA custom quad sample patterns
1371+
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 893
13691372
uint64 supportReleaseAcquireInterface : 1; ///< Set if HW supports the basic functionalities of
13701373
/// acquire/release-based barrier interface. This
13711374
/// provides CmdReleaseThenAcquire() as a convenient
13721375
/// way to replace the legacy barrier interface's
13731376
/// CmdBarrier() to handle single point barriers.
1377+
#else
1378+
uint64 placeholder4 : 1; ///< Placeholder for backward compatibility; do not use it.
1379+
#endif
13741380
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 883
13751381
uint64 supportSplitReleaseAcquire : 1; ///< Set if HW supports additional split barrier feature
13761382
/// on top of basic acquire/release interface support.
@@ -1408,6 +1414,8 @@ struct DeviceProperties
14081414
uint64 supportTextureGatherBiasLod : 1; ///< HW supports SQ_IMAGE_GATHER4_L_O
14091415
uint64 supportInt8Dot : 1; ///< Hardware supports a dot product 8bit.
14101416
uint64 supportInt4Dot : 1; ///< Hardware supports a dot product 4bit.
1417+
uint64 supportMixedSignIntDot : 1; ///< Hardware supports an integer dot product with mixed
1418+
/// sign inputs.
14111419
uint64 support2DRectList : 1; ///< HW supports PrimitiveTopology::TwoDRectList.
14121420
uint64 supportHsaAbi : 1; ///< PAL supports HSA ABI compute pipelines.
14131421
uint64 supportImageViewMinLod : 1; ///< Indicates image srd supports min_lod.
@@ -1416,12 +1424,13 @@ struct DeviceProperties
14161424
/// with zRange specified.
14171425
uint64 supportCooperativeMatrix : 1; ///< HW supports cooperative matrix
14181426
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 808
1419-
uint64 support1dDispatchInterleave : 1; // Indicates support for 1D Dispatch Interleave.
1427+
uint64 support1dDispatchInterleave : 1; ///< Indicates support for 1D Dispatch Interleave.
14201428
uint64 placeholder12 : 1;
14211429
#endif
1422-
uint64 reserved : 2; ///< Reserved for future use.
1430+
uint64 supportBFloat16 : 1; ///< HW supports bf16 instructions.
1431+
uint64 reserved : 64; ///< Reserved for future use.
14231432
};
1424-
uint64 u64All; ///< Flags packed as 32-bit uint.
1433+
uint64 u64All[2]; ///< Flags packed as two 64-bit uints.
14251434
} flags; ///< Device IP property flags.
14261435

14271436
struct

inc/core/palGpuMemoryBindable.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ class IGpuMemoryBindable : public IDestroyable
9292
///
9393
/// Binding memory to objects other than images automatically initializes the object memory as necessary. Image
9494
/// objects used as color or depth-stencil targets have to be explicitly initialized in command buffers using a
95-
/// ICmdBuffer::CmdBarrier() command to transition them out of the LayoutUninitializedTarget usage.
95+
/// ICmdBuffer::CmdReleaseThenAcquire() command to transition them out of the LayoutUninitializedTarget usage.
9696
///
9797
/// Binding memory to an object automatically unbinds any previously bound memory. There is no need to bind null to
9898
/// an object to explicitly unbind a previously bound allocation before binding a new allocation.

inc/core/palImage.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,13 @@ struct ExternalImageOpenInfo
463463
uint64 modifier; ///< Drm format modifier, if flags.hasModifier is set.
464464
uint32 modifierPlaneCount; ///< Number of memory planes of drm format modifier.
465465
#endif
466+
/// The following members must be set to zero unless the client is opening a @ref ImageTiling::Linear image with
467+
/// specified row and depth pitches. In that case, they must be integer multiples of the alignments given by
468+
/// @ref IDevice::GetLinearImageAlignments, called with an appropriate maxElementSize.
469+
gpusize rowPitch; ///< Offset in bytes between the same X position on two consecutive lines
470+
/// of the subresource.
471+
gpusize depthPitch; ///< Offset in bytes between the same X,Y position of two consecutive
472+
/// slices.
466473
};
467474

468475
/// Reports the overall GPU memory layout of the entire image. Output structure for IImage::GetMemoryLayout(). Unused

inc/core/palLib.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747
#endif
4848
///
4949
/// @ingroup LibInit
50-
#define PAL_INTERFACE_MAJOR_VERSION 892
50+
#define PAL_INTERFACE_MAJOR_VERSION 900
5151

5252
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 831
5353
/// Minor interface version. Note that the interface version is distinct from the PAL version itself, which is returned

inc/core/palPerfExperiment.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,12 @@ struct ThreadTraceInfo
328328
uint32 threadTraceTokenConfig : 1;
329329
uint32 placeholder1 : 1;
330330
uint32 threadTraceExcludeNonDetailShaderData : 1;
331-
uint32 reserved : 17;
331+
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 899
332+
uint32 threadTraceEnableExecPop : 1;
333+
#else
334+
uint32 placeholder2 : 1;
335+
#endif
336+
uint32 reserved : 16;
332337
};
333338
uint32 u32All;
334339
} optionFlags;
@@ -352,6 +357,9 @@ struct ThreadTraceInfo
352357
bool threadTraceWrapBuffer;
353358
uint32 threadTraceStallBehavior;
354359
bool threadTraceExcludeNonDetailShaderData;
360+
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 899
361+
bool threadTraceEnableExecPop;
362+
#endif
355363
} optionValues;
356364
};
357365

inc/core/palPipelineAbi.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131

3232
#pragma once
3333

34+
#include "palInlineFuncs.h"
3435
#include "palUtil.h"
3536
#include "palElf.h"
3637
#include <cstring>
@@ -273,6 +274,22 @@ enum class HardwareStage : uint32
273274
Count
274275
};
275276

277+
/// HardwareStage enum to string conversion table.
278+
constexpr const char* HardwareStageStrings[] =
279+
{
280+
"LS",
281+
"HS",
282+
"ES",
283+
"GS",
284+
"VS",
285+
"PS",
286+
"CS",
287+
"INVALID",
288+
};
289+
290+
static_assert(Util::ArrayLen32(HardwareStageStrings) == static_cast<uint32>(HardwareStage::Count) + 1,
291+
"HardwareStageStrings is not the same size as HardwareStage enum!");
292+
276293
/// Helper enum which is used along with the @ref GetMetadataHashForApiShader function to easily find
277294
/// a metadata hash dword for a particular API shader type.
278295
enum class ApiShaderType : uint32

0 commit comments

Comments
 (0)