Skip to content

Commit cfc5204

Browse files
committed
Don't require dxc for compiling RT patching shaders
* We do some manual uint64 emulation with uint2 which compiles on fxc, re-used from the execute indirect patching.
1 parent c26c4b1 commit cfc5204

File tree

6 files changed

+206
-72
lines changed

6 files changed

+206
-72
lines changed

renderdoc/data/hlsl/hlsl_cbuffers.h

Lines changed: 71 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#define float2 Vec2f
3535
#define float3 Vec3f
3636
#define uint4 Vec4u
37+
#define uint2 Vec2u
3738
#define int4 Vec4i
3839
#define float4 Vec4f
3940
#define float4x4 Matrix4f
@@ -195,8 +196,76 @@ cbuffer AccStructPatchInfo REG(b0)
195196
uint addressCount;
196197
};
197198

198-
#if defined(SHADER_MODEL_MIN_6_0_REQUIRED) || defined(__cplusplus)
199+
// INCLUDE_GPUADDRESS_HELPERS should only be set for unit tests to check these functions below,
200+
// otherwise it pollutes the interface
201+
202+
#if defined(__cplusplus) && !defined(INCLUDE_GPUADDRESS_HELPERS)
203+
// on the GPU this will be uint2 {.x = LSB, .y = MSB} to match uint64 order
199204
typedef uint64_t GPUAddress;
205+
#else
206+
typedef uint2 GPUAddress;
207+
#endif
208+
209+
// don't define the helpers in C++ by default, unless we're using them for unit tests
210+
#if !defined(__cplusplus) || defined(INCLUDE_GPUADDRESS_HELPERS)
211+
212+
#if defined(__cplusplus)
213+
#define max RDCMAX
214+
#define min RDCMIN
215+
#endif
216+
217+
// Unsigned 64-bit 'a < b' for a value held as two 32-bit words, where .y is the most
// significant word and .x the least significant word.
bool lessThan(GPUAddress a, GPUAddress b)
218+
{
219+
// either MSB is less, or MSB is equal and LSB is strictly less
220+
return a.y < b.y || (a.y == b.y && a.x < b.x);
221+
}
222+
223+
// Unsigned 64-bit 'a <= b' on the two-word representation (.y = MSB, .x = LSB).
bool lessEqual(GPUAddress a, GPUAddress b)
224+
{
225+
// strictly less, or both words identical
return lessThan(a, b) || (a.y == b.y && a.x == b.x);
226+
}
227+
228+
// Adds two 64-bit values held as two 32-bit words (.x = LSB, .y = MSB) and returns the sum in
// the same representation. A carry out of the low word is propagated into the high word; no
// check is made for a carry out of the high word.
GPUAddress add(GPUAddress a, GPUAddress b)
229+
{
230+
uint msb = 0, lsb = 0;
231+
// a.x > 0xffffffff - b.x holds when a.x + b.x does not fit in 32 bits, i.e. the low words
// produce a carry
if(b.x > 0 && a.x > 0xffffffff - b.x)
232+
{
233+
// 0x80000000 is subtracted once from the larger low word here and once more from the sum
// below, which together removes 2^32 from a.x + b.x and leaves the low word of the result
uint x = max(a.x, b.x) - 0x80000000;
234+
uint y = min(a.x, b.x);
235+
236+
uint sum = x + y;
237+
238+
// the extra 1 is the carry from the low words
msb = a.y + b.y + 1;
239+
lsb = sum - 0x80000000;
240+
}
241+
else
242+
{
243+
// no carry, the two words can be added independently
msb = a.y + b.y;
244+
lsb = a.x + b.x;
245+
}
246+
247+
return GPUAddress(lsb, msb);
248+
}
249+
250+
// Computes a - b for two 64-bit values held as two 32-bit words (.x = LSB, .y = MSB) and
// returns the difference in the same representation. A borrow from the high word is applied
// when the low word of a is smaller than the low word of b.
// NOTE(review): there is no check that a >= b here; presumably callers guarantee that - confirm.
GPUAddress sub(GPUAddress a, GPUAddress b)
251+
{
252+
uint msb = 0, lsb = 0;
253+
// the low word of a is too small, so we need to borrow from the high word
if(a.x < b.x)
254+
{
255+
uint diff = b.x - a.x;
256+
257+
// the extra 1 is the borrow taken by the low words
msb = a.y - b.y - 1;
258+
// equivalent to 2^32 - diff, written so that the constant fits in 32 bits
lsb = 0xffffffff - (diff - 1);
259+
}
260+
else
261+
{
262+
// no borrow, the two words can be subtracted independently
msb = a.y - b.y;
263+
lsb = a.x - b.x;
264+
}
265+
266+
return GPUAddress(lsb, msb);
267+
}
268+
#endif
200269

201270
struct BlasAddressRange
202271
{
@@ -213,10 +282,9 @@ struct BlasAddressPair
213282
// This corresponds to D3D12_RAYTRACING_INSTANCE_DESC structure
214283
struct InstanceDesc
215284
{
216-
uint64_t padding[7];
285+
uint2 padding[7];
217286
GPUAddress blasAddress;
218287
};
219-
#endif
220288

221289
cbuffer DebugSampleOperation REG(b0)
222290
{

renderdoc/data/hlsl/misc.hlsl

Lines changed: 7 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -154,77 +154,24 @@ cbuffer countbuffer : register(b2)
154154

155155
struct buffermapping
156156
{
157-
// {.x = LSB, .y = MSB} to match uint64 order
158-
uint2 origBase;
159-
uint2 origEnd;
160-
uint2 newBase;
161-
uint2 pad;
157+
GPUAddress origBase;
158+
GPUAddress origEnd;
159+
GPUAddress newBase;
160+
GPUAddress pad;
162161
};
163162

164163
StructuredBuffer<buffermapping> buffers : register(t0);
165164
RWByteAddressBuffer arguments : register(u0);
166165

167-
bool uint64LessThan(uint2 a, uint2 b)
168-
{
169-
// either MSB is less, or MSB is equal and LSB is less
170-
return a.y < b.y || (a.y == b.y && a.x < b.x);
171-
}
172-
173-
bool uint64LessEqual(uint2 a, uint2 b)
174-
{
175-
return uint64LessThan(a, b) || (a.y == b.y && a.x == b.x);
176-
}
177-
178-
uint2 uint64Add(uint2 a, uint2 b)
179-
{
180-
uint msb = 0, lsb = 0;
181-
if(b.x > 0 && a.x > 0xffffffff - b.x)
182-
{
183-
uint x = max(a.x, b.x) - 0x80000000;
184-
uint y = min(a.x, b.x);
185-
186-
uint sum = x + y;
187-
188-
msb = a.y + b.y + 1;
189-
lsb = sum - 0x80000000;
190-
}
191-
else
192-
{
193-
msb = a.y + b.y;
194-
lsb = a.x + b.x;
195-
}
196-
197-
return uint2(lsb, msb);
198-
}
199-
200-
uint2 uint64Sub(uint2 a, uint2 b)
201-
{
202-
uint msb = 0, lsb = 0;
203-
if(a.x < b.x)
204-
{
205-
uint diff = b.x - a.x;
206-
207-
msb = a.y - b.y - 1;
208-
lsb = 0xffffffff - (diff - 1);
209-
}
210-
else
211-
{
212-
msb = a.y - b.y;
213-
lsb = a.x - b.x;
214-
}
215-
216-
return uint2(lsb, msb);
217-
}
218-
219-
uint2 PatchAddress(uint2 addr)
166+
GPUAddress PatchAddress(GPUAddress addr)
220167
{
221168
for(uint i = 0; i < bufCount; i++)
222169
{
223170
buffermapping b = buffers[i];
224171

225-
if(uint64LessEqual(b.origBase, addr) && uint64LessThan(addr, b.origEnd))
172+
if(lessEqual(b.origBase, addr) && lessThan(addr, b.origEnd))
226173
{
227-
return uint64Add(b.newBase, uint64Sub(addr, b.origBase));
174+
return add(b.newBase, sub(addr, b.origBase));
228175
}
229176
}
230177

renderdoc/data/hlsl/raytracing.hlsl

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,17 +22,14 @@
2222
* THE SOFTWARE.
2323
******************************************************************************/
2424

25-
#ifndef SHADER_MODEL_MIN_6_0_REQUIRED
26-
#define SHADER_MODEL_MIN_6_0_REQUIRED
27-
#endif
2825
#include "hlsl_cbuffers.h"
2926

30-
RWStructuredBuffer<InstanceDesc> instanceDescs : register(u0, space0);
31-
StructuredBuffer<BlasAddressPair> oldNewAddressesPair : register(t0, space0);
27+
RWStructuredBuffer<InstanceDesc> instanceDescs : register(u0);
28+
StructuredBuffer<BlasAddressPair> oldNewAddressesPair : register(t0);
3229

3330
bool InRange(BlasAddressRange addressRange, GPUAddress address)
3431
{
35-
if(addressRange.start <= address && address <= addressRange.end)
32+
if(lessEqual(addressRange.start, address) && lessThan(address, addressRange.end))
3633
{
3734
return true;
3835
}
@@ -49,8 +46,9 @@ bool InRange(BlasAddressRange addressRange, GPUAddress address)
4946
{
5047
if(InRange(oldNewAddressesPair[i].oldAddress, instanceBlasAddress))
5148
{
52-
uint64_t offset = instanceBlasAddress - oldNewAddressesPair[i].oldAddress.start;
53-
instanceDescs[dispatchGroup.x].blasAddress = oldNewAddressesPair[i].newAddress.start + offset;
49+
GPUAddress offset = sub(instanceBlasAddress, oldNewAddressesPair[i].oldAddress.start);
50+
instanceDescs[dispatchGroup.x].blasAddress =
51+
add(oldNewAddressesPair[i].newAddress.start, offset);
5452
return;
5553
}
5654
}

renderdoc/driver/d3d12/d3d12_common.cpp

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1814,3 +1814,107 @@ D3D12_PACKED_PIPELINE_STATE_STREAM_DESC &D3D12_PACKED_PIPELINE_STATE_STREAM_DESC
18141814

18151815
return *this;
18161816
}
1817+
1818+
#if ENABLED(ENABLE_UNIT_TESTS)
1819+
#include "catch/catch.hpp"
1820+
1821+
#define INCLUDE_GPUADDRESS_HELPERS
1822+
1823+
#include "data/hlsl/hlsl_cbuffers.h"
1824+
1825+
// Test helper: reinterprets the bits of a native uint64_t as a GPUAddress by copying the raw
// bytes across.
// NOTE(review): for .x to end up as the low word this relies on the host being little-endian -
// confirm.
GPUAddress toaddr(uint64_t addr)
1826+
{
1827+
GPUAddress ret;
1828+
// the byte copy below is only valid if both types are the same size
RDCCOMPILE_ASSERT(sizeof(ret) == sizeof(addr), "GPU address isn't 64-bit");
1829+
memcpy(&ret, &addr, sizeof(ret));
1830+
return ret;
1831+
}
1832+
1833+
// Test helper: the inverse of toaddr, reinterprets the bytes of a GPUAddress as a native
// uint64_t so that results can be compared against ordinary 64-bit arithmetic.
uint64_t fromaddr(GPUAddress addr)
1834+
{
1835+
uint64_t ret;
1836+
// the byte copy below is only valid if both types are the same size
RDCCOMPILE_ASSERT(sizeof(ret) == sizeof(addr), "GPU address isn't 64-bit");
1837+
memcpy(&ret, &addr, sizeof(ret));
1838+
return ret;
1839+
}
1840+
1841+
// Checks the two-word lessThan/lessEqual/add/sub helpers against native uint64_t arithmetic,
// for every ordered pair of values in testValues (including each value paired with itself).
TEST_CASE("HLSL uint64 helpers", "[d3d]")
1842+
{
1843+
rdcarray<uint64_t> testValues = {
1844+
// small values that fit comfortably in the low word
0,
1845+
1,
1846+
2,
1847+
3,
1848+
4,
1849+
5,
1850+
6,
1851+
7,
1852+
8,
1853+
9,
1854+
10,
1855+
11,
1856+
100,
1857+
128,
1858+
1000,
1859+
1860+
// values at the top of the low word, so sums with the small values above carry into the
// high word
0xfffffffa,
1861+
0xfffffffb,
1862+
0xfffffffc,
1863+
0xfffffffd,
1864+
0xfffffffe,
1865+
0xffffffff,
1866+
1867+
// values just past the low word, so differences with the values above borrow from the
// high word
0x100000000ULL,
1868+
0x100000001ULL,
1869+
0x100000002ULL,
1870+
0x100000003ULL,
1871+
0x100000004ULL,
1872+
0x100000005ULL,
1873+
0x100000006ULL,
1874+
1875+
// values with a large high word
0x1000000000001000ULL,
1876+
0x100000000fffffffULL,
1877+
0x1000000010000000ULL,
1878+
0x1000000010000001ULL,
1879+
0x1000000010000002ULL,
1880+
// NOTE(review): repeats the previous value, possibly 0x1000000010000003ULL was intended
0x1000000010000002ULL,
1881+
1882+
0x4000000000001000ULL,
1883+
0x400000000fffffffULL,
1884+
0x4000000010000000ULL,
1885+
0x4000000010000001ULL,
1886+
0x4000000010000002ULL,
1887+
// NOTE(review): repeats the previous value, possibly 0x4000000010000003ULL was intended
0x4000000010000002ULL,
1888+
// don't test anything that could overflow if summed together for simplicity
1889+
};
1890+
1891+
for(uint64_t first : testValues)
1892+
{
1893+
for(uint64_t second : testValues)
1894+
{
1895+
GPUAddress a, b;
1896+
a = toaddr(first);
1897+
b = toaddr(second);
1898+
1899+
// sanity check
1900+
CHECK(fromaddr(a) == first);
1901+
CHECK(fromaddr(b) == second);
1902+
1903+
// comparisons, checked in both argument orders
CHECK(lessThan(a, b) == (first < second));
1904+
CHECK(lessEqual(a, b) == (first <= second));
1905+
1906+
CHECK(lessThan(b, a) == (second < first));
1907+
CHECK(lessEqual(b, a) == (second <= first));
1908+
1909+
// addition should give the same result whichever way around the operands are
CHECK(fromaddr(add(a, b)) == (first + second));
1910+
CHECK(fromaddr(add(b, a)) == (first + second));
1911+
1912+
// only subtract the smaller value from the larger, so the expected result never wraps
if(first >= second)
1913+
CHECK(fromaddr(sub(a, b)) == (first - second));
1914+
else
1915+
CHECK(fromaddr(sub(b, a)) == (second - first));
1916+
}
1917+
}
1918+
};
1919+
1920+
#endif // ENABLED(ENABLE_UNIT_TESTS)

renderdoc/driver/d3d12/d3d12_manager.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -840,7 +840,7 @@ void D3D12RaytracingResourceAndUtilHandler::InitReplayBlasPatchingResources()
840840
ID3DBlob *shader = NULL;
841841
rdcstr hlsl = GetEmbeddedResource(raytracing_hlsl);
842842
shaderCache->GetShaderBlob(hlsl.c_str(), "RENDERDOC_PatchAccStructAddressCS",
843-
D3DCOMPILE_WARNINGS_ARE_ERRORS, {}, "cs_6_0", &shader);
843+
D3DCOMPILE_WARNINGS_ARE_ERRORS, {}, "cs_5_0", &shader);
844844

845845
if(shader)
846846
{

renderdoc/maths/vec.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,23 @@ inline Vec4f operator+=(Vec4f &a, const Vec4f &b)
167167
return a;
168168
}
169169

170+
// Two-component vector of unsigned 32-bit integers. hlsl_cbuffers.h maps the HLSL uint2 type
// onto this struct when the shared header is compiled as C++.
struct Vec2u
171+
{
172+
// both components default to 0 when not specified
Vec2u(uint32_t X = 0, uint32_t Y = 0)
173+
{
174+
x = X;
175+
y = Y;
176+
}
177+
// the same two values can be accessed either by name (x, y) or by index (uv[0], uv[1])
union
178+
{
179+
struct
180+
{
181+
uint32_t x, y;
182+
};
183+
uint32_t uv[2];
184+
};
185+
};
186+
170187
struct Vec4u
171188
{
172189
Vec4u(uint32_t X = 0, uint32_t Y = 0, uint32_t Z = 0, uint32_t W = 0)

0 commit comments

Comments
 (0)