|
581 | 581 | ]]>
|
582 | 582 | </Shader>
|
583 | 583 | </ShaderOp>
|
| 584 | + <ShaderOp Name="ProgOffset" PS="PS" VS="VS" CS="CS" AS="AS" MS="MS" TopologyType="TRIANGLE"> |
| 585 | + <!-- One descriptor table (SRV t0, UAVs u0, u1, u2) plus two static point samplers: s0 is a regular sampler, s1 a comparison sampler. --> <RootSignature> |
| 586 | + RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), |
| 587 | + DescriptorTable(SRV(t0,numDescriptors=1), UAV(u0), UAV(u1), UAV(u2)), |
| 588 | + StaticSampler(s0, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_MIN_MAG_MIP_POINT), |
| 589 | + StaticSampler(s1, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_COMPARISON_MIN_MAG_MIP_POINT) |
| 590 | + </RootSignature> |
| 591 | + <!-- Six vertices, each a float3 position followed by a float2 uv, forming two triangles that cover the [-1,1] x [-1,1] quad with uv running 0..1. --> <Resource Name="VBuffer" Dimension="BUFFER" InitialResourceState="COPY_DEST" Init="FromBytes" Topology="TRIANGLELIST"> |
| 592 | + { { -1.0f, 1.0f, 0.0f }, { 0.0f, 0.0f } }, |
| 593 | + { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, |
| 594 | + { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } }, |
| 595 | + |
| 596 | + { { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } }, |
| 597 | + { { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } }, |
| 598 | + { { 1.0f, -1.0f, 0.0f }, { 1.0f, 1.0f } } |
| 599 | + </Resource> |
| 600 | + <!-- 1000x1000 single-channel float texture filled by a named initializer. The shaders compare samples against y*1000 + x, so each texel presumably holds its own linear index (TODO confirm against the test host). --> <Resource Name="T0" Dimension="Texture2D" Width="1000" Height="1000" InitialResourceState="COPY_DEST" Init="ByName" Format="R32_FLOAT" /> |
| 601 | + <!-- 18x18 render target; 18 equals OFFSETS in the shader, which PSMain uses to turn uv into a grid cell. --> <Resource Name="RTarget" Dimension="TEXTURE2D" Width="18" Height="18" Format="R32G32B32A32_FLOAT" Flags="ALLOW_RENDER_TARGET" InitialResourceState="COPY_DEST" /> |
| 602 | + <!-- Result buffers U0, U1, U2: 11552 bytes each = 722 elements x 16 bytes, zero-initialized and read back. The shader binds them as g_bufMain (u0), g_bufMesh (u1) and g_bufAmp (u2). --> <Resource Name="U0" Dimension="BUFFER" Width="11552" |
| 603 | + Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" |
| 604 | + Init="Zero" ReadBack="true" /> |
| 605 | + <Resource Name="U1" Dimension="BUFFER" Width="11552" |
| 606 | + Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" |
| 607 | + Init="Zero" ReadBack="true" /> |
| 608 | + <Resource Name="U2" Dimension="BUFFER" Width="11552" |
| 609 | + Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST" |
| 610 | + Init="Zero" ReadBack="true" /> |
| 611 | + |
| 612 | + <!-- The single root value references ResHeap; the heap's descriptor order (T0, U0, U1, U2) mirrors the SRV(t0), UAV(u0), UAV(u1), UAV(u2) order of the descriptor table. --> <RootValues> |
| 613 | + <RootValue HeapName="ResHeap" /> |
| 614 | + </RootValues> |
| 615 | + <DescriptorHeap Name="ResHeap" Type="CBV_SRV_UAV"> |
| 616 | + <Descriptor Name='T0' Kind='SRV' ResName='T0' /> |
| 617 | + <!-- Each UAV views its buffer as 722 structured elements of 16 bytes (one uint4 per element in the shader). --> <Descriptor Name='U0' Kind='UAV' ResName='U0' |
| 618 | + NumElements="722" StructureByteStride="16" /> |
| 619 | + <Descriptor Name='U1' Kind='UAV' ResName='U1' |
| 620 | + NumElements="722" StructureByteStride="16" /> |
| 621 | + <Descriptor Name='U2' Kind='UAV' ResName='U2' |
| 622 | + NumElements="722" StructureByteStride="16" /> |
| 623 | + </DescriptorHeap> |
| 624 | + <DescriptorHeap Name="RtvHeap" NumDescriptors="1" Type="RTV"> |
| 625 | + <Descriptor Name="RTarget" Kind="RTV"/> |
| 626 | + </DescriptorHeap> |
| 627 | + |
| 628 | + <!-- Vertex layout consumed by VSMain: float3 POSITION at byte 0, then float2 TEXCOORD at byte 12 (directly after the 12-byte position). --> <InputElements> |
| 629 | + <InputElement SemanticName="POSITION" Format="R32G32B32_FLOAT" AlignedByteOffset="0" /> |
| 630 | + <InputElement SemanticName="TEXCOORD" Format="R32G32_FLOAT" AlignedByteOffset="12" /> |
| 631 | + </InputElements> |
| 632 | + <RenderTargets> |
| 633 | + <RenderTarget Name="RTarget"/> |
| 634 | + </RenderTargets> |
| 635 | + <!-- Shader variants. Every entry in this list uses Text="@PS", which presumably reuses the HLSL text of the PS shader defined after this list (confirm against the ShaderOp parser); entries differ only in entry point, target profile and arguments. --> <Shader Name="CS" Target="cs_6_5" EntryPoint="CSMain" Text="@PS"/> |
| 636 | + <Shader Name="CS66" Target="cs_6_6" EntryPoint="CSMain" Text="@PS"/> |
| 637 | + <Shader Name="CS67" Target="cs_6_7" EntryPoint="CSMain" Text="@PS"/> |
| 638 | + <Shader Name="VS" Target="vs_6_5" EntryPoint="VSMain" Text="@PS"/> |
| 639 | + <Shader Name="MS" Target="ms_6_5" EntryPoint="MSMain" Text="@PS"/> |
| 640 | + <Shader Name="MS66" Target="ms_6_6" EntryPoint="MSMain" Text="@PS"/> |
| 641 | + <Shader Name="MS67" Target="ms_6_7" EntryPoint="MSMain" Text="@PS"/> |
| 642 | + <!-- The D-suffixed mesh and amplification variants define DERIV_MESH_AMP=true, which the shader passes to DoTests and DoMoarTests as bDeriv. --> <Shader Name="MS66D" Target="ms_6_6" EntryPoint="MSMain" Arguments="/DDERIV_MESH_AMP=true" Text="@PS"/> |
| 643 | + <Shader Name="MS67D" Target="ms_6_7" EntryPoint="MSMain" Arguments="/DDERIV_MESH_AMP=true" Text="@PS"/> |
| 644 | + <Shader Name="AS" Target="as_6_5" EntryPoint="ASMain" Text="@PS"/> |
| 645 | + <Shader Name="AS66" Target="as_6_6" EntryPoint="ASMain" Text="@PS"/> |
| 646 | + <Shader Name="AS67" Target="as_6_7" EntryPoint="ASMain" Text="@PS"/> |
| 647 | + <Shader Name="AS66D" Target="as_6_6" EntryPoint="ASMain" Arguments="/DDERIV_MESH_AMP=true" Text="@PS"/> |
| 648 | + <Shader Name="AS67D" Target="as_6_7" EntryPoint="ASMain" Arguments="/DDERIV_MESH_AMP=true" Text="@PS"/> |
| 649 | + <Shader Name="PS67" Target="ps_6_7" EntryPoint="PSMain" Text="@PS"/> |
| 650 | + <Shader Name="PS" Target="ps_6_5" EntryPoint="PSMain"> |
| 651 | + <![CDATA[ |
| 652 | + #define OFFSETS 18 /* entries per axis in the coords/offsets tables; also the CSMain thread-group width and height */ |
| 653 | + #ifndef DERIV_MESH_AMP |
| 654 | + #define DERIV_MESH_AMP false /* default bDeriv value for ASMain/MSMain; overridden by compiling with /DDERIV_MESH_AMP=true */ |
| 655 | + #endif |
| 656 | + struct PSInput { /* output of VSMain and of the MSMain vertices, input of PSMain */ |
| 657 | + float4 position : SV_POSITION; |
| 658 | + float2 uv : TEXCOORD; |
| 659 | + }; |
| 660 | +
|
| 661 | + Texture2D<float> g_tex : register(t0); /* texture read by every Load/Sample* test */ |
| 662 | + RWStructuredBuffer<uint4> g_bufMain : register(u0); /* results written by PSMain and CSMain */ |
| 663 | + RWStructuredBuffer<uint4> g_bufMesh : register(u1); /* results written by MSMain */ |
| 664 | + RWStructuredBuffer<uint4> g_bufAmp : register(u2); /* results written by ASMain */ |
| 665 | +
|
| 666 | + SamplerState g_samp : register(s0); /* used by Sample, SampleBias, SampleGrad and SampleLevel */ |
| 667 | + SamplerComparisonState g_sampCmp : register(s1); /* used by SampleCmp, SampleCmpLevel and SampleCmpLevelZero */ |
| 668 | +
|
| 669 | + // Emulates a 4-bit signed offset field: only the low 4 bits of each component |
| 670 | + // are kept and bit 3 is sign-extended, so every result lies in [-8, 7] |
| 671 | + int2 Mask4Offset(int2 offset) { |
| 672 | + /* move the low nibble to the top of the word, then arithmetic-shift it back down */ |
| 673 | + int2 wrapped = (offset << 28) >> 28; |
| 674 | + return wrapped; |
| 675 | + } |
| 676 | +
|
| 677 | + // Tests Sample and SampleCmp variants with programmed offsets. Returns one uint per variant; when bDeriv is false the Sample and SampleCmp slots are -1 instead |
| 678 | + uint4 DoTests( int2 coord, int2 offset, bool bDeriv ) { |
| 679 | +#if __SHADER_TARGET_MAJOR > 6 || (__SHADER_TARGET_MAJOR == 6 && __SHADER_TARGET_MINOR >= 7) |
| 680 | + // Target 6.7+: the raw offset goes straight to the intrinsics. The compare value uses the masked offset, which should only acknowledge the lower 4 bits |
| 681 | + int2 moffset = Mask4Offset(offset); |
| 682 | + // The index that should correspond to this location is the expected value |
| 683 | + float cmp = (coord.y + moffset.y)*1000 + coord.x + moffset.x; |
| 684 | + // Samples require float coords 0.0-1.0. Adding 0.5 prevents edge conditions |
| 685 | + float2 fcoord = (coord + 0.5)/1000.0; |
| 686 | + if (bDeriv) { |
| 687 | + return uint4( g_tex.Sample(g_samp, fcoord, offset), |
| 688 | + g_tex.SampleCmp(g_sampCmp, fcoord, cmp, offset), |
| 689 | + g_tex.SampleCmpLevel(g_sampCmp, fcoord, cmp, 0, offset), |
| 690 | + g_tex.SampleCmpLevelZero(g_sampCmp, fcoord, cmp, offset)); |
| 691 | + } else { |
| 692 | + return uint4( -1, -1, |
| 693 | + g_tex.SampleCmpLevel(g_sampCmp, fcoord, cmp, 0, offset), |
| 694 | + g_tex.SampleCmpLevelZero(g_sampCmp, fcoord, cmp, offset)); |
| 695 | + } |
| 696 | +#else |
| 697 | + // Earlier targets: fake the offset by adding its masked value to the integer coords |
| 698 | + coord += Mask4Offset(offset); |
| 699 | + // The index that should correspond to this (already shifted) location |
| 700 | + float cmp = (coord.y)*1000 + coord.x; |
| 701 | + // Samples require float coords 0.0-1.0. Adding 0.5 prevents edge conditions |
| 702 | + float2 fcoord = (coord + 0.5)/1000.0; |
| 703 | + if (bDeriv) { |
| 704 | + return uint4( g_tex.Sample(g_samp, fcoord), |
| 705 | + g_tex.SampleCmp(g_sampCmp, fcoord, cmp), |
| 706 | + g_tex.SampleLevel(g_samp, fcoord, 0) == cmp, /* occupies the slot that SampleCmpLevel fills in the 6.7+ path */ |
| 707 | + g_tex.SampleCmpLevelZero(g_sampCmp, fcoord, cmp)); |
| 708 | + } else { |
| 709 | + return uint4( -1, -1, |
| 710 | + g_tex.SampleLevel(g_samp, fcoord, 0) == cmp, /* occupies the slot that SampleCmpLevel fills in the 6.7+ path */ |
| 711 | + g_tex.SampleCmpLevelZero(g_sampCmp, fcoord, cmp)); |
| 712 | + } |
| 713 | +#endif |
| 714 | + } |
| 715 | +
|
| 716 | + // Tests Load and Sample* variants with programmed offsets. Returns one uint per variant; when bDeriv is false the SampleBias slot is -1 instead |
| 717 | + uint4 DoMoarTests( int2 coord, int2 offset, bool bDeriv ) { |
| 718 | + // Load takes integer texel coords plus a third component (0 here, the mip level), unlike the Sample* calls |
| 719 | + uint3 lcoord = uint3(coord, 0); |
| 720 | +#if __SHADER_TARGET_MAJOR > 6 || (__SHADER_TARGET_MAJOR == 6 && __SHADER_TARGET_MINOR >= 7) |
| 721 | + // Samples require float coords 0.0-1.0. Adding 0.5 prevents edge conditions |
| 722 | + float2 fcoord = (coord + 0.5)/1000.0; |
| 723 | + if (bDeriv) { |
| 724 | + return uint4( g_tex.Load(lcoord, offset), |
| 725 | + g_tex.SampleBias(g_samp, fcoord, -1.0, offset), |
| 726 | + g_tex.SampleGrad(g_samp, fcoord, 1.0, 1.0, offset), |
| 727 | + g_tex.SampleLevel(g_samp, fcoord, 0, offset)); |
| 728 | + } else { |
| 729 | + return uint4( g_tex.Load(lcoord, offset), |
| 730 | + -1, |
| 731 | + g_tex.SampleGrad(g_samp, fcoord, 1.0, 1.0, offset), |
| 732 | + g_tex.SampleLevel(g_samp, fcoord, 0, offset)); |
| 733 | + } |
| 734 | +#else |
| 735 | + // Earlier targets: fake the offset by adding its masked value to the integer coords |
| 736 | + coord += Mask4Offset(offset); |
| 737 | + // Rebuild the Load coords from the shifted location |
| 738 | + lcoord = uint3(coord, 0); |
| 739 | + // Samples require float coords 0.0-1.0. Adding 0.5 prevents edge conditions |
| 740 | + float2 fcoord = (coord + 0.5)/1000.0; |
| 741 | + if (bDeriv) { |
| 742 | + return uint4( g_tex.Load(lcoord), |
| 743 | + g_tex.SampleBias(g_samp, fcoord, -1.0), |
| 744 | + g_tex.SampleGrad(g_samp, fcoord, 1.0, 1.0), |
| 745 | + g_tex.SampleLevel(g_samp, fcoord, 0)); |
| 746 | + } else { |
| 747 | + return uint4( g_tex.Load(lcoord), |
| 748 | + -1, |
| 749 | + g_tex.SampleGrad(g_samp, fcoord, 1.0, 1.0), |
| 750 | + g_tex.SampleLevel(g_samp, fcoord, 0)); |
| 751 | + } |
| 752 | +#endif |
| 753 | + } |
| 754 | +
|
| 755 | + static float4 g_Verts[6] = { /* positions of the two-triangle quad that MSMain copies into its vertex output */ |
| 756 | + { -1.0f, 1.0f, 0.0f, 1.0f }, |
| 757 | + { 1.0f, 1.0f, 0.0f, 1.0f }, |
| 758 | + { -1.0f, -1.0f, 0.0f, 1.0f }, |
| 759 | +
|
| 760 | + { -1.0f, -1.0f, 0.0f, 1.0f }, |
| 761 | + { 1.0f, 1.0f, 0.0f, 1.0f }, |
| 762 | + { 1.0f, -1.0f, 0.0f, 1.0f }}; |
| 763 | +
|
| 764 | + static float2 g_UV[6] = { /* texture coordinates, entry for entry parallel to g_Verts */ |
| 765 | + { 0.0f, 0.0f }, |
| 766 | + { 1.0f, 0.0f }, |
| 767 | + { 0.0f, 1.0f }, |
| 768 | +
|
| 769 | + { 0.0f, 1.0f }, |
| 770 | + { 1.0f, 0.0f }, |
| 771 | + { 1.0f, 1.0f }}; |
| 772 | +
|
| 773 | + struct Payload { /* handed from ASMain to DispatchMesh; MSMain declares it but never reads it */ |
| 774 | + uint nothing; |
| 775 | + }; |
| 776 | +
|
| 777 | + // Clearly these could be calculated in place, but it's illustrative to write them out. |
| 778 | + // The coords are a selection of texel locations; the offsets cover every 4-bit signed value (-8..7) plus -9 and 8, which Mask4Offset wraps to 7 and -8 |
| 779 | + static const int coords[OFFSETS] = {100, 150, 200, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 800, 850, 900, 950}; |
| 780 | + static const int offsets[OFFSETS] = {-9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8}; |
| 781 | + /* Amplification entry point: runs both test functions for every coord/offset pair, stores the results in g_bufAmp, then dispatches one mesh group */ [NumThreads(2, 2, 1)] |
| 782 | + void ASMain(uint3 id : SV_GroupThreadId) { |
| 783 | + Payload payload; |
| 784 | + // Bunching up work to compensate for small numthreads limits: each of the 2x2 threads covers a 9x9 tile of the 18x18 coord/offset grid |
| 785 | + for (int y = 0; y < 9; y++) { |
| 786 | + for (int x = 0; x < 9; x++) { |
| 787 | + int2 coord = int2(coords[9*id.x+x], coords[9*id.y+y]); /* int2 matches the DoTests/DoMoarTests parameters, as CSMain does */ |
| 788 | + int2 offset = int2(offsets[9*id.x+x], offsets[9*id.y+y]); /* offsets are signed; no detour through uint2 */ |
| 789 | + g_bufAmp[2*(18*(9*id.y + y) + 9*id.x + x) + 0] = DoTests(coord, offset, DERIV_MESH_AMP); /* two uint4 slots per grid cell, cells laid out row-major */ |
| 790 | + g_bufAmp[2*(18*(9*id.y + y) + 9*id.x + x) + 1] = DoMoarTests(coord, offset, DERIV_MESH_AMP); |
| 791 | + } |
| 792 | + } |
| 793 | + payload.nothing = 0; /* the payload carries no data; give it a defined value before dispatch */ |
| 794 | + DispatchMesh(1, 1, 1, payload); |
| 795 | + } |
| 796 | +
|
| 797 | + /* Mesh entry point: emits the static two-triangle quad and runs both test functions for every coord/offset pair, storing the results in g_bufMesh */ [NumThreads(2, 2, 1)] |
| 798 | + [OutputTopology("triangle")] |
| 799 | + void MSMain( |
| 800 | + uint3 id : SV_GroupThreadId, |
| 801 | + uint ix : SV_GroupIndex, |
| 802 | + in payload Payload payload, |
| 803 | + out vertices PSInput verts[6], |
| 804 | + out indices uint3 tris[2]) { |
| 805 | + SetMeshOutputCounts(6, 2); |
| 806 | + // Assign static fullscreen 2 tri quad; thread 0 writes all six vertices |
| 807 | + if (ix == 0) { |
| 808 | + for (uint i = 0; i < 6; ++i) { |
| 809 | + verts[i].position = g_Verts[i]; |
| 810 | + verts[i].uv = g_UV[i]; |
| 811 | + } |
| 812 | + } |
| 813 | + if (ix < 2) { /* threads 0 and 1 each write one triangle: vertices 0,1,2 and 3,4,5 */ |
| 814 | + tris[ix&1] = uint3((ix&1)*3, (ix&1)*3 + 1, (ix&1)*3 + 2); |
| 815 | + } |
| 816 | + // Bunching up work to compensate for small numthreads limits: each of the 2x2 threads covers a 9x9 tile of the 18x18 coord/offset grid |
| 817 | + for (int y = 0; y < 9; y++) { |
| 818 | + for (int x = 0; x < 9; x++) { |
| 819 | + int2 coord = int2(coords[9*id.x+x], coords[9*id.y+y]); /* int2 matches the DoTests/DoMoarTests parameters, as CSMain does */ |
| 820 | + int2 offset = int2(offsets[9*id.x+x], offsets[9*id.y+y]); /* offsets are signed; no detour through uint2 */ |
| 821 | + g_bufMesh[2*(18*(9*id.y + y) + 9*id.x + x) + 0] = DoTests(coord, offset, DERIV_MESH_AMP); /* two uint4 slots per grid cell, cells laid out row-major */ |
| 822 | + g_bufMesh[2*(18*(9*id.y + y) + 9*id.x + x) + 1] = DoMoarTests(coord, offset, DERIV_MESH_AMP); |
| 823 | + } |
| 824 | + } |
| 825 | + } |
| 826 | +
|
| 827 | + PSInput VSMain(float3 position : POSITION, float2 uv : TEXCOORD) { |
| 828 | + PSInput vsOut; /* pass-through: uv is forwarded unchanged, position is widened to float4 with w = 1 */ |
| 829 | + vsOut.uv = uv; |
| 830 | + vsOut.position = float4(position, 1.0); |
| 831 | + return vsOut; |
| 832 | + } |
| 833 | +
|
| 834 | + float4 PSMain(PSInput input) : SV_TARGET { /* maps the interpolated uv to a coord/offset grid cell and writes both test results for that cell to g_bufMain */ |
| 835 | + uint2 cell = min(uint2(input.uv * OFFSETS), uint2(OFFSETS - 1, OFFSETS - 1)); /* uv in [0,1] to cell in [0,OFFSETS-1]; the clamp keeps uv == 1.0 inside the tables */ |
| 836 | + uint ix = cell.y*OFFSETS + cell.x; /* row-major cell index; each cell owns two uint4 slots */ |
| 837 | + int2 coord = int2(coords[cell.x], coords[cell.y]); /* explicit integer subscripts instead of float expressions */ |
| 838 | + int2 offset = int2(offsets[cell.x], offsets[cell.y]); /* offsets are signed, matching the int2 parameter of the test functions */ |
| 839 | + g_bufMain[2*ix] = DoTests(coord, offset, true); |
| 840 | + g_bufMain[2*ix+1] = DoMoarTests(coord, offset, true); |
| 841 | + return 1; |
| 842 | + } |
| 843 | +
|
| 844 | + [NumThreads(OFFSETS, OFFSETS, 1)] /* one thread per (x, y) pair of table entries */ |
| 845 | + void CSMain(precise uint3 id : SV_GroupThreadId, precise uint ix : SV_GroupIndex) { |
| 846 | + int2 coord = int2(coords[id.x], coords[id.y]); |
| 847 | + int2 offset = int2(offsets[id.x], offsets[id.y]); |
| 848 | +#if __SHADER_TARGET_MAJOR > 6 || (__SHADER_TARGET_MAJOR == 6 && __SHADER_TARGET_MINOR >= 6) |
| 849 | + g_bufMain[2*ix] = DoTests(coord, offset, true); /* target 6.6+: bDeriv is true, so the Sample, SampleCmp and SampleBias slots are filled */ |
| 850 | + g_bufMain[2*ix+1] = DoMoarTests(coord, offset, true); |
| 851 | +#else |
| 852 | + g_bufMain[2*ix] = DoTests(coord, offset, false); /* earlier targets: bDeriv is false, so those slots hold -1 */ |
| 853 | + g_bufMain[2*ix+1] = DoMoarTests(coord, offset, false); |
| 854 | +#endif |
| 855 | + } |
| 856 | +
|
| 857 | + ]]> |
| 858 | + </Shader> |
| 859 | + </ShaderOp> |
584 | 860 | <ShaderOp Name="OOB" PS="PS" VS="VS">
|
585 | 861 | <RootSignature>RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), CBV(b0), DescriptorTable(SRV(t0,numDescriptors=2))</RootSignature>
|
586 | 862 | <Resource Name="CB0" Dimension="BUFFER" InitialResourceState="COPY_DEST" Init="FromBytes" TransitionTo="VERTEX_AND_CONSTANT_BUFFER">
|
|
0 commit comments