Anatomy of Nvidia's Voxel Cone Tracing Code (VXGI)

Posted dydx

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Anatomy of Nvidia's Voxel Cone Tracing Code (VXGI)相关的知识,希望对你有一定的参考价值。

Here is the annotated disassembly of the diffuse cone tracing pixel shader (ps_5_0).

  1 //
  2  Generated by Microsoft (R) HLSL Shader Compiler 10.0.10011.16384
  3  Buffer Definitions: 
  4  cbuffer AbstractTracingCB
  5  {
  6    struct VxgiAbstractTracingConstants
  7    {
  8 
  9        float4 rOpacityTextureSize;    // Offset:    0
 10        float4 rEmittanceTextureSize;  // Offset:   16
 11        float4 ClipmapAnchor;          // Offset:   32
 12        float4 SceneBoundaryLower;     // Offset:   48
 13        float4 SceneBoundaryUpper;     // Offset:   64
 14        float4 ClipmapCenter;          // Offset:   80
 15        float4 TracingToroidalOffset;  // Offset:   96
 16        float EmittancePackingStride;  // Offset:  112
 17        float FinestVoxelSize;         // Offset:  116
 18        float StackTextureSize;        // Offset:  120
 19        float rNearestLevel0Boundary;  // Offset:  124
 20        float MaxMipmapLevel;          // Offset:  128
 21        float rEmittanceStorageScale;  // Offset:  132
 22        float rClipmapSizeWorld;       // Offset:  136
 23        uint Use6DOpacity;             // Offset:  140
 24    } g_VxgiAbstractTracingCB;         // Offset:    0 Size:   144
 25  }
 26  cbuffer TranslationCB
 27  {
 28    float4 g_VxgiTranslationParameters[13];// Offset:    0 Size:   208
 29    float4 g_VxgiTranslationParameters2[13];// Offset:  208 Size:   208
 30  }
 31  cbuffer cBuiltinTracingParameters
 32  {
 33    struct GBufferParameters
 34    {
 35 
 36        row_major float4x4 viewProjMatrix;// Offset:    0
 37        row_major float4x4 viewProjMatrixInv;// Offset:   64
 38        row_major float4x4 viewMatrix; // Offset:  128
 39        float4 cameraPosition;         // Offset:  192
 40        float4 uvToView;               // Offset:  208
 41        float2 gbufferSize;            // Offset:  224
 42        float2 gbufferSizeInv;         // Offset:  232
 43        float2 viewportOrigin;         // Offset:  240
 44        float2 viewportSize;           // Offset:  248
 45        float2 viewportSizeInv;        // Offset:  256
 46        float2 firstSamplePosition;    // Offset:  264
 47        float projectionA;             // Offset:  272
 48        float projectionB;             // Offset:  276
 49        float depthScale;              // Offset:  280
 50        float depthBias;               // Offset:  284
 51        float normalScale;             // Offset:  288
 52        float normalBias;              // Offset:  292
 53        float radiusToScreen;          // Offset:  296
 54    } g_GBuffer;                       // Offset:    0 Size:   300
 55 
 56    struct GBufferParameters
 57    {
 58 
 59        row_major float4x4 viewProjMatrix;// Offset:  304
 60        row_major float4x4 viewProjMatrixInv;// Offset:  368
 61        row_major float4x4 viewMatrix; // Offset:  432
 62        float4 cameraPosition;         // Offset:  496
 63        float4 uvToView;               // Offset:  512
 64        float2 gbufferSize;            // Offset:  528
 65        float2 gbufferSizeInv;         // Offset:  536
 66        float2 viewportOrigin;         // Offset:  544
 67        float2 viewportSize;           // Offset:  552
 68        float2 viewportSizeInv;        // Offset:  560
 69        float2 firstSamplePosition;    // Offset:  568
 70        float projectionA;             // Offset:  576
 71        float projectionB;             // Offset:  580
 72        float depthScale;              // Offset:  584
 73        float depthBias;               // Offset:  588
 74        float normalScale;             // Offset:  592
 75        float normalBias;              // Offset:  596
 76        float radiusToScreen;          // Offset:  600
 77    } g_PreviousGBuffer;               // Offset:  304 Size:   300 [unused]
 78    row_major float4x4 g_ReprojectionMatrix;// Offset:  608 Size:    64 [unused]
 79    float4 g_AmbientColor;             // Offset:  672 Size:    16
 80    float4 g_DownsampleScale;          // Offset:  688 Size:    16
 81    float4 g_DebugParams;              // Offset:  704 Size:    16 [unused]
 82    float4 g_EnvironmentMapTint;       // Offset:  720 Size:    16
 83    float4 g_RefinementGridResolution; // Offset:  736 Size:    16 [unused]
 84    float4 g_BackgroundColor;          // Offset:  752 Size:    16 [unused]
 85    int2 g_PixelToSave;                // Offset:  768 Size:     8 [unused]
 86    int2 g_RandomOffset;               // Offset:  776 Size:     8
 87    float2 g_GridOrigin;               // Offset:  784 Size:     8 [unused]
 88    float g_ConeFactor;                // Offset:  792 Size:     4
 89    float g_TracingStep;               // Offset:  796 Size:     4
 90    float g_OpacityCorrectionFactor;   // Offset:  800 Size:     4
 91    int g_MaxSamples;                  // Offset:  804 Size:     4
 92    int g_NumCones;                    // Offset:  808 Size:     4 [unused]
 93    float g_rNumCones;                 // Offset:  812 Size:     4
 94    float g_EmittanceScale;            // Offset:  816 Size:     4
 95    float g_EnvironmentMapResolution;  // Offset:  820 Size:     4
 96    float g_MaxEnvironmentMapMipLevel; // Offset:  824 Size:     4
 97    float g_NormalOffsetFactor;        // Offset:  828 Size:     4
 98    float g_AmbientAttenuationFactor;  // Offset:  832 Size:     4
 99    uint g_FlipOpacityDirections;      // Offset:  836 Size:     4
100    float g_InitialOffsetBias;         // Offset:  840 Size:     4
101    float g_InitialOffsetDistanceFactor;// Offset:  844 Size:     4
102    uint g_EnableSpecularRandomOffsets;// Offset:  848 Size:     4 [unused]
103    uint g_NumDiscontinuityLevels;     // Offset:  852 Size:     4 [unused]
104    float g_TemporalReprojectionWeight;// Offset:  856 Size:     4 [unused]
105    float g_TangentJitterScale;        // Offset:  860 Size:     4 [unused]
106    float g_DepthDeltaSign;            // Offset:  864 Size:     4 [unused]
107    float g_ReprojectionDepthWeightScale;// Offset:  868 Size:     4 [unused]
108    float g_ReprojectionNormalWeightExponent;// Offset:  872 Size:     4 [unused]
109    float g_InterpolationWeightThreshold;// Offset:  876 Size:     4 [unused]
110    uint g_EnableRefinement;           // Offset:  880 Size:     4 [unused]
111    float g_AmbientScale;              // Offset:  884 Size:     4
112    float g_AmbientBias;               // Offset:  888 Size:     4
113    float g_AmbientPower;              // Offset:  892 Size:     4
114    float g_AmbientDistanceDarkening;  // Offset:  896 Size:     4
115    int g_AltSettingsStencilMask;      // Offset:  900 Size:     4
116    int g_AltSettingsStencilRefValue;  // Offset:  904 Size:     4
117    float g_AltInitialOffsetBias;      // Offset:  908 Size:     4
118    float g_AltInitialOffsetDistanceFactor;// Offset:  912 Size:     4
119    float g_AltNormalOffsetFactor;     // Offset:  916 Size:     4
120    float g_AltTracingStep;            // Offset:  920 Size:     4
121    float g_SSAO_SurfaceBias;          // Offset:  924 Size:     4 [unused]
122    float g_SSAO_RadiusWorld;          // Offset:  928 Size:     4 [unused]
123    float g_SSAO_rBackgroundViewDepth; // Offset:  932 Size:     4 [unused]
124    float g_SSAO_CoarseAO;             // Offset:  936 Size:     4 [unused]
125    float g_SSAO_PowerExponent;        // Offset:  940 Size:     4 [unused]
126  }
127  Resource Bindings:
128  Name                                 Type  Format         Dim Slot Elements
129  ------------------------------ ---------- ------- ----------- ---- --------
130  s_VoxelTextureSampler             sampler      NA          NA    0        1
131  s_EnvironmentMapSampler           sampler      NA          NA   11        1
132  g_DepthBuffer                     texture  float4          2d    0        1
133  g_TargetFlatNormal                texture  float4          2d    3        1
134  g_TargetStencil                   texture   uint2          2d    4        1
135  t_OpacityMap_Pos                  texture  float4          3d    6        1
136  t_OpacityMap_Neg                  texture  float4          3d    7        1
137  t_ConeDirectionMap                texture  float4     2darray   10        1
138  t_EnvironmentMap                  texture  float4        cube   11        1
139  t_EmittanceEven                   texture  float4          3d   12        1
140  t_EmittanceOdd                    texture  float4          3d   15        1
141  AbstractTracingCB                 cbuffer      NA          NA    0        1
142  TranslationCB                     cbuffer      NA          NA    1        1
143  cBuiltinTracingParameters         cbuffer      NA          NA    2        1
144  Input signature:
145  Name                 Index   Mask Register SysValue  Format   Used
146  -------------------- ----- ------ -------- -------- ------- ------
147  TEXCOORD                 0   xy          0     NONE   float       
148  INSTANCEID               0     z         0     NONE   float     z 
149  RAY                      0   xyzw        1     NONE   float       
150  SV_Position              0   xyzw        2      POS   float   xy  
151  Output signature:
152  Name                 Index   Mask Register SysValue  Format   Used
153  -------------------- ----- ------ -------- -------- ------- ------
154  SV_Target                0   xyzw        0   TARGET   float   xyzw
155  SV_Target                1   xyzw        1   TARGET   float   xyzw
156  SV_Target                2   xyzw        2   TARGET   float   xyzw
157 
158 0x00000000: ps_5_0
159 0x00000008: dcl_globalFlags refactoringAllowed
160 0x0000000C: dcl_constantbuffer cb0[9], immediateIndexed
161 0x0000001C: dcl_constantbuffer cb1[26], dynamicIndexed
162 0x0000002C: dcl_constantbuffer cb2[58], immediateIndexed
163 0x0000003C: dcl_sampler s_VoxelTextureSampler, mode_default
164 0x00000048: dcl_sampler s_EnvironmentMapSampler, mode_default
165 0x00000054: dcl_resource_texture2d (float,float,float,float) g_DepthBuffer
166 0x00000064: dcl_resource_texture2d (float,float,float,float) g_TargetFlatNormal
167 0x00000074: dcl_resource_texture2d (uint,uint,uint,uint) g_TargetStencil
168 0x00000084: dcl_resource_texture3d (float,float,float,float) t_OpacityMap_Pos
169 0x00000094: dcl_resource_texture3d (float,float,float,float) t_OpacityMap_Neg
170 0x000000A4: dcl_resource_texture2darray (float,float,float,float) t_ConeDirectionMap
171 0x000000B4: dcl_resource_texturecube (float,float,float,float) t_EnvironmentMap
172 0x000000C4: dcl_resource_texture3d (float,float,float,float) t_EmittanceEven
173 0x000000D4: dcl_resource_texture3d (float,float,float,float) t_EmittanceOdd
174 0x000000E4: dcl_input_ps linear v0.z
175 0x000000F0: dcl_input_ps_siv linear noperspective v2.xy, position
176 0x00000100: dcl_output o0.xyzw
177 0x0000010C: dcl_output o1.xyzw
178 0x00000118: dcl_output o2.xyzw
179 0x00000124: dcl_temps 19
180 
181 0x0000012C: lt r0.xy, l(1.000000, 1.000000, 0.000000, 0.000000), g_DownsampleScale.xyxx
182 0x00000158: and r0.x, r0.y, r0.x
183 0x00000174: round_ni r0.yz, v2.xxyx
184 0x00000188: itof r1.xy, g_RandomOffset.zwzz
185 0x000001A0: add r1.xy, r0.yzyy, r1.xyxx
186 0x000001BC: mul r1.xy, r1.xyxx, g_DownsampleScale.zwzz
187 0x000001DC: frc r1.xy, r1.xyxx
188 0x000001F0: mul r1.xy, r1.xyxx, g_DownsampleScale.xyxx
189 0x00000210: round_ni r1.xy, r1.xyxx
190 0x00000224: add r0.w, -r1.x, g_DownsampleScale.x
191 0x00000248: mad r0.w, r0.z, g_DownsampleScale.y, r0.w
192 0x00000270: mad r1.x, r0.y, g_DownsampleScale.x, r1.y
193 0x00000298: add r0.w, r0.w, firstSamplePosition.w
194 0x000002B8: add r1.x, r1.x, firstSamplePosition.z
195 0x000002D8: add r1.y, r0.w, l(-1.000000)
196 0x000002F4: movc r0.xw, r0.xxxx, r1.xxxy, v2.xxxy
197 0x00000318: ftoi r1.xy, r0.xwxx
198 0x0000032C: mov r1.zw, l(0,0,0,0)
199 
200 // load depth and normal
201 // r2 = {depth, normal.x, normal.y, normal.z}
202 0x0000034C: ld_indexable(texture2d)(float,float,float,float) r2.x, r1.xyww, g_DepthBuffer.xyzw
203 0x00000370: ld_indexable(texture2d)(float,float,float,float) r2.yzw, r1.xyww, g_TargetFlatNormal.wxyz
204 0x00000394: mad r2.x, r2.x, depthScale, depthBias
205 0x000003C0: mad r2.yzw, r2.yyzw, normalScale.xxxx, normalBias.yyyy
206 0x000003EC: dp3 r3.x, r2.yzwy, r2.yzwy
207 0x00000408: sqrt r3.x, r3.x
208 0x0000041C: div r2.yzw, r2.yyzw, r3.xxxx                             // normalize
209 0x00000438: ge r3.x, l(0.000000), r3.x
210 0x00000454: movc r2.yzw, r3.xxxx, l(0,0,0,0), r2.yyzw
211 
212 // convert screen position to world space
213 // r3.xyz = world space position
214 // r0.xw  = {screenX, screenY} [-1,1]
215 0x00000484: add r0.xw, r0.xxxw, -viewportOrigin.xxxy
216 0x000004A8: mul r0.xw, r0.xxxw, viewportSizeInv.xxxy
217 0x000004C8: mad r0.x, r0.x, l(2.000000), l(-1.000000)
218 0x000004EC: mad r0.w, -r0.w, l(2.000000), l(1.000000)
219 0x00000514: mul r3.xyzw, r0.wwww, viewProjMatrixInv.xyzw
220 0x00000534: mad r3.xyzw, r0.xxxx, viewProjMatrixInv.xyzw, r3.xyzw
221 0x0000055C: mad r3.xyzw, r2.xxxx, viewProjMatrixInv.xyzw, r3.xyzw
222 0x00000584: add r3.xyzw, r3.xyzw, viewProjMatrixInv.xyzw
223 0x000005A4: div r3.xyz, r3.xyzx, r3.wwww
224 
225 // calculate the max-axis (Chebyshev) distance of the world position from ClipmapAnchor.xyz
226 0x000005C0: add r4.xyz, r3.xyzx, -ClipmapAnchor.xyzx
227 0x000005E4: max r0.x, |r4.z|, |r4.y|
228 0x00000608: max r0.x, r0.x, |r4.x|                 // r0.x = max distance from clipmap center
229 0x00000628: add r0.w, -r0.x, ClipmapAnchor.w
230 0x0000064C: mul r0.w, r0.w, rClipmapSizeWorld.z    // r0.w = (ClipmapAnchor.w - r0.x) * rClipmapSizeWorld
231 0x0000066C: mul_sat r0.w, r0.w, l(4.000000)        // r0.w = saturate(4 * (ClipmapAnchor.w - r0.x) * rClipmapSizeWorld)
232 // r0.x = max distance
233 // r0.w = saturate(4 * (ClipmapAnchor.w - r0.x) * rClipmapSizeWorld)
234 // r4 = offset from clipmap center
235 
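236 // check that the normal is valid (not all components zero/denormal, no component INF/NaN) and that r0.w (the saturated clipmap-boundary term above) is non-zero; depth itself is not tested here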
237 // r2.x = is valid
238 0x00000688: and r4.xyz, r2.yzwy, l(0x7f800000, 0x7f800000, 0x7f800000, 0)   // fetch exponent part
239 0x000006B0: ieq r5.xyz, r4.xyzx, l(0, 0, 0, 0)
240 0x000006D8: and r2.x, r5.y, r5.x
241 0x000006F4: and r2.x, r5.z, r2.x                                            // check if all exponent part is 0
242 0x00000710: ieq r4.xyz, r4.xyzx, l(0x7f800000, 0x7f800000, 0x7f800000, 0)
243 0x00000738: or r3.w, r4.y, r4.x
244 0x00000754: or r3.w, r4.z, r3.w
245 0x00000770: or r2.x, r2.x, r3.w
246 0x0000078C: not r2.x, r2.x
247 0x000007A0: ne r3.w, r0.w, l(0.000000)
248 0x000007BC: and r2.x, r2.x, r3.w
249 
250 // calculate indirect diffuse light
251 0x000007D8: if_nz r2.x
252 // normalize normal
253 0x000007E4:   ftou r4.z, v0.z                     // r4.z = instance id
254 0x000007F8:   dp3 r2.x, r2.yzwy, r2.yzwy          // r2.x = dot(normal, normal)
255 0x00000814:   rsq r2.x, r2.x                      // r2.x = 1 / length(normal)
256 0x00000828:   mul r5.xyz, r2.xxxx, r2.yzwy        // r5.xyz = normalize(normal)
257 
258 // calculate local coordinate system
259 // r5 = normal
260 // r6 = tangent
261 // r7 = bitangent
262 0x00000844:   mov r6.xyz, |r5.xyzx|                          // r6.xyz = |normalize(normal)|
263 0x0000085C:   max r3.w, r6.z, r6.y
264 0x00000878:   max r3.w, r3.w, r6.x                           // r3.w = max3(normalize(normal))
265 0x00000894:   mad r7.xyz, -r2.zyyz, r2.xxxx, -r5.zzyz        // r7.xyz = -r2.zyy / |normal| - r5.zzy
266 0x000008C0:   lt r8.xyz, l(0.000000, 0.000000, 0.000000, 0.000000), r5.xyzx
267 0x000008E8:   lt r9.xyz, r5.xyzx, l(0.000000, 0.000000, 0.000000, 0.000000)
268 0x00000910:   iadd r8.xyz, -r8.xyzx, r9.xyzx
269 0x00000930:   itof r8.xyz, r8.xyzx
270 0x00000944:   eq r9.xy, r6.xyxx, r3.wwww
271 0x00000960:   mul r7.xyz, r7.xyzx, r8.xyzx
272 0x0000097C:   mov r6.w, r7.y
273 0x00000990:   mov r7.w, r6.z
274 0x000009A4:   movc r6.yzw, r9.yyyy, r6.yywy, r7.wwwz
275 0x000009C8:   mov r7.yz, r6.xxxx
276 0x000009DC:   movc r6.xyz, r9.xxxx, r7.xyzx, r6.yzwy
277 0x00000A00:   dp3 r3.w, r6.xyzx, r6.xyzx
278 0x00000A1C:   rsq r3.w, r3.w
279 0x00000A30:   mul r6.xyz, r3.wwww, r6.xyzx                       // r6.xyz = normalize(r6.xyz)
280 0x00000A4C:   mul r7.xyz, r5.yzxy, r6.zxyz
281 0x00000A68:   mad r7.xyz, r6.yzxy, r5.zxyz, -r7.xyzx             // r7.xyz = cross(r6.xyz, r5.xyz)
282 0x00000A90:   ftoi r0.yz, r0.yyzy
283 0x00000AA4:   iadd r0.yz, r0.yyzy, g_RandomOffset.zzwz
284 0x00000AC4:   and r4.xy, r0.yzyy, l(3, 3, 0, 0)                  // r4.xy is useless because color is the same
285 0x00000AEC:   mov r4.w, l(0)                                     // r4.w = 0
286 
287 // fetch the cone direction from the t_ConeDirectionMap lookup texture (array slice = instance id, texel = (pixel + random offset) & 3, mip 0)
288 // r4.xyz = normalized cone direction in world space
289 0x00000B00:   ld_indexable(texture2darray)(float,float,float,float) r4.xyzw, r4.xyzw, t_ConeDirectionMap.xyzw // r4.z = instance id = cone index, r4.xy is useless 
290 0x00000B24:   mul r5.xyz, r5.xyzx, r4.yyyy                       // r5.xyz = r5.xyz * r4.y
291 0x00000B40:   mad r5.xyz, r4.xxxx, r6.xyzx, r5.xyzx              // r5.xyz = r4.x * r6.xyz + r5.xyz * r4.y
292 0x00000B64:   mad r4.xyz, r4.zzzz, r7.xyzx, r5.xyzx              // r4.xyz = r4.x * r6.xyz + r4.y * r5.xyz + r4.z * r7.xyz
293 0x00000B88:   dp3 r0.y, r4.xyzx, r4.xyzx
294 0x00000BA4:   rsq r0.y, r0.y
295 0x00000BB8:   mul r4.xyz, r0.yyyy, r4.xyzx                       // r4.xyz = normalized cone direction
296 0x00000BD4:   dp2 r0.x, rNearestLevel0Boundary.wwww, r0.xxxx     // r0.x = 0, rNearestLevel0Boundary = 0
297 0x00000BF4:   max r0.x, r0.x, l(1.000000)                        // r0.x = 1
298 0x00000C10:   mul r0.y, r4.w, r0.x                               // r0.y = 0
299 
300 // set cone tracing parameter
301 // r1 = {g_InitialOffsetBias, g_InitialOffsetDistanceFactor, g_NormalOffsetFactor, g_TracingStep}
302 //    = {2, 1, 0.5, 0.5}
303 0x00000C2C:   if_nz g_AltSettingsStencilMask
304 0x00000C3C:     ld_indexable(texture2d)(uint,uint,uint,uint) r0.z, r1.xyzw, g_TargetStencil.xzyw
305 0x00000C60:     and r0.z, r0.z, g_AltSettingsStencilMask
306 0x00000C80:     ieq r0.z, r0.z, g_AltSettingsStencilRefValue
307 0x00000CA0:     mov r1.xy, (g_InitialOffsetBias,g_InitialOffsetDistanceFactor,g_InitialOffsetBias,g_InitialOffsetBias)
308 0x00000CB8:     mov r1.z, g_NormalOffsetFactor
309 0x00000CD0:     mov r1.w, g_TracingStep
310 0x00000CE8:     movc r1.x, r0.z, g_AltInitialOffsetBias, r1.x
311 0x00000D10:     movc r1.yzw, r0.zzzz, (g_AltInitialOffsetDistanceFactor,g_AltInitialOffsetDistanceFactor,g_AltNormalOffsetFactor,g_AltTracingStep), r1.yyzw
312 0x00000D38:   else 
313 0x00000D3C:     mov r1.xy, (g_InitialOffsetBias,g_InitialOffsetDistanceFactor,g_InitialOffsetBias,g_InitialOffsetBias)
314 0x00000D54:     mov r1.z, g_NormalOffsetFactor
315 0x00000D6C:     mov r1.w, g_TracingStep
316 0x00000D84:   endif 
317 
318 // compute initial offset
319 // g_AmbientAttenuationFactor = 0.04
320 // g_InitialOffsetBias = 2
321 // g_InitialOffsetDistanceFactor = 1
322 // g_TracingStep = 0.5
323 // g_NormalOffsetFactor = 0.5
324 // r0.xy = {3,0}
325 0x00000D88:   log r0.z, r0.x                                // r0.x = 1
326 0x00000D9C:   mul r0.z, r0.z, g_AmbientDistanceDarkening
327 0x00000DBC:   exp r0.z, r0.z
328 0x00000DD0:   mul r0.z, r0.z, g_AmbientAttenuationFactor    // r0.z = g_AmbientAttenuationFactor * r0.x ^ g_AmbientDistanceDarkening = 0.04 * r0.x ^ -0.25 = 0.04
329 0x00000DF0:   itof r3.w, g_MaxSamples                       // g_MaxSamples = 128
330 0x00000E08:   mad r0.x, r0.x, r1.y, r1.x                    // r0.x = r0.x * g_InitialOffsetDistanceFactor + g_InitialOffsetBias = 3
331 0x00000E2C:   mad r0.x, r0.y, g_TracingStep, r0.x           // r0.x = r0.y * g_TracingStep + r0.x = 3
332 // r0.z = AO scale
333 
334 // offset 3*FinestVoxelSize from surface point
335 0x00000E54:   mad r2.xyz, r2.yzwy, r2.xxxx, -r4.xyzx        // r2.xyz = normalize(normal) - r4.xyz
336 0x00000E7C:   mad r1.xyz, r1.zzzz, r2.xyzx, r4.xyzx         // r1.xyz = g_NormalOffsetFactor * r2.xyz + r4.xyz = 0.5 * (normal + direction)
337 0x00000EA0:   dp3 r0.y, r1.xyzx, r1.xyzx
338 0x00000EBC:   rsq r0.y, r0.y
339 0x00000ED0:   mul r1.xyz, r0.yyyy, r1.xyzx                  // r1.xyz = normalize(r1.xyz)
340 0x00000EEC:   mul r0.y, r0.x, FinestVoxelSize.y             // r0.y = r0.x * FinestVoxelSize = 3 * 8 = 24
341 0x00000F0C:   mad r1.xyz, r1.xyzx, r0.yyyy, r3.xyzx         // r1.xyz = r1.xyz * 24 + world position
342 0x00000F30:   mul r0.y, rEmittanceStorageScale.y, g_EmittanceScale // r0.y = 1.0 * 0.52
343 // r1.xyz = start trace position = position + 3 * FinestVoxelSize * normalize(0.5 * (normal + direction))
344 
345 // set cone tracing step
346 // g_ConeFactor = 2 * sin a
347 // d' = d * (2 + 2*sin a) / (2 - 2*sin a) = d * (2 + g_ConeFactor) / (2 - g_ConeFactor)
348 0x00000F54:   add r2.x, -g_ConeFactor, l(2.000000)           // r2.x = 2 - g_ConeFactor = 2 - 0.87 = 1.13
349 0x00000F78:   add r2.y, g_ConeFactor, l(2.000000)            // r2.y = 2 + g_ConeFactor = 2.87
350 0x00000F98:   div r2.y, r2.y, r2.x
351 0x00000FB4:   add r2.y, r2.y, l(-1.000000)                   // r2.y = (2 + g_ConeFactor) / (2 - g_ConeFactor) -1 = 1.53
352 0x00000FD0:   add r2.z, MaxMipmapLevel.x, l(1.000000)        // r2.z = MaxMipmapLevel + 1 = 10
353 0x00000FF0:   mul r2.w, FinestVoxelSize.y, FinestVoxelSize.y // r2.w = FinestVoxelSize * FinestVoxelSize = 64
354 // r2 = {2 - g_ConeFactor, step factor, MaxMipmapLevel + 1, FinestVoxelSize * FinestVoxelSize}
355 //    = {1.13, 1.53, 10, 64}
356 
357 // initialize indirect color data
358 0x00001014:   movc r3.xyz, g_FlipOpacityDirections.yyyy, -r4.xyzx, r4.xyzx    // r3.xyz = cone tracing direction
359 0x00001040:   lt r5.xyz, l(0.000000, 0.000000, 0.000000, 0.000000), r4.xyzx
360 0x00001068:   and r4.w, r5.x, l(0x40400000)
361 0x00001084:   movc r5.xy, r5.yzyy, l(4.000000,5.000000,0,0), l(1.000000,2.000000,0,0)
362 0x000010C0:   mul r6.x, r4.w, EmittancePackingStride.x
363 0x000010E0:   mul r5.xy, r5.xyxx, EmittancePackingStride.xxxx
364 0x00001100:   mov r6.yz, l(0,0,0,0)
365 0x00001120:   mov r5.zw, l(0,0,0,0)
366 0x00001140:   mov r8.xyz, r1.xyzx                                    // r8.xyz = start trace position in world space
367 0x00001154:   mov r4.w, Use6DOpacity.w
368 0x0000116C:   mov r6.w, l(1.000000)
369 0x00001180:   mov r7.xyzw, l(0,0,0,1.000000)
370 0x000011A0:   mov r8.w, l(1.000000)
371 0x000011B4:   mov r9.z, r0.x                                         // r9.z = 3, distance in voxel count
372 0x000011C8:   mov r9.xyw, l(0,1.000000,0,0)
373 // r6.w = 1       = latest transparency
374 // r7 = {0,0,0,1} = {indirect color.rgb, last step transparency}
375 // r8.w = 1       = last two step transparency
376 // r9 = {0,1,3,0}
377 
378 // cone trace loop
379 // r8.xyz = sample position in world space
380 0x000011E8:   loop 
381 0x000011EC:     ge r10.x, r9.w, r3.w                                 // if(iteration >= 128) break;
382 0x00001208:     breakc_nz r10.x
383 
384 0x00001214:     add r10.xyz, r8.xyzx, -ClipmapAnchor.xyzx            // r10.xyz = offset from clipmap center
385 0x00001238:     max r10.y, |r10.z|, |r10.y|
386 0x0000125C:     max r10.x, r10.y, |r10.x|                            // r10.x = distance from clipmap center = max(r10.xyz)
387 0x0000127C:     dp2 r10.y, rNearestLevel0Boundary.wwww, r10.xxxx     // rNearestLevel0Boundary = 0
388 0x0000129C:     max r10.y, r10.y, l(1.000000)                        // r10.y = 1
389 // r10.y = 1
390 
391 // calculate sample level
392 // r9.z  = distance measured by voxel count
393 // r10.y = 1
394 // r11.x = level
395 0x000012B8:     mul r10.z, r9.z, g_ConeFactor                        // r10.z = cone diameter in voxels = r9.z * g_ConeFactor = r9.z * 0.87
396 0x000012D8:     lt r10.w, r10.z, r10.y                               // r10.w = (r10.z < r10.y), with r10.y = 1 in the captured frame
397 0x000012F4:     movc r10.z, r10.w, r10.y, r10.z                      // r10.z = max(r10.z, r10.y): clamp diameter to at least r10.y
398 0x00001318:     log r11.x, r10.z                                     // r11.x = level = log2(diameter in voxels)
399 0x0000132C:     ge r11.y, r11.x, r2.z                                // if(level >= MaxMipmapLevel + 1)
400 0x00001348:     if_nz r11.y                                          //      break
401 0x00001354:       break 
402 0x00001358:     endif
403 // r10.z = diameter measured by voxel count
404 // r10.w = is diameter less than 1 voxel
405 
406 // r10.x = (ClipmapAnchor.w - max-axis distance from ClipmapAnchor.xyz) - cone diameter in world units
407 0x0000135C:     add r10.x, -r10.x, ClipmapAnchor.w                   // r10.x = ClipmapAnchor.w - max-axis distance (ClipmapAnchor.w = 8128 in the captured frame)
408 0x00001380:     mul r11.y, r10.z, FinestVoxelSize.y                  // r11.y = diameter in world coordinate
409 0x000013A0:     mad r10.x, -r10.z, FinestVoxelSize.y, r10.x          // r10.x = (ClipmapAnchor.w - max-axis distance) - diameter in world coordinate
410 0x000013CC:     lt r11.z, r10.x, l(0.000000)                         // if (ClipmapAnchor.w - max-axis distance < diameter)
411 0x000013E8:     if_nz r11.z                                          //      break
412 0x000013F4:       break 
413 0x000013F8:     endif 
414 
415 // check if sample position is out of scene bounding box
416 0x000013FC:     mad r12.xyz, r10.zzzz, FinestVoxelSize.yyyy, r8.xyzx // r12.xyz = sample position + diameter
417 0x00001424:     lt r12.xyz, r12.xyzx, SceneBoundaryLower.xyzx
418 0x00001444:     or r11.z, r12.y, r12.x
419 0x00001460:     or r11.z, r12.z, r11.z
420 0x0000147C:     mad r12.xyz, -r10.zzzz, FinestVoxelSize.yyyy, r8.xyzx
421 0x000014A8:     lt r12.xyz, SceneBoundaryUpper.xyzx, r12.xyzx
422 0x000014C8:     or r11.w, r12.y, r12.x
423 0x000014E4:     or r11.w, r12.z, r11.w
424 0x00001500:     or r11.z, r11.w, r11.z
425 0x0000151C:     if_nz r11.z                                          // if out of box, break
426 0x00001528:       break 
427 0x0000152C:     endif 
428 
429 // calculate weight for lower and higher levels
430 // r6.w  = transparency for environment map
431 // r9.z  = distance measured by voxel count
432 // r10.x = (ClipmapAnchor.w - max-axis distance from ClipmapAnchor.xyz) - cone diameter in world units
433 // r11.x = level
434 // r9.x  = old AO
435 // r10.w = is diameter less than one voxel
436 0x00001530:     mad r11.z, r9.z, l(2.000000), r10.y                  // r11.z = distance in voxels * 2 + r10.y (r10.y = 1 in the captured frame)
437 0x00001554:     div r11.z, r11.z, r2.x                               // r11.z /= 2 - g_ConeFactor
438 0x00001570:     add r10.y, r9.z, r10.y                               // r10.y = r9.z + r10.y = distance + 1
439 0x0000158C:     max r10.y, r10.y, r11.z                              // r10.y = max(distance + 1, (distance * 2 + 1)/(2 - g_ConeFactor))
440 0x000015A8:     add r10.y, -r9.z, r10.y                              // r10.y = max(distance + 1, (distance * 2 + 1)/(2 - g_ConeFactor)) - distance
441 0x000015C8:     mul r11.z, r2.y, r9.z                                // r11.z = step in voxels = distance * step factor
442 0x000015E4:     movc r10.y, r10.w, r10.y, r11.z                      // r10.y = is diameter less than one voxel ? r10.y : distance * step factor
443 0x00001608:     mul r10.w, r1.w, r10.y                               // r10.w = step in voxels = r1.w * r10.y, r1.w = g_TracingStep (or g_AltTracingStep when the stencil test selected the alt settings)
444 0x00001624:     mul r11.z, r0.z, -r9.z                               // r11.z = AO scale * -distance = 0.04 * -distance measured by voxel count
445 0x00001644:     mul r11.z, r11.z, l(1.442695)                        // r11.z *= 1.442695 (= log2(e)), so the exp below evaluates e ^ (-0.04 * distance)
446 0x00001660:     exp r11.z, r11.z                                     // r11.z = new weight = 2 ^ r11.z = e ^ (-0.04 * distance measured by voxel count)
447 0x00001674:     add r11.w, r9.y, -r11.z                              // r11.w = delta weight = old weight - new weight = r9.y - r11.z
448 0x00001694:     mad r11.w, r11.w, r6.w, r9.x                         // r11.w = AO = delta weight * transparency for env map + old AO
449 0x000016B8:     div_sat r10.x, r10.x, r11.y                          // r10.x = saturate(r10.x / r11.y), r11.y = diameter in world coordinate
450 0x000016D4:     round_ni r12.x, r11.x                                // r12.x = floor(level)
451 0x000016E8:     add r12.y, r11.x, -r12.x                             // r12.y = level - floor(level) = weight for upper level
452 0x00001708:     add r12.z, -r12.y, l(1.000000)                       // r12.z = 1 - r12.y = weight for lower level
453 0x00001728:     mul r12.z, r10.x, r12.z                              // r12.z = weight for lower level * r10.x
454 0x00001744:     lt r11.x, MaxMipmapLevel.x, r11.x                    // r11.x = MaxMipmapLevel < level
455 0x00001764:     mul r10.x, r10.x, r12.y                              // r10.x = weight for upper level * r10.x
456 0x00001780:     movc r10.x, r11.x, l(0), r10.x                       // r10.x = (level > MaxMipmapLevel) ? 0 : r10.x
457 0x000017A4:     ftoi r11.x, r12.x                                    // r11.x = int(floor(level))
458 // r10.y = original step in voxels
459 // r10.w = step in voxels
460 //       = g_TracingStep * (max(distance + 1, (distance * 2 + 1)/(2 - g_ConeFactor)) - distance), if diameter less than one voxel
461 //       = g_TracingStep * ((2 + g_ConeFactor) / (2 - g_ConeFactor)-1) * distance, otherwise
462 // r11.z = new weight = 2 ^ (0.04 * -distance measured by voxel count * 1.442695) = e ^ (-0.04 * distance), since 1.442695 = log2(e)
463 // r11.w = AO
464 //       = old AO + (old weight - new weight) * transparency for env map
465 //       = r9.x + (r9.y - e ^ (-0.04 * distance)) * r6.w
466 
467 // calculate clipmap address
468 // r11.x = lower level
469 // r8.xyz = sample position
470 0x000017B8:     add r13.xyz, r8.xyzx, -ClipmapCenter.xyzx            // r13.xyz = sample position - clipmap center
471 0x000017DC:     mad r14.xyz, r13.xyzx, g_VxgiTranslationParameters[r11.x].xxxx, l(0.500000, 0.500000, 0.500000, 0.000000) // r14.xyz = r13.xyz * g_VxgiTranslationParameters[level].x + 0.5
472 0x00001814:     add r14.xyz, r14.xyzx, g_VxgiTranslationParameters2[r11.x].xyzx                                           // r14.xyz = r14.xyz + g_VxgiTranslationParameters2[level]
473 0x0000183C:     frc r14.xyz, r14.xyzx // r14.xyz = frac(r14.xyz), wraps the coordinate into [0,1)
474 0x00001850:     mul r15.xy, r14.xyxx, g_VxgiTranslationParameters[r11.x].yyyy                                             // r15.xy = r14.xy * clip map resolution
475 0x00001874:     mad r15.z, r14.z, g_VxgiTranslationParameters[r11.x].y, g_VxgiTranslationParameters[r11.x].z              // r15.z = r14.z * clip map resolution + clip map offset
476 0x000018A8:     mul r15.xyz, r15.xyzx, rOpacityTextureSize.xyzx                                                           // r15.xyz = opacity texture coordinate (lower level)
477 0x000018C8:     mad r16.xy, r14.xyxx, g_VxgiTranslationParameters[r11.x].yyyy, l(1.000000, 0.000000, 0.000000, 0.000000) // r16.xy = r14.xy * g_VxgiTranslationParameters[level].y + (1, 0)
478 0x00001900:     mad r16.z, r14.z, g_VxgiTranslationParameters[r11.x].y, g_VxgiTranslationParameters[r11.x].w // r16.z = r14.z * g_VxgiTranslationParameters[level].y + g_VxgiTranslationParameters[level].w
479 0x00001934:     mul r14.xyz, r16.xyzx, rEmittanceTextureSize.xyzx // r14.xyz = r16.xyz * rEmittanceTextureSize.xyz (lower level)
480 0x00001954:     add r12.y, r12.x, l(1.000000) // r12.y = floor(level) + 1
481 0x00001970:     ftoi r12.y, r12.y // r12.y = int(floor(level) + 1) = upper level index
482 
483 0x00001984:     mad r13.xyz, r13.xyzx, g_VxgiTranslationParameters[r12.y].xxxx, l(0.500000, 0.500000, 0.500000, 0.000000)
484 0x000019BC:     add r13.xyz, r13.xyzx, g_VxgiTranslationParameters2[r12.y].xyzx
485 0x000019E4:     frc r13.xyz, r13.xyzx
486 0x000019F8:     mul r16.xy, r13.xyxx, g_VxgiTranslationParameters[r12.y].yyyy
487 0x00001A1C:     mad r16.z, r13.z, g_VxgiTranslationParameters[r12.y].y, g_VxgiTranslationParameters[r12.y].z
488 0x00001A50:     mul r16.xyz, r16.xyzx, rOpacityTextureSize.xyzx
489 0x00001A70:     mad r17.xy, r13.xyxx, g_VxgiTranslationParameters[r12.y].yyyy, l(1.000000, 0.000000, 0.000000, 0.000000)
490 0x00001AA8:     mad r17.z, r13.z, g_VxgiTranslationParameters[r12.y].y, g_VxgiTranslationParameters[r12.y].w
491 0x00001ADC:     mul r13.xyz, r17.xyzx, rEmittanceTextureSize.xyzx
492 
493 // sample lower opacity clipmap
494 // r3.xyz = cone tracing direction
495 // r12.y = low opacity
496 // r15.y = is occupied
497 0x00001AFC:     sample_l_indexable(texture3d)(float,float,float,float) r17.xyzw, r15.xyzx, t_OpacityMap_Pos.xyzw, s_VoxelTextureSampler, l(0.000000)
498 0x00001B30:     if_nz r4.w         // if(Use6DOpacity)
499 0x00001B3C:       sample_l_indexable(texture3d)(float,float,float,float) r15.xyz, r15.xyzx, t_OpacityMap_Neg.xyzw, s_VoxelTextureSampler, l(0.000000)
500 0x00001B70:       mul_sat r18.xyz, -r3.xyzx, r17.xyzx                                  // r18.xyz = -1 * cone direction.xyz * opacity_pos.xyz
501 0x00001B90:       add r12.y, r18.y, r18.x
502 0x00001BAC:       add r12.y, r18.z, r12.y                                              // r12.y = r18.x + r18.y + r18.z
503 0x00001BC8:       mul_sat r15.xyz, r3.xyzx, r15.xyzx                                   // r15.xyz = cone direction.xyz * opacity_neg.xyz
504 0x00001BE4:       add r12.w, r15.y, r15.x
505 0x00001C00:       add r12.w, r15.z, r12.w                                              // r12.w = r15.x + r15.y + r15.z
506 0x00001C1C:       min r12.yw, r12.yyyw, l(0.000000, 1.000000, 0.000000, 1.000000)      // r12.yw = min(r12.yw, 1)
507 0x00001C44:       add r12.y, r12.w, r12.y
508 0x00001C60:     else 
509 0x00001C64:       mul_sat r15.xyz, |r4.xyzx|, r17.xyzx
510 0x00001C84:       add r12.w, r15.y, r15.x
511 0x00001CA0:       add r12.w, r15.z, r12.w                                              // r12.w = dot(|direction.xyz|, opacity.xyz)
512 0x00001CBC:       min r12.y, r12.w, l(1.000000)                                        // r12.y = lower opacity
513 0x00001CD8:     endif 
514 0x00001CDC:     ne r15.y, r17.w, l(0.000000)                                           // r15.y = if lower voxel is occupied
515 
516 // sample higher opacity clipmap
517 // r12.w = high opacity
518 // r15.x = is occupied
519 0x00001CF8:     sample_l_indexable(texture3d)(float,float,float,float) r17.xyzw, r16.xyzx, t_OpacityMap_Pos.xyzw, s_VoxelTextureSampler, l(0.000000)
520 0x00001D2C:     if_nz r4.w         // if(Use6DOpacity)
521 0x00001D38:       sample_l_indexable(texture3d)(float,float,float,float) r16.xyz, r16.xyzx, t_OpacityMap_Neg.xyzw, s_VoxelTextureSampler, l(0.000000)
522 0x00001D6C:       mul_sat r18.xyz, -r3.xyzx, r17.xyzx
523 0x00001D8C:       add r12.w, r18.y, r18.x
524 0x00001DA8:       add r12.w, r18.z, r12.w
525 0x00001DC4:       min r12.w, r12.w, l(1.000000)
526 0x00001DE0:       mul_sat r16.xyz, r3.xyzx, r16.xyzx
527 0x00001DFC:       add r13.w, r16.y, r16.x
528 0x00001E18:       add r13.w, r16.z, r13.w
529 0x00001E34:       min r13.w, r13.w, l(1.000000)
530 0x00001E50:       add r12.w, r12.w, r13.w
531 0x00001E6C:     else 
532 0x00001E70:       mul_sat r16.xyz, |r4.xyzx|, r17.xyzx
533 0x00001E90:       add r13.w, r16.y, r16.x
534 0x00001EAC:       add r13.w, r16.z, r13.w
535 0x00001EC8:       min r12.w, r13.w, l(1.000000)                                        // r12.w = higher opacity
536 0x00001EE4:     endif 
537 0x00001EE8:     ne r15.x, r17.w, l(0.000000)                                           // r15.x = if higher voxel is occupied
538 
539 // interpolate opacity
540 // r12.x = floor(level)
541 // r12.y = low opacity
542 // r12.z = low weight
543 // r12.w = high opacity
544 // r10.x = high weight
545 0x00001F04:     mul r12.w, r10.x, r12.w                                                // r12.w = higher opacity * weight
546 0x00001F20:     mad_sat r12.y, r12.y, r12.z, r12.w                                     // r12.y = interpolated opacity
547 0x00001F44:     add r12.x, r12.x, r12.x                                                // r12.x = floor(level) * 2
548 0x00001F60:     exp r12.x, r12.x                                                       // r12.x = 2 ^ (floor(level) * 2)
549 0x00001F74:     mul r12.x, r2.w, r12.x                                                 // r12.x = r12.x * FinestVoxelSize * FinestVoxelSize = r12.x * 64
550 0x00001F90:     mul r10.x, r10.x, r12.x                                                // r10.x = 2 ^ (floor(level) * 2) * 64 * high weight
551 0x00001FAC:     mul r15.w, r12.z, r12.x                                                // r15.w = 2 ^ (floor(level) * 2) * 64 * low weight
552 0x00001FC8:     mul r15.z, r10.x, l(4.000000)                                          // r15.z = 2 ^ (floor(level) * 2) * 64 * high weight * 4
553 0x00001FE4:     and r10.x, r11.x, l(1)                                                 // r10.x = is lower level odd ?
554 0x00002000:     movc r15.xyzw, r10.xxxx, r15.xyzw, r15.yxwz                            // r15.xyzw = is odd ? r15.xyzw : r15.yxwz
555 // r15 = {is even occupied, is odd occupied, even weight, odd weight}
556 
557 // sample even color clipmap
558 0x00002024:     if_nz r15.x
559 0x00002030:       movc r12.xzw, r10.xxxx, r13.xxyz, r14.xxyz
560 0x00002054:       add r16.xyz, r6.xyyx, r12.xzwx
561 0x00002070:       add r17.xyz, r5.xzzx, r12.xzwx
562 0x0000208C:       add r12.xzw, r5.yyzz, r12.xxzw
563 0x000020A8:       sample_l_indexable(texture3d)(float,float,float,float) r16.xyz, r16.xyzx, t_EmittanceEven.xyzw, s_VoxelTextureSampler, l(0.000000)
564 0x000020DC:       sample_l_indexable(texture3d)(float,float,float,float) r17.xyz, r17.xyzx, t_EmittanceEven.xyzw, s_VoxelTextureSampler, l(0.000000)
565 0x00002110:       sample_l_indexable(texture3d)(float,float,float,float) r12.xzw, r12.xzwx, t_EmittanceEven.xwyz, s_VoxelTextureSampler, l(0.000000)
566 0x00002144:       mul r17.xyz, |r4.yyyy|, r17.xyzx
567 0x00002164:       mad r16.xyz, |r4.xxxx|, r16.xyzx, r17.xyzx
568 0x0000218C:       mad r12.xzw, |r4.zzzz|, r12.xxzw, r16.xxyz                            // r12.xzw = interpolated color in even texture
569 0x000021B4:       mul r12.xzw, r15.zzzz, r12.xxzw                                       // r12.xzw = even color
570 0x000021D0:     else 
571 0x000021D4:       mov r12.xzw, l(0,0,0,0)
572 0x000021F4:     endif
573 
574 // sample odd color clipmap
575 0x000021F8:     lt r11.x, l(0.000000), r15.w
576 0x00002214:     and r11.x, r11.x, r15.y
577 0x00002230:     if_nz r11.x                                                             // if (is odd occupied && odd weight > 0)
578 0x0000223C:       movc r13.xyz, r10.xxxx, r14.xyzx, r13.xyzx
579 0x00002260:       add r14.xyz, r6.xzzx, r13.xyzx
580 0x0000227C:       add r16.xyz, r5.xwwx, r13.xyzx
581 0x00002298:       add r13.xyz, r5.ywwy, r13.xyzx
582 0x000022B4:       sample_l_indexable(texture3d)(float,float,float,float) r14.xyz, r14.xyzx, t_EmittanceOdd.xyzw, s_VoxelTextureSampler, l(0.000000)
583 0x000022E8:       sample_l_indexable(texture3d)(float,float,float,float) r16.xyz, r16.xyzx, t_EmittanceOdd.xyzw, s_VoxelTextureSampler, l(0.000000)
584 0x0000231C:       sample_l_indexable(texture3d)(float,float,float,float) r13.xyz, r13.xyzx, t_EmittanceOdd.xyzw, s_VoxelTextureSampler, l(0.000000)
585 0x00002350:       mul r16.xyz, |r4.yyyy|, r16.xyzx
586 0x00002370:       mad r14.xyz, |r4.xxxx|, r14.xyzx, r16.xyzx
587 0x00002398:       mad r13.xyz, |r4.zzzz|, r13.xyzx, r14.xyzx
588 0x000023C0:       mad r12.xzw, r13.xxyz, r15.wwww, r12.xxzw                             // r12.xzw = final interpolated color
589 0x000023E4:       mov r15.x, l(-1)
590 0x000023F8:     endif
591 // r12.xzw = final interpolated color
592 
593 // Add color contribution and update transparency for environment map
594 // r12.y = interpolated opacity
595 // r10.w = step in voxels
596 // r10.z = diameter
597 // r11.y = distance in world coordinate
598 // r6.w = old transparency
599 0x000023FC:     mul r13.xyz, r0.yyyy, r12.xzwx                                          // r13.xyz = scaled color = rEmittanceStorageScale.y * g_EmittanceScale * color = 0.52 * color
600 0x00002418:     div r10.x, g_ConeFactor, r11.y                                          // r10.x = g_ConeFactor / distance
601 0x00002438:     mul r10.x, r10.x, r10.x                                                 // r10.x = (g_ConeFactor / distance) ^ 2
602 0x00002454:     mul r13.xyz, r10.xxxx, r13.xyzx                                         // color.xyz = (g_ConeFactor / distance) ^ 2 * color.xyz
603 0x00002470:     movc r12.xzw, r15.xxxx, r13.xxyz, r12.xxzw                              // r12.xzw = r15.x ? r13.xyz (scaled color) : r12.xzw (movc is a per-component select, not arithmetic)
604 0x00002494:     lt r10.x, l(0.000000), r12.y                                            // r10.x = opacity > 0
605 0x000024B0:     add r11.x, -r12.y, l(1.000000)                                          // r11.x = 1 - opacity
606 0x000024D0:     mul r11.y, r10.w, g_OpacityCorrectionFactor                             // r11.y = step in voxels * g_OpacityCorrectionFactor
607 0x000024F0:     div r10.z, r11.y, r10.z                                                 // r10.z = factor = step in voxels * g_OpacityCorrectionFactor / diameter
608 0x0000250C:     log r11.x, r11.x                                                        // r11.x = log2(1 - opacity)
609 0x00002520:     mul r10.z, r10.z, r11.x                                                 // r10.z = r10.z * log2(1 - opacity)
610 0x0000253C:     exp r10.z, r10.z                                                        // r10.z = 2 ^ (r10.z) = 2 ^ (factor * log2(1 - opacity)) = (1 - opacity) ^ factor
611 0x00002550:     add r10.z, -r10.z, l(1.000000)                                          // r10.z = 1 - (1 - opacity) ^ factor
612 0x00002570:     and r10.x, r10.z, r10.x                                                 // r10.x = opacity > 0 ? 1 - (1 - opacity) ^ factor : 0
613 0x0000258C:     mad r12.xyz, r8.wwww, r12.xzwx, r7.xyzx                                 // r12.xyz = new indirect color = r8.w * color.xyz + old indirect color (r8.w = the second last transparency)
614 0x000025B0:     add r10.x, -r10.x, l(1.000000)                                          // r10.x = step transparency = (1 - opacity) ^ factor
615 0x000025D0:     mul r10.x, r6.w, r10.x                                                  // r10.x = r6.w * r10.x = transparency for environment map * step transparency
616 // r10.x = new transparency for environment map
617 //       = old transparency * ((1 - interpolated opacity) ^ (step in voxels * g_OpacityCorrectionFactor / diameter in voxels)), if interpolated opacity > 0
618 //       = old transparency, otherwise
619 // r12.xyz = new indirect color
620 //         = old indirect color + interpolated color * the second last transparency
621 
622 // terminate cone tracing
623 0x000025EC:     lt r10.z, r7.w, l(0.000100)                                             // r10.z = r7.w < 0.0001
624 0x00002608:     if_nz r10.z                                                             // if(r7.w < 0.0001)
625 0x00002614:       mov r7.xyz, r12.xyzx                                                  //     r7.xyz = indirect color
626 0x00002628:       mov r9.x, r11.w                                                       //     r9.x = AO = r11.w
627 0x0000263C:       mov r6.w, r10.x                                                       //     r6.w = r10.x = transparency for environment map
628 0x00002650:       break 
629 0x00002654:     endif
630 
631 // proceed to next iteration
632 // r11.z = new AO weight = 2 ^ (0.04 * distance measured by voxel count * 1.442695)
633 // r11.w = AO
634 // r10.x = transparency for environment map
635 0x00002658:     mad r9.z, r10.y, r1.w, r9.z                                             // r9.z = new distance = g_TracingStep * step in voxels + distance in voxels
636 0x0000267C:     mul r10.y, r10.w, FinestVoxelSize.y                                     // r10.y = step length
637 0x0000269C:     mad r8.xyz, r10.yyyy, r4.xyzx, r8.xyzx                                  // r8.xyz = new sampling position = old sampling position + step size * direction
638 0x000026C0:     add r9.w, r9.w, l(1.000000)                                             // r9.w = iteration++
639 0x000026DC:     mov r7.xyz, r12.xyzx                                                    // r7.xyz = indirect color
640 0x000026F0:     mov r10.y, r6.w                                                         // r10.y = current transparency (saved before r6.w is overwritten below)
641 0x00002704:     mov r8.w, r7.w                                                          // r8.w = previous r7.w transparency (becomes the second last)
642 0x00002718:     mov r9.xy, r11.wzww                                                     // r9.xy = r11.wz = {AO, 2 ^ (0.04 * distance measured by voxel count * 1.442695)}
643 0x0000272C:     mov r6.w, r10.x                                                         // r6.w = new transparency for env map = r10.x
644 0x00002740:     mov r7.w, r10.y                                                         // r7.w = transparency
645 // r7.xyz = indirect light
646 // r6.w = latest transparency for environment map
647 // r7.w = last transparency for environment map
648 // r8.w = the second last transparency for environment map
649 // r9.x = AO
650 // r9.y = new AO weight
651 
652 0x00002754:   endloop 
653 
654 // sample environment map
655 0x00002758:   mov_sat r9.x, r9.x
656 0x0000276C:   add_sat r0.x, -r6.w, l(1.000000)                                          // r0.x = saturate(1 - transparency)
657 0x0000278C:   add r0.x, -r0.x, l(1.000000)                                              // r0.x = 1 - saturate(1 - transparency) = saturate(transparency)
658 0x000027AC:   mul r0.xyz, r0.xxxx, g_EnvironmentMapTint.xyzx                            // r0.xyz = g_EnvironmentMapTint * transparency
659 0x000027CC:   lt r1.xyz, l(0.000000, 0.000000, 0.000000, 0.000000), r0.xyzx
660 0x000027F4:   or r1.x, r1.y, r1.x
661 0x00002810:   or r1.x, r1.z, r1.x
662 0x0000282C:   if_nz r1.x                                                                // if (any(g_EnvironmentMapTint * transparency > 0))
663 0x00002838:     mul r1.x, g_ConeFactor, g_EnvironmentMapResolution                      //      r1.x = g_ConeFactor * g_EnvironmentMapResolution
664 0x0000285C:     log r1.x, r1.x                                                          //      r1.x = log2(g_ConeFactor * g_EnvironmentMapResolution)
665 0x00002870:     add r1.x, r1.x, l(-1.000000)
666 0x0000288C:     max r1.x, r1.x, l(0.000000)                                             //      r1.x = max(log2(g_ConeFactor * g_EnvironmentMapResolution)-1,0)
667 0x000028A8:     min r1.x, r1.x, g_MaxEnvironmentMapMipLevel                             //      r1.x = min(g_MaxEnvironmentMapMipLevel, max(log2(g_ConeFactor * g_EnvironmentMapResolution)-1,0))
668 0x000028C8:     sample_l_indexable(texturecube)(float,float,float,float) r1.xyz, r4.xyzx, t_EnvironmentMap.xyzw, s_EnvironmentMapSampler, r1.x
669 0x000028FC:     mul r0.xyz, r0.xyzx, r1.xyzx                                            //      r0.xyz = environment color = g_EnvironmentMapTint * transparency * environment map color
670 0x00002918:     mad r7.xyz, r0.xyzx, g_rNumCones.wwww, r7.xyzx                          //      r7.xyz = indirect color = r7.xyz + environment color / number of cones
671 0x00002940:   endif 
672 // r7.xyz = indirect light
673 
674 // add ambient color
675 // r9.x = AO?
676 0x00002944:   mad_sat r0.x, r9.x, g_AmbientScale, g_AmbientBias                         // r0.x = saturate(r9.x * g_AmbientScale + g_AmbientBias)
677 0x00002970:   log r0.x, r0.x
678 0x00002984:   mul r0.x, r0.x, g_AmbientPower
679 0x000029A4:   exp r0.x, r0.x                                                            // r0.x = ambient strength = pow(saturate(r9.x * g_AmbientScale + g_AmbientBias), g_AmbientPower)
680 0x000029B8:   mul r0.xyz, r0.xxxx, g_AmbientColor.xyzx
681 0x000029D8:   mul r0.xyz, r0.xyzx, g_rNumCones.wwww                                     // r0.xyz = ambient term = ambient strength * g_AmbientColor.xyzx / number of cones
682 0x000029F8:   mad r0.xyz, r7.xyzx, l(0.318310, 0.318310, 0.318310, 0.000000), r0.xyzx   // r0.xyz = all indirect light = ambient term + indirect light * 0.318310 (0.318310 ~= 1/pi)
683 0x00002A28:   mul r1.xyz, r4.xxxx, r0.xyzx                                              // r1.xyz = all indirect light * direction.x
684 0x00002A44:   mul r2.xyz, r4.yyyy, r0.xyzx                                              // r2.xyz = all indirect light * direction.y = all indirect light * cos(theta)
685 0x00002A60:   mul r0.xyz, r4.zzzz, r0.xyzx                                              // r0.xyz = all indirect light * direction.z
686 0x00002A7C: else 
687 0x00002A80:   mov r1.xyz, l(0,0,0,0)
688 0x00002AA0:   mov r2.xyz, l(0,0,0,0)
689 0x00002AC0:   mov r0.xyz, l(0,0,0,0)
690 0x00002AE0: endif 
691 
692 // write x axis indirect illumination (a component whose exponent bits are all ones, i.e. Inf or NaN, is written as 0)
693 0x00002AE4: mul r0.w, r0.w, g_rNumCones.w
694 0x00002B04: and r1.w, r1.x, l(0x7f800000)
695 0x00002B20: ieq r1.w, r1.w, l(0x7f800000)
696 0x00002B3C: movc o0.x, r1.w, l(0), r1.x
697 0x00002B60: and r1.x, r1.y, l(0x7f800000)
698 0x00002B7C: ieq r1.x, r1.x, l(0x7f800000)
699 0x00002B98: movc o0.y, r1.x, l(0), r1.y
700 0x00002BBC: and r1.x, r1.z, l(0x7f800000)
701 0x00002BD8: ieq r1.x, r1.x, l(0x7f800000)
702 0x00002BF4: movc o0.z, r1.x, l(0), r1.z
703 0x00002C18: and r1.x, r0.w, l(0x7f800000)
704 0x00002C34: ieq r1.x, r1.x, l(0x7f800000)
705 0x00002C50: movc o0.w, r1.x, l(0), r0.w
706 
707 // write y axis indirect illumination
708 0x00002C74: and r0.w, r2.x, l(0x7f800000)
709 0x00002C90: ieq r0.w, r0.w, l(0x7f800000)
710 0x00002CAC: movc o1.x, r0.w, l(0), r2.x
711 0x00002CD0: and r0.w, r2.y, l(0x7f800000)
712 0x00002CEC: ieq r0.w, r0.w, l(0x7f800000)
713 0x00002D08: movc o1.y, r0.w, l(0), r2.y
714 0x00002D2C: and r0.w, r2.z, l(0x7f800000)
715 0x00002D48: ieq r0.w, r0.w, l(0x7f800000)
716 0x00002D64: movc o1.z, r0.w, l(0), r2.z
717 
718 // write z axis indirect illumination
719 0x00002D88: and r0.w, r0.x, l(0x7f800000)
720 0x00002DA4: ieq r0.w, r0.w, l(0x7f800000)
721 0x00002DC0: movc o2.x, r0.w, l(0), r0.x
722 0x00002DE4: and r0.x, r0.y, l(0x7f800000)
723 0x00002E00: ieq r0.x, r0.x, l(0x7f800000)
724 0x00002E1C: movc o2.y, r0.x, l(0), r0.y
725 0x00002E40: and r0.x, r0.z, l(0x7f800000)
726 0x00002E5C: ieq r0.x, r0.x, l(0x7f800000)
727 0x00002E78: movc o2.z, r0.x, l(0), r0.z
728 0x00002E9C: mov o1.w, l(0)
729 0x00002EB0: mov o2.w, l(0)
730 0x00002EC4: ret 
731 // Approximately 399 instruction slots used

 

以上是关于Anatomy of Nvidia's Voxel Cone Tracing Code (VXGI)的主要内容,如果未能解决你的问题,请参考以下文章

Life of a triangle - NVIDIA's logical pipeline

The Anatomy of a Large-Scale Hypertextual Web Search Engine

Anatomy of a Program in Memory.剖析程序的内存布局

翻译Anatomy of a Program in Memory—剖析内存中的一个程序(进程的虚拟存储器映像布局详解)

大规模超文本网络搜索引擎解析 [ The Anatomy of a Large-Scale Hypertextual Web Search Engine ]

CodeForces 1109F. Sasha and Algorithm of Silence's Sounds