Anatomy of Nvidia's Voxel Cone Tracing Code (VXGI)
Posted by dydx
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Anatomy of Nvidia's Voxel Cone Tracing Code (VXGI)相关的知识,希望对你有一定的参考价值。
Here is the diffuse cone tracing code: the annotated disassembly of the ps_5_0 pixel shader.
1 // 2 Generated by Microsoft (R) HLSL Shader Compiler 10.0.10011.16384 3 Buffer Definitions: 4 cbuffer AbstractTracingCB 5 { 6 struct VxgiAbstractTracingConstants 7 { 8 9 float4 rOpacityTextureSize; // Offset: 0 10 float4 rEmittanceTextureSize; // Offset: 16 11 float4 ClipmapAnchor; // Offset: 32 12 float4 SceneBoundaryLower; // Offset: 48 13 float4 SceneBoundaryUpper; // Offset: 64 14 float4 ClipmapCenter; // Offset: 80 15 float4 TracingToroidalOffset; // Offset: 96 16 float EmittancePackingStride; // Offset: 112 17 float FinestVoxelSize; // Offset: 116 18 float StackTextureSize; // Offset: 120 19 float rNearestLevel0Boundary; // Offset: 124 20 float MaxMipmapLevel; // Offset: 128 21 float rEmittanceStorageScale; // Offset: 132 22 float rClipmapSizeWorld; // Offset: 136 23 uint Use6DOpacity; // Offset: 140 24 } g_VxgiAbstractTracingCB; // Offset: 0 Size: 144 25 } 26 cbuffer TranslationCB 27 { 28 float4 g_VxgiTranslationParameters[13];// Offset: 0 Size: 208 29 float4 g_VxgiTranslationParameters2[13];// Offset: 208 Size: 208 30 } 31 cbuffer cBuiltinTracingParameters 32 { 33 struct GBufferParameters 34 { 35 36 row_major float4x4 viewProjMatrix;// Offset: 0 37 row_major float4x4 viewProjMatrixInv;// Offset: 64 38 row_major float4x4 viewMatrix; // Offset: 128 39 float4 cameraPosition; // Offset: 192 40 float4 uvToView; // Offset: 208 41 float2 gbufferSize; // Offset: 224 42 float2 gbufferSizeInv; // Offset: 232 43 float2 viewportOrigin; // Offset: 240 44 float2 viewportSize; // Offset: 248 45 float2 viewportSizeInv; // Offset: 256 46 float2 firstSamplePosition; // Offset: 264 47 float projectionA; // Offset: 272 48 float projectionB; // Offset: 276 49 float depthScale; // Offset: 280 50 float depthBias; // Offset: 284 51 float normalScale; // Offset: 288 52 float normalBias; // Offset: 292 53 float radiusToScreen; // Offset: 296 54 } g_GBuffer; // Offset: 0 Size: 300 55 56 struct GBufferParameters 57 { 58 59 row_major float4x4 viewProjMatrix;// Offset: 304 60 
row_major float4x4 viewProjMatrixInv;// Offset: 368 61 row_major float4x4 viewMatrix; // Offset: 432 62 float4 cameraPosition; // Offset: 496 63 float4 uvToView; // Offset: 512 64 float2 gbufferSize; // Offset: 528 65 float2 gbufferSizeInv; // Offset: 536 66 float2 viewportOrigin; // Offset: 544 67 float2 viewportSize; // Offset: 552 68 float2 viewportSizeInv; // Offset: 560 69 float2 firstSamplePosition; // Offset: 568 70 float projectionA; // Offset: 576 71 float projectionB; // Offset: 580 72 float depthScale; // Offset: 584 73 float depthBias; // Offset: 588 74 float normalScale; // Offset: 592 75 float normalBias; // Offset: 596 76 float radiusToScreen; // Offset: 600 77 } g_PreviousGBuffer; // Offset: 304 Size: 300 [unused] 78 row_major float4x4 g_ReprojectionMatrix;// Offset: 608 Size: 64 [unused] 79 float4 g_AmbientColor; // Offset: 672 Size: 16 80 float4 g_DownsampleScale; // Offset: 688 Size: 16 81 float4 g_DebugParams; // Offset: 704 Size: 16 [unused] 82 float4 g_EnvironmentMapTint; // Offset: 720 Size: 16 83 float4 g_RefinementGridResolution; // Offset: 736 Size: 16 [unused] 84 float4 g_BackgroundColor; // Offset: 752 Size: 16 [unused] 85 int2 g_PixelToSave; // Offset: 768 Size: 8 [unused] 86 int2 g_RandomOffset; // Offset: 776 Size: 8 87 float2 g_GridOrigin; // Offset: 784 Size: 8 [unused] 88 float g_ConeFactor; // Offset: 792 Size: 4 89 float g_TracingStep; // Offset: 796 Size: 4 90 float g_OpacityCorrectionFactor; // Offset: 800 Size: 4 91 int g_MaxSamples; // Offset: 804 Size: 4 92 int g_NumCones; // Offset: 808 Size: 4 [unused] 93 float g_rNumCones; // Offset: 812 Size: 4 94 float g_EmittanceScale; // Offset: 816 Size: 4 95 float g_EnvironmentMapResolution; // Offset: 820 Size: 4 96 float g_MaxEnvironmentMapMipLevel; // Offset: 824 Size: 4 97 float g_NormalOffsetFactor; // Offset: 828 Size: 4 98 float g_AmbientAttenuationFactor; // Offset: 832 Size: 4 99 uint g_FlipOpacityDirections; // Offset: 836 Size: 4 100 float g_InitialOffsetBias; // Offset: 
840 Size: 4 101 float g_InitialOffsetDistanceFactor;// Offset: 844 Size: 4 102 uint g_EnableSpecularRandomOffsets;// Offset: 848 Size: 4 [unused] 103 uint g_NumDiscontinuityLevels; // Offset: 852 Size: 4 [unused] 104 float g_TemporalReprojectionWeight;// Offset: 856 Size: 4 [unused] 105 float g_TangentJitterScale; // Offset: 860 Size: 4 [unused] 106 float g_DepthDeltaSign; // Offset: 864 Size: 4 [unused] 107 float g_ReprojectionDepthWeightScale;// Offset: 868 Size: 4 [unused] 108 float g_ReprojectionNormalWeightExponent;// Offset: 872 Size: 4 [unused] 109 float g_InterpolationWeightThreshold;// Offset: 876 Size: 4 [unused] 110 uint g_EnableRefinement; // Offset: 880 Size: 4 [unused] 111 float g_AmbientScale; // Offset: 884 Size: 4 112 float g_AmbientBias; // Offset: 888 Size: 4 113 float g_AmbientPower; // Offset: 892 Size: 4 114 float g_AmbientDistanceDarkening; // Offset: 896 Size: 4 115 int g_AltSettingsStencilMask; // Offset: 900 Size: 4 116 int g_AltSettingsStencilRefValue; // Offset: 904 Size: 4 117 float g_AltInitialOffsetBias; // Offset: 908 Size: 4 118 float g_AltInitialOffsetDistanceFactor;// Offset: 912 Size: 4 119 float g_AltNormalOffsetFactor; // Offset: 916 Size: 4 120 float g_AltTracingStep; // Offset: 920 Size: 4 121 float g_SSAO_SurfaceBias; // Offset: 924 Size: 4 [unused] 122 float g_SSAO_RadiusWorld; // Offset: 928 Size: 4 [unused] 123 float g_SSAO_rBackgroundViewDepth; // Offset: 932 Size: 4 [unused] 124 float g_SSAO_CoarseAO; // Offset: 936 Size: 4 [unused] 125 float g_SSAO_PowerExponent; // Offset: 940 Size: 4 [unused] 126 } 127 Resource Bindings: 128 Name Type Format Dim Slot Elements 129 ------------------------------ ---------- ------- ----------- ---- -------- 130 s_VoxelTextureSampler sampler NA NA 0 1 131 s_EnvironmentMapSampler sampler NA NA 11 1 132 g_DepthBuffer texture float4 2d 0 1 133 g_TargetFlatNormal texture float4 2d 3 1 134 g_TargetStencil texture uint2 2d 4 1 135 t_OpacityMap_Pos texture float4 3d 6 1 136 t_OpacityMap_Neg 
texture float4 3d 7 1 137 t_ConeDirectionMap texture float4 2darray 10 1 138 t_EnvironmentMap texture float4 cube 11 1 139 t_EmittanceEven texture float4 3d 12 1 140 t_EmittanceOdd texture float4 3d 15 1 141 AbstractTracingCB cbuffer NA NA 0 1 142 TranslationCB cbuffer NA NA 1 1 143 cBuiltinTracingParameters cbuffer NA NA 2 1 144 Input signature: 145 Name Index Mask Register SysValue Format Used 146 -------------------- ----- ------ -------- -------- ------- ------ 147 TEXCOORD 0 xy 0 NONE float 148 INSTANCEID 0 z 0 NONE float z 149 RAY 0 xyzw 1 NONE float 150 SV_Position 0 xyzw 2 POS float xy 151 Output signature: 152 Name Index Mask Register SysValue Format Used 153 -------------------- ----- ------ -------- -------- ------- ------ 154 SV_Target 0 xyzw 0 TARGET float xyzw 155 SV_Target 1 xyzw 1 TARGET float xyzw 156 SV_Target 2 xyzw 2 TARGET float xyzw 157 158 0x00000000: ps_5_0 159 0x00000008: dcl_globalFlags refactoringAllowed 160 0x0000000C: dcl_constantbuffer cb0[9], immediateIndexed 161 0x0000001C: dcl_constantbuffer cb1[26], dynamicIndexed 162 0x0000002C: dcl_constantbuffer cb2[58], immediateIndexed 163 0x0000003C: dcl_sampler s_VoxelTextureSampler, mode_default 164 0x00000048: dcl_sampler s_EnvironmentMapSampler, mode_default 165 0x00000054: dcl_resource_texture2d (float,float,float,float) g_DepthBuffer 166 0x00000064: dcl_resource_texture2d (float,float,float,float) g_TargetFlatNormal 167 0x00000074: dcl_resource_texture2d (uint,uint,uint,uint) g_TargetStencil 168 0x00000084: dcl_resource_texture3d (float,float,float,float) t_OpacityMap_Pos 169 0x00000094: dcl_resource_texture3d (float,float,float,float) t_OpacityMap_Neg 170 0x000000A4: dcl_resource_texture2darray (float,float,float,float) t_ConeDirectionMap 171 0x000000B4: dcl_resource_texturecube (float,float,float,float) t_EnvironmentMap 172 0x000000C4: dcl_resource_texture3d (float,float,float,float) t_EmittanceEven 173 0x000000D4: dcl_resource_texture3d (float,float,float,float) t_EmittanceOdd 174 
0x000000E4: dcl_input_ps linear v0.z 175 0x000000F0: dcl_input_ps_siv linear noperspective v2.xy, position 176 0x00000100: dcl_output o0.xyzw 177 0x0000010C: dcl_output o1.xyzw 178 0x00000118: dcl_output o2.xyzw 179 0x00000124: dcl_temps 19 180 181 0x0000012C: lt r0.xy, l(1.000000, 1.000000, 0.000000, 0.000000), g_DownsampleScale.xyxx 182 0x00000158: and r0.x, r0.y, r0.x 183 0x00000174: round_ni r0.yz, v2.xxyx 184 0x00000188: itof r1.xy, g_RandomOffset.zwzz 185 0x000001A0: add r1.xy, r0.yzyy, r1.xyxx 186 0x000001BC: mul r1.xy, r1.xyxx, g_DownsampleScale.zwzz 187 0x000001DC: frc r1.xy, r1.xyxx 188 0x000001F0: mul r1.xy, r1.xyxx, g_DownsampleScale.xyxx 189 0x00000210: round_ni r1.xy, r1.xyxx 190 0x00000224: add r0.w, -r1.x, g_DownsampleScale.x 191 0x00000248: mad r0.w, r0.z, g_DownsampleScale.y, r0.w 192 0x00000270: mad r1.x, r0.y, g_DownsampleScale.x, r1.y 193 0x00000298: add r0.w, r0.w, firstSamplePosition.w 194 0x000002B8: add r1.x, r1.x, firstSamplePosition.z 195 0x000002D8: add r1.y, r0.w, l(-1.000000) 196 0x000002F4: movc r0.xw, r0.xxxx, r1.xxxy, v2.xxxy 197 0x00000318: ftoi r1.xy, r0.xwxx 198 0x0000032C: mov r1.zw, l(0,0,0,0) 199 200 // load depth and normal 201 // r2 = {depth, normal.x, normal.y, normal.z} 202 0x0000034C: ld_indexable(texture2d)(float,float,float,float) r2.x, r1.xyww, g_DepthBuffer.xyzw 203 0x00000370: ld_indexable(texture2d)(float,float,float,float) r2.yzw, r1.xyww, g_TargetFlatNormal.wxyz 204 0x00000394: mad r2.x, r2.x, depthScale, depthBias 205 0x000003C0: mad r2.yzw, r2.yyzw, normalScale.xxxx, normalBias.yyyy 206 0x000003EC: dp3 r3.x, r2.yzwy, r2.yzwy 207 0x00000408: sqrt r3.x, r3.x 208 0x0000041C: div r2.yzw, r2.yyzw, r3.xxxx // normalize 209 0x00000438: ge r3.x, l(0.000000), r3.x 210 0x00000454: movc r2.yzw, r3.xxxx, l(0,0,0,0), r2.yyzw 211 212 // convert screen position to world space 213 // r3.xyz = world space position 214 // r0.xw = {screenX, screenY} [-1,1] 215 0x00000484: add r0.xw, r0.xxxw, -viewportOrigin.xxxy 216 0x000004A8: 
mul r0.xw, r0.xxxw, viewportSizeInv.xxxy 217 0x000004C8: mad r0.x, r0.x, l(2.000000), l(-1.000000) 218 0x000004EC: mad r0.w, -r0.w, l(2.000000), l(1.000000) 219 0x00000514: mul r3.xyzw, r0.wwww, viewProjMatrixInv.xyzw 220 0x00000534: mad r3.xyzw, r0.xxxx, viewProjMatrixInv.xyzw, r3.xyzw 221 0x0000055C: mad r3.xyzw, r2.xxxx, viewProjMatrixInv.xyzw, r3.xyzw 222 0x00000584: add r3.xyzw, r3.xyzw, viewProjMatrixInv.xyzw 223 0x000005A4: div r3.xyz, r3.xyzx, r3.wwww 224 225 // calculate distance from clipmap center 226 0x000005C0: add r4.xyz, r3.xyzx, -ClipmapAnchor.xyzx 227 0x000005E4: max r0.x, |r4.z|, |r4.y| 228 0x00000608: max r0.x, r0.x, |r4.x| // r0.x = max distance from clipmap center 229 0x00000628: add r0.w, -r0.x, ClipmapAnchor.w 230 0x0000064C: mul r0.w, r0.w, rClipmapSizeWorld.z // r0.w = (ClipmapAnchor.w - r0.x) * rClipmapSizeWorld 231 0x0000066C: mul_sat r0.w, r0.w, l(4.000000) // r0.w = saturate(4 * (ClipmapAnchor.w - r0.x) * rClipmapSizeWorld) 232 // r0.x = max distance 233 // r0.w = saturate(4 * (ClipmapAnchor.w - r0.x) * rClipmapSizeWorld) 234 // r4 = offset from clipmap center 235 236 // check if normal and depth is valid 237 // r2.x = is valid 238 0x00000688: and r4.xyz, r2.yzwy, l(0x7f800000, 0x7f800000, 0x7f800000, 0) // fetch exponent part 239 0x000006B0: ieq r5.xyz, r4.xyzx, l(0, 0, 0, 0) 240 0x000006D8: and r2.x, r5.y, r5.x 241 0x000006F4: and r2.x, r5.z, r2.x // check if all exponent part is 0 242 0x00000710: ieq r4.xyz, r4.xyzx, l(0x7f800000, 0x7f800000, 0x7f800000, 0) 243 0x00000738: or r3.w, r4.y, r4.x 244 0x00000754: or r3.w, r4.z, r3.w 245 0x00000770: or r2.x, r2.x, r3.w 246 0x0000078C: not r2.x, r2.x 247 0x000007A0: ne r3.w, r0.w, l(0.000000) 248 0x000007BC: and r2.x, r2.x, r3.w 249 250 // calculate indirect diffuse light 251 0x000007D8: if_nz r2.x 252 // normalize normal 253 0x000007E4: ftou r4.z, v0.z // r4.z = instance id 254 0x000007F8: dp3 r2.x, r2.yzwy, r2.yzwy // r2.x = dot(normal, normal) 255 0x00000814: rsq r2.x, r2.x // r2.x = 1 / 
length(normal) 256 0x00000828: mul r5.xyz, r2.xxxx, r2.yzwy // r5.xyz = normalize(normal) 257 258 // calculate local coordinate system 259 // r5 = normal 260 // r6 = tangent 261 // r7 = bitangent 262 0x00000844: mov r6.xyz, |r5.xyzx| // r6.xyz = |normalize(normal)| 263 0x0000085C: max r3.w, r6.z, r6.y 264 0x00000878: max r3.w, r3.w, r6.x // r3.w = max3(normalize(normal)) 265 0x00000894: mad r7.xyz, -r2.zyyz, r2.xxxx, -r5.zzyz // r7.xyz = -r2.zyy / |normal| - r5.zzy 266 0x000008C0: lt r8.xyz, l(0.000000, 0.000000, 0.000000, 0.000000), r5.xyzx 267 0x000008E8: lt r9.xyz, r5.xyzx, l(0.000000, 0.000000, 0.000000, 0.000000) 268 0x00000910: iadd r8.xyz, -r8.xyzx, r9.xyzx 269 0x00000930: itof r8.xyz, r8.xyzx 270 0x00000944: eq r9.xy, r6.xyxx, r3.wwww 271 0x00000960: mul r7.xyz, r7.xyzx, r8.xyzx 272 0x0000097C: mov r6.w, r7.y 273 0x00000990: mov r7.w, r6.z 274 0x000009A4: movc r6.yzw, r9.yyyy, r6.yywy, r7.wwwz 275 0x000009C8: mov r7.yz, r6.xxxx 276 0x000009DC: movc r6.xyz, r9.xxxx, r7.xyzx, r6.yzwy 277 0x00000A00: dp3 r3.w, r6.xyzx, r6.xyzx 278 0x00000A1C: rsq r3.w, r3.w 279 0x00000A30: mul r6.xyz, r3.wwww, r6.xyzx // r6.xyz = normalize(r6.xyz) 280 0x00000A4C: mul r7.xyz, r5.yzxy, r6.zxyz 281 0x00000A68: mad r7.xyz, r6.yzxy, r5.zxyz, -r7.xyzx // r7.xyz = cross(r6.xyz, r5.xyz) 282 0x00000A90: ftoi r0.yz, r0.yyzy 283 0x00000AA4: iadd r0.yz, r0.yyzy, g_RandomOffset.zzwz 284 0x00000AC4: and r4.xy, r0.yzyy, l(3, 3, 0, 0) // r4.xy is useless because color is the same 285 0x00000AEC: mov r4.w, l(0) // r4.w = 0 286 287 // get cone direction from texture (what a stupid way!) 
288 // r4.xyz = normalized cone direction in world space 289 0x00000B00: ld_indexable(texture2darray)(float,float,float,float) r4.xyzw, r4.xyzw, t_ConeDirectionMap.xyzw // r4.z = instance id = cone index, r4.xy is useless 290 0x00000B24: mul r5.xyz, r5.xyzx, r4.yyyy // r5.xyz = r5.xyz * r4.y 291 0x00000B40: mad r5.xyz, r4.xxxx, r6.xyzx, r5.xyzx // r5.xyz = r4.x * r6.xyz + r5.xyz * r4.y 292 0x00000B64: mad r4.xyz, r4.zzzz, r7.xyzx, r5.xyzx // r4.xyz = r4.x * r6.xyz + r4.y * r5.xyz + r4.z * r7.xyz 293 0x00000B88: dp3 r0.y, r4.xyzx, r4.xyzx 294 0x00000BA4: rsq r0.y, r0.y 295 0x00000BB8: mul r4.xyz, r0.yyyy, r4.xyzx // r4.xyz = normalized cone direction 296 0x00000BD4: dp2 r0.x, rNearestLevel0Boundary.wwww, r0.xxxx // r0.x = 0, rNearestLevel0Boundary = 0 297 0x00000BF4: max r0.x, r0.x, l(1.000000) // r0.x = 1 298 0x00000C10: mul r0.y, r4.w, r0.x // r0.y = 0 299 300 // set cone tracing parameter 301 // r1 = {g_InitialOffsetBias, g_InitialOffsetDistanceFactor, g_NormalOffsetFactor, g_TracingStep} 302 // = {2, 1, 0.5, 0.5} 303 0x00000C2C: if_nz g_AltSettingsStencilMask 304 0x00000C3C: ld_indexable(texture2d)(uint,uint,uint,uint) r0.z, r1.xyzw, g_TargetStencil.xzyw 305 0x00000C60: and r0.z, r0.z, g_AltSettingsStencilMask 306 0x00000C80: ieq r0.z, r0.z, g_AltSettingsStencilRefValue 307 0x00000CA0: mov r1.xy, (g_InitialOffsetBias,g_InitialOffsetDistanceFactor,g_InitialOffsetBias,g_InitialOffsetBias) 308 0x00000CB8: mov r1.z, g_NormalOffsetFactor 309 0x00000CD0: mov r1.w, g_TracingStep 310 0x00000CE8: movc r1.x, r0.z, g_AltInitialOffsetBias, r1.x 311 0x00000D10: movc r1.yzw, r0.zzzz, (g_AltInitialOffsetDistanceFactor,g_AltInitialOffsetDistanceFactor,g_AltNormalOffsetFactor,g_AltTracingStep), r1.yyzw 312 0x00000D38: else 313 0x00000D3C: mov r1.xy, (g_InitialOffsetBias,g_InitialOffsetDistanceFactor,g_InitialOffsetBias,g_InitialOffsetBias) 314 0x00000D54: mov r1.z, g_NormalOffsetFactor 315 0x00000D6C: mov r1.w, g_TracingStep 316 0x00000D84: endif 317 318 // compute initial 
offset 319 // g_AmbientAttenuationFactor = 0.04 320 // g_InitialOffsetBias = 2 321 // g_InitialOffsetDistanceFactor = 1 322 // g_TracingStep = 0.5 323 // g_NormalOffsetFactor = 0.5 324 // r0.xy = {3,0} 325 0x00000D88: log r0.z, r0.x // r0.x = 1 326 0x00000D9C: mul r0.z, r0.z, g_AmbientDistanceDarkening 327 0x00000DBC: exp r0.z, r0.z 328 0x00000DD0: mul r0.z, r0.z, g_AmbientAttenuationFactor // r0.z = g_AmbientAttenuationFactor * r0.x ^ g_AmbientDistanceDarkening = 0.04 * r0.x ^ -0.25 = 0.04 329 0x00000DF0: itof r3.w, g_MaxSamples // g_MaxSamples = 128 330 0x00000E08: mad r0.x, r0.x, r1.y, r1.x // r0.x = r0.x * g_InitialOffsetDistanceFactor + g_InitialOffsetBias = 3 331 0x00000E2C: mad r0.x, r0.y, g_TracingStep, r0.x // r0.x = r0.y * g_TracingStep + r0.x = 3 332 // r0.z = AO scale 333 334 // offset 3*FinestVoxelSize from surface point 335 0x00000E54: mad r2.xyz, r2.yzwy, r2.xxxx, -r4.xyzx // r2.xyz = normalize(normal) - r4.xyz 336 0x00000E7C: mad r1.xyz, r1.zzzz, r2.xyzx, r4.xyzx // r1.xyz = g_NormalOffsetFactor * r2.xyz + r4.xyz = 0.5 * (normal + direction) 337 0x00000EA0: dp3 r0.y, r1.xyzx, r1.xyzx 338 0x00000EBC: rsq r0.y, r0.y 339 0x00000ED0: mul r1.xyz, r0.yyyy, r1.xyzx // r1.xyz = normalize(r1.xyz) 340 0x00000EEC: mul r0.y, r0.x, FinestVoxelSize.y // r0.y = r0.x * FinestVoxelSize = 3 * 8 = 24 341 0x00000F0C: mad r1.xyz, r1.xyzx, r0.yyyy, r3.xyzx // r1.xyz = r1.xyz * 24 + world position 342 0x00000F30: mul r0.y, rEmittanceStorageScale.y, g_EmittanceScale // r0.y = 1.0 * 0.52 343 // r1.xyz = start trace position = position + 3 * FinestVoxelSize * 0.5 * (normal + direction) 344 345 // set cone tracing step 346 // g_ConeFactor = 2 * sin a 347 // d‘ = d * (2 + 2*sin a) / (2 - 2*sin a) = d * (2 + g_ConeFactor) / (2 - g_ConeFactor) 348 0x00000F54: add r2.x, -g_ConeFactor, l(2.000000) // r2.x = 2 - g_ConeFactor = 2 - 0.87 = 1.13 349 0x00000F78: add r2.y, g_ConeFactor, l(2.000000) // r2.y = 2 + g_ConeFactor = 2.87 350 0x00000F98: div r2.y, r2.y, r2.x 351 0x00000FB4: 
add r2.y, r2.y, l(-1.000000) // r2.y = (2 + g_ConeFactor) / (2 - g_ConeFactor) -1 = 1.53 352 0x00000FD0: add r2.z, MaxMipmapLevel.x, l(1.000000) // r2.z = MaxMipmapLevel + 1 = 10 353 0x00000FF0: mul r2.w, FinestVoxelSize.y, FinestVoxelSize.y // r2.w = FinestVoxelSize * FinestVoxelSize = 64 354 // r2 = {2 - g_ConeFactor, step factor, MaxMipmapLevel + 1, FinestVoxelSize * FinestVoxelSize} 355 // = {1.13, 1.53, 10, 64} 356 357 // initialize indirect color data 358 0x00001014: movc r3.xyz, g_FlipOpacityDirections.yyyy, -r4.xyzx, r4.xyzx // r3.xyz = cone tracing direction 359 0x00001040: lt r5.xyz, l(0.000000, 0.000000, 0.000000, 0.000000), r4.xyzx 360 0x00001068: and r4.w, r5.x, l(0x40400000) 361 0x00001084: movc r5.xy, r5.yzyy, l(4.000000,5.000000,0,0), l(1.000000,2.000000,0,0) 362 0x000010C0: mul r6.x, r4.w, EmittancePackingStride.x 363 0x000010E0: mul r5.xy, r5.xyxx, EmittancePackingStride.xxxx 364 0x00001100: mov r6.yz, l(0,0,0,0) 365 0x00001120: mov r5.zw, l(0,0,0,0) 366 0x00001140: mov r8.xyz, r1.xyzx // r8.xyz = start trace position in world space 367 0x00001154: mov r4.w, Use6DOpacity.w 368 0x0000116C: mov r6.w, l(1.000000) 369 0x00001180: mov r7.xyzw, l(0,0,0,1.000000) 370 0x000011A0: mov r8.w, l(1.000000) 371 0x000011B4: mov r9.z, r0.x // r9.z = 3, distance in voxel count 372 0x000011C8: mov r9.xyw, l(0,1.000000,0,0) 373 // r6.w = 1 = latest transparency 374 // r7 = {0,0,0,1} = {indirect color.rgb, last step transparency} 375 // r8.w = 1 = last two step transparency 376 // r9 = {0,1,3,0} 377 378 // cone trace loop 379 // r8.xyz = sample position in world space 380 0x000011E8: loop 381 0x000011EC: ge r10.x, r9.w, r3.w // if(iteration >= 128) break; 382 0x00001208: breakc_nz r10.x 383 384 0x00001214: add r10.xyz, r8.xyzx, -ClipmapAnchor.xyzx // r10.xyz = offset from clipmap center 385 0x00001238: max r10.y, |r10.z|, |r10.y| 386 0x0000125C: max r10.x, r10.y, |r10.x| // r10.x = distance from clipmap center = max(r10.xyz) 387 0x0000127C: dp2 r10.y, 
rNearestLevel0Boundary.wwww, r10.xxxx // rNearestLevel0Boundary = 0 388 0x0000129C: max r10.y, r10.y, l(1.000000) // r10.y = 1 389 // r10.y = 1 390 391 // calculate sample level 392 // r9.z = distance measured by voxel count 393 // r10.y = 1 394 // r11.x = level 395 0x000012B8: mul r10.z, r9.z, g_ConeFactor // r10.z = r9.z * g_ConeFactor = r9.z * 0.87 396 0x000012D8: lt r10.w, r10.z, r10.y // if (r10.z < 1) 397 0x000012F4: movc r10.z, r10.w, r10.y, r10.z // r10.z = 1 398 0x00001318: log r11.x, r10.z // r11.x = level 399 0x0000132C: ge r11.y, r11.x, r2.z // if(level > MaxMipmapLevel + 1) 400 0x00001348: if_nz r11.y // break 401 0x00001354: break 402 0x00001358: endif 403 // r10.z = diameter measured by voxel count 404 // r10.w = is diameter less that 1 voxel 405 406 // r10.x = max axis distance from clipmap center - distance from ray origin 407 0x0000135C: add r10.x, -r10.x, ClipmapAnchor.w // r10.x = 8128 - manhattan distance 408 0x00001380: mul r11.y, r10.z, FinestVoxelSize.y // r11.y = diameter in world coordinate 409 0x000013A0: mad r10.x, -r10.z, FinestVoxelSize.y, r10.x // r10.x = max axis distance from clipmap center - diameter 410 0x000013CC: lt r11.z, r10.x, l(0.000000) // if (max axis distance < diameter) 411 0x000013E8: if_nz r11.z // break 412 0x000013F4: break 413 0x000013F8: endif 414 415 // check if sample position is out of scene bounding box 416 0x000013FC: mad r12.xyz, r10.zzzz, FinestVoxelSize.yyyy, r8.xyzx // r12.xyz = sample position + diameter 417 0x00001424: lt r12.xyz, r12.xyzx, SceneBoundaryLower.xyzx 418 0x00001444: or r11.z, r12.y, r12.x 419 0x00001460: or r11.z, r12.z, r11.z 420 0x0000147C: mad r12.xyz, -r10.zzzz, FinestVoxelSize.yyyy, r8.xyzx 421 0x000014A8: lt r12.xyz, SceneBoundaryUpper.xyzx, r12.xyzx 422 0x000014C8: or r11.w, r12.y, r12.x 423 0x000014E4: or r11.w, r12.z, r11.w 424 0x00001500: or r11.z, r11.w, r11.z 425 0x0000151C: if_nz r11.z // if out of box, break 426 0x00001528: break 427 0x0000152C: endif 428 429 // calculate 
weight for lower and higher levels 430 // r6.w = transparency for environment map 431 // r9.z = distance measured by voxel count 432 // r10.x = manhattan distance from clipmap center - distance from ray origin 433 // r11.x = level 434 // r9.x = old AO 435 // r10.w = is diameter less that one voxel 436 0x00001530: mad r11.z, r9.z, l(2.000000), r10.y // r11.z = distance in voxels * 2 + 1 437 0x00001554: div r11.z, r11.z, r2.x // r11.z /= 2 - g_ConeFactor 438 0x00001570: add r10.y, r9.z, r10.y // r10.y = r9.z + 1 439 0x0000158C: max r10.y, r10.y, r11.z // r10.y = max(distance + 1, (distance * 2 + 1)/(2 - g_ConeFactor)) 440 0x000015A8: add r10.y, -r9.z, r10.y // r10.y = max(distance + 1, (distance * 2 + 1)/(2 - g_ConeFactor)) - distance 441 0x000015C8: mul r11.z, r2.y, r9.z // r11.z = step in voxels = distance * step factor 442 0x000015E4: movc r10.y, r10.w, r10.y, r11.z // r10.y = is diameter less that one voxel ? r10.y : original step in voxels 443 0x00001608: mul r10.w, r1.w, r10.y // r10.w = g_TracingStep * original step in voxels = step in voxels 444 0x00001624: mul r11.z, r0.z, -r9.z // r11.z = 0.04 * -distance measured by voxel count 445 0x00001644: mul r11.z, r11.z, l(1.442695) // r11.z = 0.04 * -distance measured by voxel count * 1.442695 446 0x00001660: exp r11.z, r11.z // r11.z = new weight = 2 ^ (0.04 * -distance measured by voxel count * 1.442695) 447 0x00001674: add r11.w, r9.y, -r11.z // r11.w = delta weight = old weight - new weight = r9.y - 2 ^ (0.04 * -distance measured by voxel count * 1.442695) 448 0x00001694: mad r11.w, r11.w, r6.w, r9.x // r11.w = AO = delta weight * transparency for env map + old AO 449 0x000016B8: div_sat r10.x, r10.x, r11.y // r11.y = distance in world coordinate 450 0x000016D4: round_ni r12.x, r11.x // r12.x = floor(level) 451 0x000016E8: add r12.y, r11.x, -r12.x // r12.y = level - floor(level) = weight for upper level 452 0x00001708: add r12.z, -r12.y, l(1.000000) // r12.z = 1 - r12.y = weight for lower level 453 0x00001728: 
mul r12.z, r10.x, r12.z // r12.z = weight for lower level * r10.x 454 0x00001744: lt r11.x, MaxMipmapLevel.x, r11.x // r11.x = MaxMipmapLevel < level 455 0x00001764: mul r10.x, r10.x, r12.y // r10.x = weight for upper level 456 0x00001780: movc r10.x, r11.x, l(0), r10.x // r10.x = (level > MaxMipmapLevel) ? 0 : r10.x 457 0x000017A4: ftoi r11.x, r12.x // r11.x = int(floor(level)) 458 // r10.y = original step in voxels 459 // r10.w = step in voxels 460 // = g_TracingStep * (max(distance + 1, (distance * 2 + 1)/(2 - g_ConeFactor)) - distance), if diameter less that one voxel 461 // = g_TracingStep * ((2 + g_ConeFactor) / (2 - g_ConeFactor)-1) * distance 462 // r11.z = new weight = 2 ^ (0.04 * -distance measured by voxel count * 1.442695) 463 // r11.w = AO 464 // = old AO + (old weight - new weight) * transparency for env map 465 // = old AO + (old weight - new weight) * transparency for env map 466 467 // calculate clipmap address 468 // r11.x = lower level 469 // r8.xyz = sample position 470 0x000017B8: add r13.xyz, r8.xyzx, -ClipmapCenter.xyzx // r13.xyz = sample position - clipmap center 471 0x000017DC: mad r14.xyz, r13.xyzx, g_VxgiTranslationParameters[r11.x].xxxx, l(0.500000, 0.500000, 0.500000, 0.000000) // r14.xyz = 0.5 472 0x00001814: add r14.xyz, r14.xyzx, g_VxgiTranslationParameters2[r11.x].xyzx // r14.xyz = r14.xyz + g_VxgiTranslationParameters2[level] 473 0x0000183C: frc r14.xyz, r14.xyzx 474 0x00001850: mul r15.xy, r14.xyxx, g_VxgiTranslationParameters[r11.x].yyyy // r15.xy = r14.xy * clip map resolution 475 0x00001874: mad r15.z, r14.z, g_VxgiTranslationParameters[r11.x].y, g_VxgiTranslationParameters[r11.x].z // r15.z = r14.z * clip map resolution + clip map offset 476 0x000018A8: mul r15.xyz, r15.xyzx, rOpacityTextureSize.xyzx // r15.xyz = texture coordinate 477 0x000018C8: mad r16.xy, r14.xyxx, g_VxgiTranslationParameters[r11.x].yyyy, l(1.000000, 0.000000, 0.000000, 0.000000) 478 0x00001900: mad r16.z, r14.z, g_VxgiTranslationParameters[r11.x].y, 
g_VxgiTranslationParameters[r11.x].w 479 0x00001934: mul r14.xyz, r16.xyzx, rEmittanceTextureSize.xyzx 480 0x00001954: add r12.y, r12.x, l(1.000000) 481 0x00001970: ftoi r12.y, r12.y 482 483 0x00001984: mad r13.xyz, r13.xyzx, g_VxgiTranslationParameters[r12.y].xxxx, l(0.500000, 0.500000, 0.500000, 0.000000) 484 0x000019BC: add r13.xyz, r13.xyzx, g_VxgiTranslationParameters2[r12.y].xyzx 485 0x000019E4: frc r13.xyz, r13.xyzx 486 0x000019F8: mul r16.xy, r13.xyxx, g_VxgiTranslationParameters[r12.y].yyyy 487 0x00001A1C: mad r16.z, r13.z, g_VxgiTranslationParameters[r12.y].y, g_VxgiTranslationParameters[r12.y].z 488 0x00001A50: mul r16.xyz, r16.xyzx, rOpacityTextureSize.xyzx 489 0x00001A70: mad r17.xy, r13.xyxx, g_VxgiTranslationParameters[r12.y].yyyy, l(1.000000, 0.000000, 0.000000, 0.000000) 490 0x00001AA8: mad r17.z, r13.z, g_VxgiTranslationParameters[r12.y].y, g_VxgiTranslationParameters[r12.y].w 491 0x00001ADC: mul r13.xyz, r17.xyzx, rEmittanceTextureSize.xyzx 492 493 // sample lower opacity clipmap 494 // r3.xyz = cone tracing direction 495 // r12.y = low opacity 496 // r15.y = is occupied 497 0x00001AFC: sample_l_indexable(texture3d)(float,float,float,float) r17.xyzw, r15.xyzx, t_OpacityMap_Pos.xyzw, s_VoxelTextureSampler, l(0.000000) 498 0x00001B30: if_nz r4.w // if(Use6DOpacity) 499 0x00001B3C: sample_l_indexable(texture3d)(float,float,float,float) r15.xyz, r15.xyzx, t_OpacityMap_Neg.xyzw, s_VoxelTextureSampler, l(0.000000) 500 0x00001B70: mul_sat r18.xyz, -r3.xyzx, r17.xyzx // r18.xyz = -1 * cone direction.xyz * opacity_pos.xyz 501 0x00001B90: add r12.y, r18.y, r18.x 502 0x00001BAC: add r12.y, r18.z, r12.y // r12.y = r18.x + r18.y + r18.z 503 0x00001BC8: mul_sat r15.xyz, r3.xyzx, r15.xyzx // r15.xyz = cone direction.xyz * opacity_neg.xyz 504 0x00001BE4: add r12.w, r15.y, r15.x 505 0x00001C00: add r12.w, r15.z, r12.w // r12.w = r15.x + r15.y + r15.z 506 0x00001C1C: min r12.yw, r12.yyyw, l(0.000000, 1.000000, 0.000000, 1.000000) // r12.yw = min(r12.yw, 1) 507 
0x00001C44: add r12.y, r12.w, r12.y 508 0x00001C60: else 509 0x00001C64: mul_sat r15.xyz, |r4.xyzx|, r17.xyzx 510 0x00001C84: add r12.w, r15.y, r15.x 511 0x00001CA0: add r12.w, r15.z, r12.w // r12.w = dot(|direction.xyz|, opacity.xyz) 512 0x00001CBC: min r12.y, r12.w, l(1.000000) // r12.y = lower opacity 513 0x00001CD8: endif 514 0x00001CDC: ne r15.y, r17.w, l(0.000000) // r15.y = if lower voxel is occupied 515 516 // sample higher opacity clipmap 517 // r12.w = high opacity 518 // r15.x = is occupied 519 0x00001CF8: sample_l_indexable(texture3d)(float,float,float,float) r17.xyzw, r16.xyzx, t_OpacityMap_Pos.xyzw, s_VoxelTextureSampler, l(0.000000) 520 0x00001D2C: if_nz r4.w // if(Use6DOpacity) 521 0x00001D38: sample_l_indexable(texture3d)(float,float,float,float) r16.xyz, r16.xyzx, t_OpacityMap_Neg.xyzw, s_VoxelTextureSampler, l(0.000000) 522 0x00001D6C: mul_sat r18.xyz, -r3.xyzx, r17.xyzx 523 0x00001D8C: add r12.w, r18.y, r18.x 524 0x00001DA8: add r12.w, r18.z, r12.w 525 0x00001DC4: min r12.w, r12.w, l(1.000000) 526 0x00001DE0: mul_sat r16.xyz, r3.xyzx, r16.xyzx 527 0x00001DFC: add r13.w, r16.y, r16.x 528 0x00001E18: add r13.w, r16.z, r13.w 529 0x00001E34: min r13.w, r13.w, l(1.000000) 530 0x00001E50: add r12.w, r12.w, r13.w 531 0x00001E6C: else 532 0x00001E70: mul_sat r16.xyz, |r4.xyzx|, r17.xyzx 533 0x00001E90: add r13.w, r16.y, r16.x 534 0x00001EAC: add r13.w, r16.z, r13.w 535 0x00001EC8: min r12.w, r13.w, l(1.000000) // r12.w = higher opacity 536 0x00001EE4: endif 537 0x00001EE8: ne r15.x, r17.w, l(0.000000) // r15.x = if higher voxel is occupied 538 539 // interpolate opacity 540 // r12.x = floor(level) 541 // r12.y = low opacity 542 // r12.z = low weight 543 // r12.w = high opacity 544 // r10.x = high weight 545 0x00001F04: mul r12.w, r10.x, r12.w // r12.w = higher opacity * weight 546 0x00001F20: mad_sat r12.y, r12.y, r12.z, r12.w // r12.y = interpolated opacity 547 0x00001F44: add r12.x, r12.x, r12.x // r12.x = floor(level) * 2 548 0x00001F60: exp r12.x, 
r12.x // r12.x = 2 ^ (floor(level) * 2) 549 0x00001F74: mul r12.x, r2.w, r12.x // r12.x = r12.x * FinestVoxelSize * FinestVoxelSize = r12.x * 64 550 0x00001F90: mul r10.x, r10.x, r12.x // r10.x = 2 ^ (floor(level) * 2) * 64 * high weight 551 0x00001FAC: mul r15.w, r12.z, r12.x // r15.w = 2 ^ (floor(level) * 2) * 64 * low weight 552 0x00001FC8: mul r15.z, r10.x, l(4.000000) // r15.z = 2 ^ (floor(level) * 2) * 64 * high weight * 4 553 0x00001FE4: and r10.x, r11.x, l(1) // r10.x = is lower level odd ? 554 0x00002000: movc r15.xyzw, r10.xxxx, r15.xyzw, r15.yxwz // r15.xyzw = is odd ? r15.xyzw : r15.yxwz 555 // r15 = {is even occupied, is odd occupied, even weight, odd weight} 556 557 // sample even color clipmap 558 0x00002024: if_nz r15.x 559 0x00002030: movc r12.xzw, r10.xxxx, r13.xxyz, r14.xxyz 560 0x00002054: add r16.xyz, r6.xyyx, r12.xzwx 561 0x00002070: add r17.xyz, r5.xzzx, r12.xzwx 562 0x0000208C: add r12.xzw, r5.yyzz, r12.xxzw 563 0x000020A8: sample_l_indexable(texture3d)(float,float,float,float) r16.xyz, r16.xyzx, t_EmittanceEven.xyzw, s_VoxelTextureSampler, l(0.000000) 564 0x000020DC: sample_l_indexable(texture3d)(float,float,float,float) r17.xyz, r17.xyzx, t_EmittanceEven.xyzw, s_VoxelTextureSampler, l(0.000000) 565 0x00002110: sample_l_indexable(texture3d)(float,float,float,float) r12.xzw, r12.xzwx, t_EmittanceEven.xwyz, s_VoxelTextureSampler, l(0.000000) 566 0x00002144: mul r17.xyz, |r4.yyyy|, r17.xyzx 567 0x00002164: mad r16.xyz, |r4.xxxx|, r16.xyzx, r17.xyzx 568 0x0000218C: mad r12.xzw, |r4.zzzz|, r12.xxzw, r16.xxyz // r12.xzw = interpolated color in even texture 569 0x000021B4: mul r12.xzw, r15.zzzz, r12.xxzw // r12.xzw = even color 570 0x000021D0: else 571 0x000021D4: mov r12.xzw, l(0,0,0,0) 572 0x000021F4: endif 573 574 // sample odd color clipmap 575 0x000021F8: lt r11.x, l(0.000000), r15.w 576 0x00002214: and r11.x, r11.x, r15.y 577 0x00002230: if_nz r11.x // if (is odd occupied && odd weight > 0) 578 0x0000223C: movc r13.xyz, r10.xxxx, r14.xyzx, 
r13.xyzx 579 0x00002260: add r14.xyz, r6.xzzx, r13.xyzx 580 0x0000227C: add r16.xyz, r5.xwwx, r13.xyzx 581 0x00002298: add r13.xyz, r5.ywwy, r13.xyzx 582 0x000022B4: sample_l_indexable(texture3d)(float,float,float,float) r14.xyz, r14.xyzx, t_EmittanceOdd.xyzw, s_VoxelTextureSampler, l(0.000000) 583 0x000022E8: sample_l_indexable(texture3d)(float,float,float,float) r16.xyz, r16.xyzx, t_EmittanceOdd.xyzw, s_VoxelTextureSampler, l(0.000000) 584 0x0000231C: sample_l_indexable(texture3d)(float,float,float,float) r13.xyz, r13.xyzx, t_EmittanceOdd.xyzw, s_VoxelTextureSampler, l(0.000000) 585 0x00002350: mul r16.xyz, |r4.yyyy|, r16.xyzx 586 0x00002370: mad r14.xyz, |r4.xxxx|, r14.xyzx, r16.xyzx 587 0x00002398: mad r13.xyz, |r4.zzzz|, r13.xyzx, r14.xyzx 588 0x000023C0: mad r12.xzw, r13.xxyz, r15.wwww, r12.xxzw // r12.xzw = final interpolated color 589 0x000023E4: mov r15.x, l(-1) 590 0x000023F8: endif 591 // r12.xzw = final interpolated color 592 593 // Add color contribution and update transparency for environment map 594 // r12.y = intepolated opacity 595 // r10.w = step in voxels 596 // r10.z = diameter 597 // r11.y = distance in world coordinate 598 // r6.w = old transparency 599 0x000023FC: mul r13.xyz, r0.yyyy, r12.xzwx // r13.xyz = scaled color = rEmittanceStorageScale.y * g_EmittanceScale * color = 0.52 * color 600 0x00002418: div r10.x, g_ConeFactor, r11.y // r10.x = g_ConeFactor / distance 601 0x00002438: mul r10.x, r10.x, r10.x // r10.x = (g_ConeFactor / distance) ^ 2 602 0x00002454: mul r13.xyz, r10.xxxx, r13.xyzx // color.xyz = (g_ConeFactor / distance) ^ 2 * color.xyz 603 0x00002470: movc r12.xzw, r15.xxxx, r13.xxyz, r12.xxzw // r12.xzw = color.xyz * -1 + final color 604 0x00002494: lt r10.x, l(0.000000), r12.y // r10.x = opacity > 0 605 0x000024B0: add r11.x, -r12.y, l(1.000000) // r11.x = 1 - opacity 606 0x000024D0: mul r11.y, r10.w, g_OpacityCorrectionFactor // r11.y = step in voxels * g_OpacityCorrectionFactor 607 0x000024F0: div r10.z, r11.y, r10.z // 
r10.z = factor = step in voxels * g_OpacityCorrectionFactor / diameter 608 0x0000250C: log r11.x, r11.x // r11.x = log2(1 - opacity) 609 0x00002520: mul r10.z, r10.z, r11.x // r10.z = r10.z * log2(1 - opacity) 610 0x0000253C: exp r10.z, r10.z // r10.z = 2 ^ (r10.z) = 2 ^ (factor * log2(1 - opacity)) = (1 - opacity) ^ factor 611 0x00002550: add r10.z, -r10.z, l(1.000000) // r10.z = 1 - (1 - opacity) ^ factor 612 0x00002570: and r10.x, r10.z, r10.x // r10.x = opacity > 0 ? 1 - (1 - opacity) ^ factor : 0 613 0x0000258C: mad r12.xyz, r8.wwww, r12.xzwx, r7.xyzx // r12.xyz = new indirect color = r8.w * r12.xzw + indirect color 614 0x000025B0: add r10.x, -r10.x, l(1.000000) // r10.x = step transparency = (1 - opacity) ^ factor 615 0x000025D0: mul r10.x, r6.w, r10.x // r10.x = r6.w * r10.x = transparency for environment map * step transparency 616 // r10.x = new transparency for environment map 617 // = old transparency * ((1 - interpolated opacity) ^ (step in voxels * g_OpacityCorrectionFactor / diameter in voxels)), if interpolated opacity > 0 618 // = old transparency, otherwise 619 // r12.xyz = new indirect color 620 // = old indirect color + interpolated color * the second last transparency 621 622 // terminate cone tracing 623 0x000025EC: lt r10.z, r7.w, l(0.000100) // r10.z = r7.w < 0.0001 624 0x00002608: if_nz r10.z // if(r7.w < 0.0001) 625 0x00002614: mov r7.xyz, r12.xyzx // r7.xyz = indirect color 626 0x00002628: mov r9.x, r11.w // r9.x = AO = r11.w 627 0x0000263C: mov r6.w, r10.x // r6.w = r10.x = transparency for environment map 628 0x00002650: break 629 0x00002654: endif 630 631 // proceed to next iteration 632 // r11.z = new AO weight = 2 ^ (0.04 * distance measured by voxel count * 1.442695) 633 // r11.w = AO 634 // r10.x = transparency for environment map 635 0x00002658: mad r9.z, r10.y, r1.w, r9.z // r9.z = new distance = g_TracingStep * step in voxels + distance in voxels 636 0x0000267C: mul r10.y, r10.w, FinestVoxelSize.y // r10.y = 
step length 637 0x0000269C: mad r8.xyz, r10.yyyy, r4.xyzx, r8.xyzx // r8.xyz = new sampling position = old sampling position + step size * direction 638 0x000026C0: add r9.w, r9.w, l(1.000000) // r9.w = iteration++ 639 0x000026DC: mov r7.xyz, r12.xyzx // r7.xyz = indirect color 640 0x000026F0: mov r10.y, r6.w // r10.y = transparency 641 0x00002704: mov r8.w, r7.w // r8.w = transparency 642 0x00002718: mov r9.xy, r11.wzww // r9.xy = r11.wz = {AO, 2 ^ (0.04 * distance measured by voxel count * 1.442695)} 643 0x0000272C: mov r6.w, r10.x // r6.w = new transparency for env map = r10.x 644 0x00002740: mov r7.w, r10.y // r7.w = transparency 645 // r7.xyz = indirect light 646 // r6.w = latest transparency for environment map 647 // r7.w = last transparency for environment map 648 // r8.w = the second last transparency for environment map 649 // r9.x = AO 650 // r9.y = new AO weight 651 652 0x00002754: endloop 653 654 // sample environment map 655 0x00002758: mov_sat r9.x, r9.x 656 0x0000276C: add_sat r0.x, -r6.w, l(1.000000) // r0.x = saturate(1 - transparency) 657 0x0000278C: add r0.x, -r0.x, l(1.000000) // r0.x = 1 - saturate(1 - transparency) = transparency 658 0x000027AC: mul r0.xyz, r0.xxxx, g_EnvironmentMapTint.xyzx // r0.xyz = g_EnvironmentMapTint * transparency 659 0x000027CC: lt r1.xyz, l(0.000000, 0.000000, 0.000000, 0.000000), r0.xyzx 660 0x000027F4: or r1.x, r1.y, r1.x 661 0x00002810: or r1.x, r1.z, r1.x 662 0x0000282C: if_nz r1.x // if (any(g_EnvironmentMapTint * transparency > 0)) 663 0x00002838: mul r1.x, g_ConeFactor, g_EnvironmentMapResolution // r1.x = g_ConeFactor * g_EnvironmentMapResolution 664 0x0000285C: log r1.x, r1.x // r1.x = log2(g_ConeFactor * g_EnvironmentMapResolution) 665 0x00002870: add r1.x, r1.x, l(-1.000000) 666 0x0000288C: max r1.x, r1.x, l(0.000000) // r1.x = max(log2(g_ConeFactor * g_EnvironmentMapResolution)-1,0) 667 0x000028A8: min r1.x, r1.x, g_MaxEnvironmentMapMipLevel // r1.x = min(g_MaxEnvironmentMapMipLevel, max(log2(g_ConeFactor 
* g_EnvironmentMapResolution)-1,0)) 668 0x000028C8: sample_l_indexable(texturecube)(float,float,float,float) r1.xyz, r4.xyzx, t_EnvironmentMap.xyzw, s_EnvironmentMapSampler, r1.x 669 0x000028FC: mul r0.xyz, r0.xyzx, r1.xyzx // r0.xyz = environment color = g_EnvironmentMapTint * transparency * environment map color 670 0x00002918: mad r7.xyz, r0.xyzx, g_rNumCones.wwww, r7.xyzx // r7.xyz = indirect color = r7.xyz + environment color / number of cones 671 0x00002940: endif 672 // r7.xyz = indirect light 673 674 // add ambient color 675 // r9.x = AO? 676 0x00002944: mad_sat r0.x, r9.x, g_AmbientScale, g_AmbientBias // r0.x = saturate(r9.x * g_AmbientScale + g_AmbientBias) 677 0x00002970: log r0.x, r0.x 678 0x00002984: mul r0.x, r0.x, g_AmbientPower 679 0x000029A4: exp r0.x, r0.x // r0.x = ambient strength = pow(saturate(r9.x * g_AmbientScale + g_AmbientBias), g_AmbientPower) 680 0x000029B8: mul r0.xyz, r0.xxxx, g_AmbientColor.xyzx 681 0x000029D8: mul r0.xyz, r0.xyzx, g_rNumCones.wwww // r0.xyz = ambient term = ambient strength * g_AmbientColor.xyzx / number of cones 682 0x000029F8: mad r0.xyz, r7.xyzx, l(0.318310, 0.318310, 0.318310, 0.000000), r0.xyzx // r0.xyz = all indirect light = ambient term + indirect light * 0.318310 683 0x00002A28: mul r1.xyz, r4.xxxx, r0.xyzx // r1.xyz = all indirect light * direction.x 684 0x00002A44: mul r2.xyz, r4.yyyy, r0.xyzx // r2.xyz = all indirect light * direction.y = all indirect light * cos(theta) 685 0x00002A60: mul r0.xyz, r4.zzzz, r0.xyzx // r0.xyz = all indirect light * direction.z 686 0x00002A7C: else 687 0x00002A80: mov r1.xyz, l(0,0,0,0) 688 0x00002AA0: mov r2.xyz, l(0,0,0,0) 689 0x00002AC0: mov r0.xyz, l(0,0,0,0) 690 0x00002AE0: endif 691 692 // write x axis indirect illumination 693 0x00002AE4: mul r0.w, r0.w, g_rNumCones.w 694 0x00002B04: and r1.w, r1.x, l(0x7f800000) 695 0x00002B20: ieq r1.w, r1.w, l(0x7f800000) 696 0x00002B3C: movc o0.x, r1.w, l(0), r1.x 697 0x00002B60: and r1.x, r1.y, l(0x7f800000) 698 0x00002B7C: ieq 
r1.x, r1.x, l(0x7f800000) 699 0x00002B98: movc o0.y, r1.x, l(0), r1.y 700 0x00002BBC: and r1.x, r1.z, l(0x7f800000) 701 0x00002BD8: ieq r1.x, r1.x, l(0x7f800000) 702 0x00002BF4: movc o0.z, r1.x, l(0), r1.z 703 0x00002C18: and r1.x, r0.w, l(0x7f800000) 704 0x00002C34: ieq r1.x, r1.x, l(0x7f800000) 705 0x00002C50: movc o0.w, r1.x, l(0), r0.w 706 707 // write y axis indirect illumination 708 0x00002C74: and r0.w, r2.x, l(0x7f800000) 709 0x00002C90: ieq r0.w, r0.w, l(0x7f800000) 710 0x00002CAC: movc o1.x, r0.w, l(0), r2.x 711 0x00002CD0: and r0.w, r2.y, l(0x7f800000) 712 0x00002CEC: ieq r0.w, r0.w, l(0x7f800000) 713 0x00002D08: movc o1.y, r0.w, l(0), r2.y 714 0x00002D2C: and r0.w, r2.z, l(0x7f800000) 715 0x00002D48: ieq r0.w, r0.w, l(0x7f800000) 716 0x00002D64: movc o1.z, r0.w, l(0), r2.z 717 718 // write z axis indirect illumination 719 0x00002D88: and r0.w, r0.x, l(0x7f800000) 720 0x00002DA4: ieq r0.w, r0.w, l(0x7f800000) 721 0x00002DC0: movc o2.x, r0.w, l(0), r0.x 722 0x00002DE4: and r0.x, r0.y, l(0x7f800000) 723 0x00002E00: ieq r0.x, r0.x, l(0x7f800000) 724 0x00002E1C: movc o2.y, r0.x, l(0), r0.y 725 0x00002E40: and r0.x, r0.z, l(0x7f800000) 726 0x00002E5C: ieq r0.x, r0.x, l(0x7f800000) 727 0x00002E78: movc o2.z, r0.x, l(0), r0.z 728 0x00002E9C: mov o1.w, l(0) 729 0x00002EB0: mov o2.w, l(0) 730 0x00002EC4: ret 731 // Approximately 399 instruction slots used
以上是关于Anatomy of Nvidia's Voxel Cone Tracing Code (VXGI)的主要内容,如果未能解决你的问题,请参考以下文章
Life of a triangle - NVIDIA's logical pipeline
The Anatomy of a Large-Scale Hypertextual Web Search Engine
Anatomy of a Program in Memory.剖析程序的内存布局
翻译Anatomy of a Program in Memory—剖析内存中的一个程序(进程的虚拟存储器映像布局详解)
大规模超文本网络搜索引擎解析 [ The Anatomy of a Large-Scale Hypertextual Web Search Engine ]