Wicked Surfel GI 实现方法
资源链接:
Wicked Engine
Wicked Engine Net – 3D Engine Development
实现顺序
循环一:SurfelGI_Coverage
循环二:Grid reset -> Update -> Grid offsets -> Binning -> Raytracing -> Integrate rays
surfel buffer 总结
// surfel_indirectprepareCS: snapshots the surfel statistics counters, resets the
// per-frame ones, and writes the thread-group counts used by the later
// indirect dispatches (iterate / raytrace / integrate).
void main(uint3 DTid : SV_DispatchThreadID)
{
    // Read every counter before any of them is overwritten below.
    const uint raw_next_count = surfelStatsBuffer.Load(SURFEL_STATS_OFFSET_NEXTCOUNT);
    const int raw_dead_count = asint(surfelStatsBuffer.Load(SURFEL_STATS_OFFSET_DEADCOUNT));
    const uint ray_count = surfelStatsBuffer.Load(SURFEL_STATS_OFFSET_RAYCOUNT);

    // The "next" count becomes the current surfel count, limited to the capacity.
    const uint surfel_count = clamp(raw_next_count, 0, SURFEL_CAPACITY);

    // A negative dead count is reported as a shortage; the stored dead count
    // itself is clamped into [0, SURFEL_CAPACITY].
    const int shortage = max(0, -raw_dead_count);
    const int dead_count = clamp(raw_dead_count, 0, SURFEL_CAPACITY);

    // Publish the current values and reset the accumulating counters.
    surfelStatsBuffer.Store(SURFEL_STATS_OFFSET_COUNT, surfel_count);
    surfelStatsBuffer.Store(SURFEL_STATS_OFFSET_NEXTCOUNT, 0);
    surfelStatsBuffer.Store(SURFEL_STATS_OFFSET_DEADCOUNT, dead_count);
    surfelStatsBuffer.Store(SURFEL_STATS_OFFSET_CELLALLOCATOR, 0);
    surfelStatsBuffer.Store(SURFEL_STATS_OFFSET_RAYCOUNT, 0);
    surfelStatsBuffer.Store(SURFEL_STATS_OFFSET_SHORTAGE, shortage);

    // Group counts: ceil(count / SURFEL_INDIRECT_NUMTHREADS) for the iterate and
    // raytrace passes; the integrate pass gets one group per surfel.
    const uint iterate_groups = (surfel_count + SURFEL_INDIRECT_NUMTHREADS - 1) / SURFEL_INDIRECT_NUMTHREADS;
    const uint raytrace_groups = (ray_count + SURFEL_INDIRECT_NUMTHREADS - 1) / SURFEL_INDIRECT_NUMTHREADS;

    surfelIndirectBuffer.Store3(SURFEL_INDIRECT_OFFSET_ITERATE, uint3(iterate_groups, 1, 1));
    surfelIndirectBuffer.Store3(SURFEL_INDIRECT_OFFSET_RAYTRACE, uint3(raytrace_groups, 1, 1));
    surfelIndirectBuffer.Store3(SURFEL_INDIRECT_OFFSET_INTEGRATE, uint3(surfel_count, 1, 1));
}
surfelStatsBuffer:存储各种count,每个count32位,4字节偏移
surfelIndirectBuffer:存储indirect计算时threads的动态分配方案
surfelCellBuffer:存储cell的surfel index
surfelGridBuffer:哈希存储结构,存储类型为SurfelGridCell,有offset和count两个值,用来记录一个surfelcell中的surfel包含情况
其它buffer没什么难点
ShaderInterop_SurfelGI.h
包含的内容主要是各种宏和常量定义,surfel的存储格式和数据压缩方案,哈希计算和末尾的各种加权计算。
surfel_indirectprepareCS.hlsl
具体内容就是上面贴的各种buffer的分配,一定要看懂。
surfel_binningCS.hlsl
遍历surfel,查找每个surfel中心所在的surfel_cell,然后检查周围3*3*3共27个cell中哪些与该surfel相交。对每个相交的cell,用InterlockedAdd把该cell在surfelGridBuffer中的count加1,并把surfel_index写入surfelCellBuffer中该cell对应的位置(offset + 加1之前的count)。注意写入位置由被覆盖的cell决定,而不是由surfel中心所在的surfel_cell决定。
// Binning excerpt: registers the current surfel in every grid cell it
// intersects. Surfels whose radius is not positive are skipped.
if (surfel.GetRadius() > 0)
{
    const int3 home_cell = surfel_cell(surfel.position);

    // Visit the 27 candidate cells addressed through surfel_neighbor_offsets.
    for (uint n = 0; n < 27; ++n)
    {
        const int3 candidate = home_cell + surfel_neighbor_offsets[n];
        if (!surfel_cellintersects(surfel, candidate))
        {
            continue;
        }

        const uint cellindex = surfel_cellindex(candidate);

        // Bump the cell's surfel count atomically; the value it had before the
        // add is this surfel's slot inside the cell's range of surfelCellBuffer.
        uint slot;
        InterlockedAdd(surfelGridBuffer[cellindex].count, 1, slot);
        surfelCellBuffer[surfelGridBuffer[cellindex].offset + slot] = surfel_index;
    }
}
surfel_coverageCS.hlsl
将屏幕空间分为16*16的pixel cell,遍历每个pixel,转化为世界坐标,通过世界坐标换算成哈希值(即cellindex),然后拿这个cellindex去surfelGridBuffer里找到有哪些surfel覆盖,通过方向、距离加权计算覆盖率(累加颜色时还会再乘上moments权重和生命周期权重),统计线程组内覆盖率最小的pixel后生成新的surfel。还有一些屏幕空间的debug函数。
// Coverage excerpt: accumulates coverage, color and debug output for the pixel
// at position P with normal N from all surfels stored in `cell`.
for (uint slot = 0; slot < cell.count; ++slot)
{
    const uint surfel_index = surfelCellBuffer[cell.offset + slot];
    const Surfel surfel = surfelBuffer[surfel_index];

    const float3 L = P - surfel.position;
    const float dist2 = dot(L, L);

    // Only surfels whose radius reaches the pixel position are considered.
    if (dist2 < sqr(surfel.GetRadius()))
    {
        const float3 normal = normalize(unpack_unitvector(surfel.normal));
        const float dotN = dot(N, normal);

        // Ignore surfels facing away from the pixel normal.
        if (dotN > 0)
        {
            const float dist = sqrt(dist2);

            // Weight by facing and by the distance falloff inside the radius.
            float contribution = saturate(dotN) * saturate(1 - dist / surfel.GetRadius());
            contribution = smoothstep(0, 1, contribution);

            // Coverage uses only the geometric weight computed so far.
            coverage += contribution;

            // Color is additionally weighted by the surfel's moments...
            const float2 moments = surfelMomentsTexture.SampleLevel(sampler_linear_clamp, surfel_moment_uv(surfel_index, normal, L / dist), 0);
            contribution *= surfel_moment_weight(moments, dist);

            // ...and by the surfel's life.
            // NOTE(review): the blend factor life / 2.0f is not clamped, so lerp
            // extrapolates once life exceeds 2 - confirm this is intended.
            contribution = lerp(0, contribution, surfelDataBuffer[surfel_index].GetLife() / 2.0f);

            color += float4(surfel.color, 1) * contribution;

            switch (push.debug)
            {
            case SURFEL_DEBUG_NORMAL:
                debug.rgb += normal * contribution;
                debug.a = 1;
                break;
            case SURFEL_DEBUG_RANDOM:
                debug += float4(random_color(surfel_index), 1) * contribution;
                break;
            case SURFEL_DEBUG_INCONSISTENCY:
                debug += float4(surfelDataBuffer[surfel_index].inconsistency.xxx, 1) * contribution;
                break;
            default:
                break;
            }
        }

        // Point debug mode: mark pixels within 0.05 of a surfel position.
        if (push.debug == SURFEL_DEBUG_POINT)
        {
            if (dist2 <= sqr(0.05))
            {
                debug = float4(1, 0, 1, 1);
            }
        }
    }
}

// Cells that are not yet full take part in the group-wide minimum search.
if (cell.count < SURFEL_CELL_LIMIT)
{
    // Sort key layout (most significant first), so InterlockedMin picks the
    // lowest coverage and uses the random bits to order equal coverages:
    //   bits 24..31  integer part of coverage
    //   bits  8..23  16-bit random value
    //   bits  4..7   GTid.x (low 4 bits)
    //   bits  0..3   GTid.y (low 4 bits)
    const uint coverage_bits = (uint(coverage) & 0xFF) << 24;
    const uint random_bits = (uint(rng.next_float() * 65535) & 0xFFFF) << 8;
    const uint thread_x_bits = (GTid.x & 0xF) << 4;
    const uint thread_y_bits = (GTid.y & 0xF) << 0;

    const uint surfel_count_at_pixel = coverage_bits | random_bits | thread_x_bits | thread_y_bits;
    InterlockedMin(GroupMinSurfelCount, surfel_count_at_pixel);
}
surfel_gridoffsetsCS.hlsl surfel_gridresetCS.hlsl
遍历、初始化各个grid,统计总count。
surfel_raytraceCS.hlsl
目前这个surfel实现的raytracing原理就是遍历场景中每一条光线。找到光线源的surfel,然后从这个surfel向这个光线的方向附近发射新的光线进行重要性采样,hit到surface后,hit点周围的surfel颜色加权平均,和光源direct diffuse的结果相加,将结果保存在这个raydata里。每一帧渲染的时候就查找surfel的raydata颜色,和surfel本身的颜色加权。实际上就是做了一次光源直接光照和每个surfel的间接光照。目前搜集到的资料来看,实现的surfel基本上都是这么做的。多次反射还没有具体的实现。
// Raytrace excerpt: gathers the weighted average color of the surfels around
// the hit surface and adds it to hit_result.
{
    float4 surfel_gi = 0;

    const uint cellindex = surfel_cellindex(surfel_cell(surface.P));
    const SurfelGridCell cell = surfelGridBuffer[cellindex];

    for (uint slot = 0; slot < cell.count; ++slot)
    {
        const uint surfel_index = surfelCellBuffer[cell.offset + slot];
        const Surfel surfel = surfelBuffer[surfel_index];

        const float3 L = surface.P - surfel.position;
        const float dist2 = dot(L, L);

        // Only surfels whose radius reaches the hit position contribute.
        if (dist2 < sqr(surfel.GetRadius()))
        {
            const float3 normal = normalize(unpack_unitvector(surfel.normal));
            const float dotN = dot(surface.N, normal);

            // Ignore surfels facing away from the hit surface.
            if (dotN > 0)
            {
                const float dist = sqrt(dist2);

                // Facing weight times distance falloff, smoothed.
                float contribution = saturate(dotN) * saturate(1 - dist / surfel.GetRadius());
                contribution = smoothstep(0, 1, contribution);

                // Moments come from the previous-frame texture here.
                const float2 moments = surfelMomentsTexturePrev.SampleLevel(sampler_linear_clamp, surfel_moment_uv(surfel_index, normal, L / dist), 0);
                contribution *= surfel_moment_weight(moments, dist);

                // rgb accumulates weighted color, a accumulates total weight.
                surfel_gi += float4(surfel.color, 1) * contribution;
            }
        }
    }

    if (surfel_gi.a > 0)
    {
        // Scale slightly below 1 before normalizing by the total weight.
        const float energy_conservation = 0.95;
        surfel_gi.rgb *= energy_conservation;
        surfel_gi.rgb /= surfel_gi.a;
        surfel_gi.a = saturate(surfel_gi.a);

        hit_result += max(0, surfel_gi.rgb);
    }
}
surfel_updateCS.hlsl
实现surfel的data更新。
遍历当前surfel,通过surfel_data中保存的数据计算。若此surfel仍旧在某个surface上,那么生成新的surface数据,并更新surfel.data,统计这个新surfel在世界空间中覆盖的cell。根据surfel的生命周期进行ray quest,将新数据打包进surfel.data后写入surfelBuffer和surfelRayBuffer。若当前surfel没覆盖到表面,则写入surfelDeadBuffer。
surfel_integrateCS.hlsl
surfel处理主函数。为每个surfel都开一个8*8的线程组,因此Gid代表每个surfel。 内容很多,直接写到注释里吧。
// surfel_integrateCS: one thread group per alive surfel (Gid.x selects it).
// Each thread handles one direction texel of the surfel's moments tile, and
// thread 0 finally folds the traced radiance into the surfel's statistics and
// updates its life / recycle counters.
[numthreads(THREADCOUNT, THREADCOUNT, 1)]
void main(uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint3 GTid : SV_GroupThreadID, uint groupIndex : SV_GroupIndex)
{
    const uint surfel_index = surfelAliveBuffer[Gid.x];
    const Surfel surfel = surfelBuffer[surfel_index];
    SurfelData surfel_data = surfelDataBuffer[surfel_index];

    uint life = surfel_data.GetLife();
    uint recycle = surfel_data.GetRecycle();

    const float maxDistance = surfel.GetRadius();
    const float3 P = surfel.position;
    const float3 N = normalize(unpack_unitvector(surfel.normal));

    // Direction represented by this thread's texel: decode the texel center
    // from [-1, 1]^2 and rotate it into the surfel's tangent space.
    float3 texel_direction = decode_hemioct(((GTid.xy + 0.5) / (float2)SURFEL_MOMENT_RESOLUTION) * 2 - 1);
    texel_direction = mul(texel_direction, get_tangentspace(N));
    texel_direction = normalize(texel_direction);

    float4 result = 0;        // rgb: summed radiance, a: number of samples
    float2 result_depth = 0;  // weighted sum of (depth, depth^2)
    float total_weight = 0;

    // Walk the surfel's rays in batches of at most CACHE_SIZE, staging each
    // batch in group-shared memory so all threads can read it.
    uint remaining_rays = surfel.GetRayCount();
    uint offset = surfel.GetRayOffset();
    while (remaining_rays > 0)
    {
        const uint num_rays = min(CACHE_SIZE, remaining_rays);

        if (groupIndex < num_rays)
        {
            ray_cache[groupIndex] = surfelRayBuffer[offset + groupIndex].load();
        }
        GroupMemoryBarrierWithGroupSync();

        for (uint r = 0; r < num_rays; ++r)
        {
            const SurfelRayData ray = ray_cache[r];

            result += float4(ray.radiance, 1);

            // A non-positive depth is treated as the maximum distance;
            // otherwise the depth is limited to the surfel radius.
            const float depth = (ray.depth > 0) ? clamp(ray.depth, 0, maxDistance) : maxDistance;

            // Sharp lobe around this texel's direction.
            float weight = saturate(dot(texel_direction, ray.direction) + 0.01);
            weight = pow(weight, 32);

            if (weight > WEIGHT_EPSILON)
            {
                result_depth += float2(depth, sqr(depth)) * weight;
                total_weight += weight;
            }
        }

        // Everyone must be done reading before the cache is refilled.
        GroupMemoryBarrierWithGroupSync();

        remaining_rays -= num_rays;
        offset += num_rays;
    }

    // Top-left texel of this surfel's tile in the moments texture.
    const uint2 moments_topleft = unflatten2D(surfel_index, SQRT_SURFEL_CAPACITY) * SURFEL_MOMENT_TEXELS;

    // Write the interior texel owned by this thread (interior starts at +1).
    if (total_weight > WEIGHT_EPSILON && GTid.x < SURFEL_MOMENT_RESOLUTION && GTid.y < SURFEL_MOMENT_RESOLUTION)
    {
        result_depth /= total_weight;

        const uint2 moments_pixel = moments_topleft + 1 + GTid.xy;

        // Surfels that already lived at least one update blend slowly towards
        // the new moments instead of replacing the previous ones.
        if (life > 0)
        {
            const float2 prev_moment = surfelMomentsTexturePrev[moments_pixel];
            result_depth = lerp(prev_moment, result_depth, 0.02);
        }

        surfelMomentsTexture[moments_pixel] = result_depth;
    }

#ifdef SURFEL_ENABLE_IRRADIANCE_SHARING
    // Irradiance sharing: add the color of nearby surfels from the same cell.
    {
        const uint cellindex = surfel_cellindex(surfel_cell(P));
        const SurfelGridCell cell = surfelGridBuffer[cellindex];

        // NOTE(review): every thread starts at i = 0 and strides by the group
        // size, so all threads visit the same entries (0, N, 2N, ...) -
        // confirm whether the start index was meant to be groupIndex.
        for (uint i = 0; i < cell.count; i += THREADCOUNT * THREADCOUNT)
        {
            const uint neighbor_index = surfelCellBuffer[cell.offset + i];
            const Surfel neighbor = surfelBuffer[neighbor_index];

            const float combined_radius = neighbor.GetRadius() + maxDistance;
            const float3 L = P - neighbor.position;
            const float dist2 = dot(L, L);

            if (dist2 < sqr(combined_radius))
            {
                const float3 normal = normalize(unpack_unitvector(neighbor.normal));
                const float dotN = dot(N, normal);

                if (dotN > 0)
                {
                    const float dist = sqrt(dist2);

                    float contribution = saturate(dotN) * saturate(1 - dist / combined_radius);
                    contribution = smoothstep(0, 1, contribution);

                    const float2 moments = surfelMomentsTexturePrev.SampleLevel(sampler_linear_clamp, surfel_moment_uv(neighbor_index, normal, L / dist), 0);
                    contribution *= surfel_moment_weight(moments, dist);

                    result += float4(neighbor.color, 1) * contribution;
                }
            }
        }
    }

    // Publish this thread's partial result for the reduction on thread 0.
    result_cache[groupIndex] = result;
#endif

    AllMemoryBarrierWithGroupSync();

    // Fill the whole tile, border included: each texel copies the interior
    // texel obtained by clamping its coordinate into the interior range.
    for (uint i = GTid.x; i < SURFEL_MOMENT_TEXELS; i += THREADCOUNT)
    {
        for (uint j = GTid.y; j < SURFEL_MOMENT_TEXELS; j += THREADCOUNT)
        {
            const uint2 pixel_write = moments_topleft + uint2(i, j);
            const uint2 pixel_read = clamp(pixel_write, moments_topleft + 1, moments_topleft + 1 + SURFEL_MOMENT_RESOLUTION - 1);
            surfelMomentsTexture[pixel_write] = surfelMomentsTexture[pixel_read];
        }
    }

    // Only the first thread of the group updates the surfel data.
    if (groupIndex > 0)
    {
        return;
    }

#ifdef SURFEL_ENABLE_IRRADIANCE_SHARING
    // Sum the partial results written by the group's threads.
    result = 0;
    for (uint c = 0; c < CACHE_SIZE; ++c)
    {
        result += result_cache[c];
    }
#endif

    if (result.a > 0)
    {
        // Average over the accumulated weight.
        result /= result.a;

        // Short-window blend factor: a base of 0.2, raised by up to 0.1 when
        // the short-term mean has drifted from the long-term mean relative to
        // the standard deviation (luma-weighted).
        float diff = 0.2;
        const float3 dev = sqrt(max(1e-5, surfel_data.variance));
        const float3 shortDiff = surfel_data.mean - surfel_data.shortMean;
        float relativeDiff = dot(float3(0.299, 0.587, 0.114), abs(shortDiff) / max(1e-5, dev));
        relativeDiff = clamp(relativeDiff, 0, 1);
        diff += relativeDiff * 0.1;

        MultiscaleMeanEstimator(result.rgb, surfel_data, diff);
    }

    life++;

    // Recycle counter: it only advances while the surfel is farther from the
    // camera than SURFEL_RECYCLE_DISTANCE and its bounding sphere is outside
    // the camera frustum; in every other case it is reset.
    const float3 cam_to_surfel = surfel.position - GetCamera().position;
    if (length(cam_to_surfel) > SURFEL_RECYCLE_DISTANCE)
    {
        ShaderSphere sphere;
        sphere.center = surfel.position;
        sphere.radius = surfel.GetRadius();

        if (GetCamera().frustum.intersects(sphere))
        {
            recycle = 0;
        }
        else
        {
            recycle++;
        }
    }
    else
    {
        recycle = 0;
    }

    // Pack life into the low 16 bits and recycle into the high 16 bits.
    surfel_data.life_recycle = (life & 0xFFFF) | ((recycle & 0xFFFF) << 16u);

    surfelDataBuffer[surfel_index] = surfel_data;
}
调参
ShaderInterop_SurfelGI.h文件开头有很多参数可以调。
surfel_integrateCS.hlsl我添加了权重计算,基于rgb的变化速度。其中MultiscaleMeanEstimator函数里面有很多权重可以微调,但是注意surfel更新过快会造成闪烁。
// Updates the running statistics stored in `data` with a new sample `y`.
//   y                - new radiance sample (modified locally when clamped)
//   data             - per-surfel statistics: mean, shortMean, vbbr, variance,
//                      inconsistency; all five are rewritten on exit
//   shortWindowBlend - blend factor used for the short-term mean
void MultiscaleMeanEstimator(float3 y, inout SurfelData data, float shortWindowBlend)
{
    const float3 luma = float3(0.299, 0.587, 0.114);

    float3 longMean = data.mean;
    float3 fastMean = data.shortMean;
    float blendReduction = data.vbbr;
    float3 var = data.variance;
    float inconsistencyEstimate = data.inconsistency;

    // Limit the sample to 0.1 + short mean + 8 standard deviations by
    // subtracting whatever exceeds that threshold.
    {
        const float3 sigma = sqrt(max(1e-5, var));
        const float3 highThreshold = 0.1 + fastMean + sigma * 8;
        y -= max(0, y - highThreshold);
    }

    // Update the short-term mean; the variance update uses the deviation from
    // the mean both before and after that update.
    const float3 deltaBefore = y - fastMean;
    fastMean = lerp(fastMean, y, shortWindowBlend);
    const float3 deltaAfter = y - fastMean;

    // The variance is blended at half the rate of the short-term mean.
    const float varianceBlend = shortWindowBlend * 0.5;
    var = lerp(var, deltaBefore * deltaAfter, varianceBlend);
    const float3 dev = sqrt(max(1e-5, var));

    // Luma-weighted gap between long and short mean, in standard deviations.
    const float3 shortDiff = longMean - fastMean;
    const float relativeDiff = dot(luma, abs(shortDiff) / max(1e-5, dev));
    inconsistencyEstimate = lerp(inconsistencyEstimate, relativeDiff, 0.08);

    const float varianceBasedBlendReduction = clamp(dot(luma, 0.5 * fastMean / max(1e-5, dev)), 1.0 / 32, 1);

    // Blend factor of the long-term mean. It is scaled by the vbbr value from
    // before this call; vbbr itself is refreshed only afterwards.
    float3 catchUpBlend = clamp(smoothstep(0, 1, relativeDiff * max(0.02, inconsistencyEstimate - 0.2)), 1.0 / 256, 1);
    catchUpBlend *= blendReduction;

    blendReduction = lerp(blendReduction, varianceBasedBlendReduction, 0.1);
    longMean = lerp(longMean, y, saturate(catchUpBlend));

    // Write everything back.
    data.mean = longMean;
    data.shortMean = fastMean;
    data.vbbr = blendReduction;
    data.variance = var;
    data.inconsistency = inconsistencyEstimate;
}