Wicked Surfel GI 实现方法

本文最后更新于:1 年前

Wicked Surfel GI 实现方法

资源链接:

Wicked Engine

Wicked Engine Net – 3D Engine Development

实现顺序

循环一:SurfelGI_Coverage

循环二:Grid reset -> Update -> Grid offsets -> Binning -> Raytracing -> Integrate rays

surfel buffer 总结

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
// Indirect-prepare pass: snapshots the surfel statistics counters, resets the
// per-frame ones, and writes the thread-group counts consumed by the indirect
// dispatches (iterate / raytrace / integrate).
void main(uint3 DTid : SV_DispatchThreadID)
{
	// Read every counter first; several of them are overwritten further down.
	const uint next_count = surfelStatsBuffer.Load(SURFEL_STATS_OFFSET_NEXTCOUNT);
	const int raw_dead_count = asint(surfelStatsBuffer.Load(SURFEL_STATS_OFFSET_DEADCOUNT));
	const uint ray_total = surfelStatsBuffer.Load(SURFEL_STATS_OFFSET_RAYCOUNT);

	// Keep the counts inside [0, SURFEL_CAPACITY].
	const uint alive_count = clamp(next_count, 0, SURFEL_CAPACITY);
	// A negative dead count means more surfels were requested than were available.
	const int missing = max(0, -raw_dead_count);
	const int dead_total = clamp(raw_dead_count, 0, SURFEL_CAPACITY);

	// Publish the clamped values and zero the counters that are rebuilt afterwards.
	surfelStatsBuffer.Store(SURFEL_STATS_OFFSET_COUNT, alive_count);
	surfelStatsBuffer.Store(SURFEL_STATS_OFFSET_NEXTCOUNT, 0);
	surfelStatsBuffer.Store(SURFEL_STATS_OFFSET_DEADCOUNT, dead_total);
	surfelStatsBuffer.Store(SURFEL_STATS_OFFSET_CELLALLOCATOR, 0);
	surfelStatsBuffer.Store(SURFEL_STATS_OFFSET_RAYCOUNT, 0);
	surfelStatsBuffer.Store(SURFEL_STATS_OFFSET_SHORTAGE, missing);

	// Group counts are rounded up so a partially filled last group is still dispatched.
	const uint iterate_groups = (alive_count + SURFEL_INDIRECT_NUMTHREADS - 1) / SURFEL_INDIRECT_NUMTHREADS;
	const uint raytrace_groups = (ray_total + SURFEL_INDIRECT_NUMTHREADS - 1) / SURFEL_INDIRECT_NUMTHREADS;

	surfelIndirectBuffer.Store3(SURFEL_INDIRECT_OFFSET_ITERATE, uint3(iterate_groups, 1, 1));
	surfelIndirectBuffer.Store3(SURFEL_INDIRECT_OFFSET_RAYTRACE, uint3(raytrace_groups, 1, 1));
	// The integrate dispatch gets the surfel count itself as its x dimension.
	surfelIndirectBuffer.Store3(SURFEL_INDIRECT_OFFSET_INTEGRATE, uint3(alive_count, 1, 1));
}

surfelStatsBuffer:存储各种count,每个count32位,4字节偏移

surfelIndirectBuffer:存储indirect计算时threads的动态分配方案

surfelCellBuffer:存储cell的surfel index

surfelGridBuffer:哈希存储结构,存储类型为SurfelGridCell,有offset和count两个值,用来记录一个surfelcell中的surfel包含情况

其它buffer没什么难点

ShaderInterop_SurfelGI.h

包含的内容主要是各种宏和常量定义,surfel的存储格式和数据压缩方案,哈希计算和末尾的各种加权计算。

surfel_indirectprepareCS.hlsl

具体内容就是上面贴的各种buffer的分配,一定要看懂。

surfel_binningCS.hlsl

遍历surfel,查找每个surfel所在的surfel_cell,在以该cell为中心的3*3*3邻域(27个偏移)内判断该surfel覆盖了哪些cell。对每个被覆盖的cell,将其surfelGridBuffer中的count原子加一,并把surfel_index写入surfelCellBuffer中该cell对应的位置(offset + 加一之前的count)。注意写入的是各个被覆盖cell自己的条目,而不是只写surfel中心所在的surfel_cell。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
// Binning excerpt: registers the current surfel in every grid cell it overlaps.
// Surfels whose radius is not positive are skipped entirely.
if (surfel.GetRadius() > 0)
{
int3 center_cell = surfel_cell(surfel.position);
// Walk the 27 entries of surfel_neighbor_offsets relative to the surfel's own cell.
for (uint i = 0; i < 27; ++i)
{
int3 gridpos = center_cell + surfel_neighbor_offsets[i];
// Test whether the surfel covers this candidate cell
if (surfel_cellintersects(surfel, gridpos))
{
uint cellindex = surfel_cellindex(gridpos);
uint prevCount;
// Atomically increment the cell's count; prevCount receives the value before the add
// and serves as this surfel's slot inside the cell's range of surfelCellBuffer.
InterlockedAdd(surfelGridBuffer[cellindex].count, 1, prevCount);
surfelCellBuffer[surfelGridBuffer[cellindex].offset + prevCount] = surfel_index;
}
}

}

surfel_coverageCS.hlsl

将屏幕空间分为16*16的pixel cell,遍历每个pixel,转化为世界坐标,通过世界坐标换算成哈希值(即cellindex),然后拿这个cellindex去surfelGridBuffer里找到有哪些surfel覆盖,通过方向、距离加权计算覆盖率(颜色的权重还会再乘上moments深度权重和surfel的life),统计最小覆盖率后生成新的surfel。还有一些屏幕空间的debug函数。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
// Coverage excerpt: for the pixel at world position P with normal N, accumulate
// coverage, color and debug output from the surfels listed in the pixel's grid cell,
// then publish a packed coverage key for the per-group minimum search.
// Iterate over every surfel in the cell
for (uint i = 0; i < cell.count; ++i)
{
uint surfel_index = surfelCellBuffer[cell.offset + i];
Surfel surfel = surfelBuffer[surfel_index];
// Weighting computations
float3 L = P - surfel.position;
float dist2 = dot(L, L);
// Only surfels whose radius reaches the pixel contribute.
if (dist2 < sqr(surfel.GetRadius()))
{
float3 normal = normalize(unpack_unitvector(surfel.normal));
float dotN = dot(N, normal);
// Ignore surfels facing away from the pixel's normal.
if (dotN > 0)
{
float dist = sqrt(dist2);
float contribution = 1;

// Weight by normal alignment and by linear distance falloff, then smooth the product.
contribution *= saturate(dotN);
contribution *= saturate(1 - dist / surfel.GetRadius());
contribution = smoothstep(0, 1, contribution);
// Coverage is accumulated before the moments and life weights are applied.
coverage += contribution;

float2 moments = surfelMomentsTexture.SampleLevel(sampler_linear_clamp, surfel_moment_uv(surfel_index, normal, L / dist), 0);
contribution *= surfel_moment_weight(moments, dist);

// contribution based on life can eliminate black popping surfels, but the surfel_data must be accessed...
// NOTE(review): the blend factor GetLife() / 2.0f is not saturated here, so it exceeds 1
// once life > 2 and lerp extrapolates — confirm whether a saturate() is intended.
contribution = lerp(0, contribution, surfelDataBuffer[surfel_index].GetLife() / 2.0f);

// Alpha accumulates the total weight alongside the weighted color.
color += float4(surfel.color, 1) * contribution;

switch (push.debug)
{
case SURFEL_DEBUG_NORMAL:
debug.rgb += normal * contribution;
debug.a = 1;
break;
case SURFEL_DEBUG_RANDOM:
debug += float4(random_color(surfel_index), 1) * contribution;
break;
case SURFEL_DEBUG_INCONSISTENCY:
debug += float4(surfelDataBuffer[surfel_index].inconsistency.xxx, 1) * contribution;
break;
default:
break;
}

}

// Point debug view: mark pixels within 0.05 units of a surfel position.
if (push.debug == SURFEL_DEBUG_POINT)
{
if (dist2 <= sqr(0.05))
debug = float4(1, 0, 1, 1);
}
}

}
// Pack the coverage into a sortable key and reduce it into GroupMinSurfelCount
// Cells that already hold SURFEL_CELL_LIMIT entries do not take part.
if (cell.count < SURFEL_CELL_LIMIT)
{
// Key layout: bits 24-31 coverage, bits 8-23 random value, bits 4-7 GTid.x, bits 0-3 GTid.y.
uint surfel_count_at_pixel = 0;
surfel_count_at_pixel |= (uint(coverage) & 0xFF) << 24; // the upper bits matter most for min selection
surfel_count_at_pixel |= (uint(rng.next_float() * 65535) & 0xFFFF) << 8; // shuffle pixels randomly
surfel_count_at_pixel |= (GTid.x & 0xF) << 4;
surfel_count_at_pixel |= (GTid.y & 0xF) << 0;
InterlockedMin(GroupMinSurfelCount, surfel_count_at_pixel);
}

surfel_gridoffsetsCS.hlsl surfel_gridresetCS.hlsl

遍历、初始化各个grid,统计总count。

surfel_raytraceCS.hlsl

目前这个surfel实现的raytracing原理就是遍历场景中每一条光线。找到光线源的surfel,然后从这个surfel向这个光线的方向附近发射新的光线进行重要性采样,hit到surface后,hit点周围的surfel颜色加权平均,和光源direct diffuse的结果相加,将结果保存在这个raydata里。每一帧渲染的时候就查找surfel的raydata颜色,和surfel本身的颜色加权。实际上就是做了一次光源直接光照和每个surfel的间接光照。目前搜集到的资料来看,实现的surfel基本上都是这么做的。多次反射还没有具体的实现。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
// Weighted accumulation of surfel colors at the ray hit point (surface.P / surface.N);
// the normalized result is added to hit_result.
{
// rgb holds the weighted color sum, a holds the sum of weights.
float4 surfel_gi = 0;
uint cellindex = surfel_cellindex(surfel_cell(surface.P));
SurfelGridCell cell = surfelGridBuffer[cellindex];
// Visit every surfel binned into the cell that contains the hit point.
for (uint i = 0; i < cell.count; ++i)
{
uint surfel_index = surfelCellBuffer[cell.offset + i];
Surfel surfel = surfelBuffer[surfel_index];

float3 L = surface.P - surfel.position;
float dist2 = dot(L, L);
// Only surfels whose radius reaches the hit point contribute.
if (dist2 < sqr(surfel.GetRadius()))
{
float3 normal = normalize(unpack_unitvector(surfel.normal));
float dotN = dot(surface.N, normal);
// Ignore surfels facing away from the surface normal.
if (dotN > 0)
{
float dist = sqrt(dist2);
float contribution = 1;

// Weight by normal alignment and by linear distance falloff, then smooth the product.
contribution *= saturate(dotN);
contribution *= saturate(1 - dist / surfel.GetRadius());
contribution = smoothstep(0, 1, contribution);

// The moments are sampled from surfelMomentsTexturePrev rather than the current texture.
float2 moments = surfelMomentsTexturePrev.SampleLevel(sampler_linear_clamp, surfel_moment_uv(surfel_index, normal, L / dist), 0);
contribution *= surfel_moment_weight(moments, dist);

surfel_gi += float4(surfel.color, 1) * contribution;

}
}
}
// Normalize only if at least one surfel contributed (avoids dividing by zero).
if (surfel_gi.a > 0)
{
// Scale the color by 0.95 before normalizing by the total weight.
const float energy_conservation = 0.95;
surfel_gi.rgb *= energy_conservation;
surfel_gi.rgb /= surfel_gi.a;
surfel_gi.a = saturate(surfel_gi.a);
hit_result += max(0, surfel_gi.rgb);
}
}

surfel_updateCS.hlsl

实现surfel的data更新。

遍历当前surfel,通过surfel_data中保存的数据计算。若此surfel仍旧在某个surface上,那么生成新的surface数据,并更新surfel.data,统计这个新surfel在世界空间中覆盖的cell。根据surfel的生命周期进行ray request(申请本帧要发射的光线数量),将新数据打包进surfel.data后写入surfelBuffer和surfelRayBuffer。若当前surfel没覆盖到表面,则写入surfelDeadBuffer。

surfel_integrateCS.hlsl

surfel处理主函数。为每个surfel都开一个8*8的线程组,因此Gid代表每个surfel。内容很多,直接写到注释里吧。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
// Surfel integration pass. One thread group handles one surfel (Gid.x indexes
// surfelAliveBuffer); the threads of the group cover the surfel's octahedral
// moment texels (8*8 according to the article; THREADCOUNT is defined elsewhere).
// The pass folds the surfel's traced rays into its depth moments, optionally
// shares irradiance with neighbouring surfels, updates the temporal statistics
// and writes back the life / recycle counters.
[numthreads(THREADCOUNT, THREADCOUNT, 1)]
void main(uint3 DTid : SV_DispatchThreadID, uint3 Gid : SV_GroupID, uint3 GTid : SV_GroupThreadID, uint groupIndex : SV_GroupIndex)
{
// Load the current surfel's data
uint surfel_index = surfelAliveBuffer[Gid.x];
Surfel surfel = surfelBuffer[surfel_index];
SurfelData surfel_data = surfelDataBuffer[surfel_index];
uint life = surfel_data.GetLife();
uint recycle = surfel_data.GetRecycle();
// Ray depths are clamped to the surfel radius below.
float maxDistance = surfel.GetRadius();

const float3 P = surfel.position;
const float3 N = normalize(unpack_unitvector(surfel.normal));

// Compute the direction of this thread's octahedral texel:
// GTid.xy is mapped to [-1, 1], decoded with decode_hemioct and transformed by get_tangentspace(N).
float3 texel_direction = decode_hemioct(((GTid.xy + 0.5) / (float2)SURFEL_MOMENT_RESOLUTION) * 2 - 1);
texel_direction = mul(texel_direction, get_tangentspace(N));
texel_direction = normalize(texel_direction);

// result: summed ray radiance in rgb, sample count / weight in a.
// result_depth: weighted sums of depth and depth squared for this texel.
float4 result = 0;
float2 result_depth = 0;
float total_weight = 0;

// Read the surfel's ray data, processed in batches of at most CACHE_SIZE rays
uint remaining_rays = surfel.GetRayCount();
uint offset = surfel.GetRayOffset();
while (remaining_rays > 0)
{
uint num_rays = min(CACHE_SIZE, remaining_rays);

// Each thread with groupIndex < num_rays loads one ray of the batch into ray_cache
if (groupIndex < num_rays)
{
ray_cache[groupIndex] = surfelRayBuffer[offset + groupIndex].load();
}

// Synchronize: the whole batch must be in ray_cache before any thread reads it
GroupMemoryBarrierWithGroupSync();

// Iterate over each ray of the current batch
for (uint r = 0; r < num_rays; ++r)
{
SurfelRayData ray = ray_cache[r];
result += float4(ray.radiance, 1);

// A non-positive ray depth is replaced by maxDistance (presumably a miss — TODO confirm).
float depth;
if (ray.depth > 0)
{
depth = clamp(ray.depth, 0, maxDistance);
}
else
{
depth = maxDistance;
}
// NOTE(review): `radiance` is not used anywhere else in this function.
const float3 radiance = ray.radiance.rgb;

// Weight by direction: alignment between this texel's direction and the ray, raised to the 32nd power
float weight = saturate(dot(texel_direction, ray.direction) + 0.01);
weight = pow(weight, 32);

if (weight > WEIGHT_EPSILON)
{
result_depth += float2(depth, sqr(depth)) * weight;
total_weight += weight;
}
}

// Synchronize: all threads must finish reading ray_cache before the next batch overwrites it
GroupMemoryBarrierWithGroupSync();

remaining_rays -= num_rays;
offset += num_rays;
}

// Map the surfel index to 2D texture coordinates and update this texel's depth moments
// with the direction-weighted ray depths accumulated above
uint2 moments_topleft = unflatten2D(surfel_index, SQRT_SURFEL_CAPACITY) * SURFEL_MOMENT_TEXELS;
if (total_weight > WEIGHT_EPSILON && GTid.x < SURFEL_MOMENT_RESOLUTION && GTid.y < SURFEL_MOMENT_RESOLUTION)
{
result_depth /= total_weight;

// The +1 skips the one-texel border of the surfel's tile; the border is filled further down.
uint2 moments_pixel = moments_topleft + 1 + GTid.xy;
// Surfels with life > 0 blend 2% of the new moments into the previous ones;
// surfels with life == 0 take the new moments directly.
if (life > 0)
{
const float2 prev_moment = surfelMomentsTexturePrev[moments_pixel];
result_depth = lerp(prev_moment, result_depth, 0.02);
}
surfelMomentsTexture[moments_pixel] = result_depth;
}


// Iterate over the surfels in the cell that contains this surfel's world position and
// accumulate irradiance weighted by direction and distance
#ifdef SURFEL_ENABLE_IRRADIANCE_SHARING
// Surfel irradiance sharing:
{
uint cellindex = surfel_cellindex(surfel_cell(P));
SurfelGridCell cell = surfelGridBuffer[cellindex];
// NOTE(review): every thread starts at i = 0 and strides by THREADCOUNT * THREADCOUNT, so all
// threads visit the same entries (0, stride, 2 * stride, ...) — confirm whether the loop was
// meant to start at groupIndex.
for (uint i = 0; i < cell.count; i += THREADCOUNT * THREADCOUNT)
{
// These declarations shadow the outer surfel_index / surfel inside the loop body.
uint surfel_index = surfelCellBuffer[cell.offset + i];
Surfel surfel = surfelBuffer[surfel_index];
const float combined_radius = surfel.GetRadius() + maxDistance;

float3 L = P - surfel.position;
float dist2 = dot(L, L);
if (dist2 < sqr(combined_radius))
{
float3 normal = normalize(unpack_unitvector(surfel.normal));
float dotN = dot(N, normal);
if (dotN > 0)
{
float dist = sqrt(dist2);
float contribution = 1;

contribution *= saturate(dotN);
contribution *= saturate(1 - dist / combined_radius);
contribution = smoothstep(0, 1, contribution);

float2 moments = surfelMomentsTexturePrev.SampleLevel(sampler_linear_clamp, surfel_moment_uv(surfel_index, normal, L / dist), 0);
contribution *= surfel_moment_weight(moments, dist);

result += float4(surfel.color, 1) * contribution;

}
}
}
}
// Publish this thread's partial result; the entries are summed by thread 0 below.
result_cache[groupIndex] = result;
#endif // SURFEL_ENABLE_IRRADIANCE_SHARING

// Synchronize: wait until every thread's moment and result_cache writes are done
AllMemoryBarrierWithGroupSync();

// Write the freshly computed moments into the border texels of the surfel's tile
// Copy moment borders:
for (uint i = GTid.x; i < SURFEL_MOMENT_TEXELS; i += THREADCOUNT)
{
for (uint j = GTid.y; j < SURFEL_MOMENT_TEXELS; j += THREADCOUNT)
{
// Each texel of the tile reads from the nearest interior texel (interior texels map to themselves).
uint2 pixel_write = moments_topleft + uint2(i, j);
uint2 pixel_read = clamp(pixel_write, moments_topleft + 1, moments_topleft + 1 + SURFEL_MOMENT_RESOLUTION - 1);
surfelMomentsTexture[pixel_write] = surfelMomentsTexture[pixel_read];
}
}

// Only the first thread of the group performs the per-surfel update below.
if (groupIndex > 0)
return;

#ifdef SURFEL_ENABLE_IRRADIANCE_SHARING
// Sum the per-thread partial results.
// NOTE(review): iterates CACHE_SIZE entries — assumes CACHE_SIZE matches the number of
// threads that wrote result_cache; confirm against the definitions.
result = 0;
for (uint c = 0; c < CACHE_SIZE; ++c)
{
result += result_cache[c];
}
#endif // SURFEL_ENABLE_IRRADIANCE_SHARING

// Update the temporal statistics only if something was accumulated.
if (result.a > 0)
{
// Normalize the accumulated radiance by the total weight.
result /= result.a;
// Base blend factor for the short-term mean.
float diff = 0.2;
float3 dev = sqrt(max(1e-5, surfel_data.variance));
float3 shortDiff = surfel_data.mean - surfel_data.shortMean;

// Author's own modification: factor in the short-term change so that the surfel converges faster
// (the luma-weighted difference between the means, relative to the deviation, adds up to 0.1 to the blend)
float relativeDiff = dot(float3(0.299, 0.587, 0.114),
abs(shortDiff) / max(1e-5, dev));
relativeDiff = clamp(relativeDiff, 0, 1);
diff += relativeDiff * 0.1;
MultiscaleMeanEstimator(result.rgb, surfel_data, diff);
}

life++;

// Update the recycle counter; surfels that stay unused get recycled
// The counter grows only while the surfel is both farther than SURFEL_RECYCLE_DISTANCE
// from the camera and outside the camera frustum; otherwise it is reset to 0.
float3 cam_to_surfel = surfel.position - GetCamera().position;
if (length(cam_to_surfel) > SURFEL_RECYCLE_DISTANCE)
{
ShaderSphere sphere;
sphere.center = surfel.position;
sphere.radius = surfel.GetRadius();

if (GetCamera().frustum.intersects(sphere))
{
recycle = 0;
}
else
{
recycle++;
}
}
else
{
recycle = 0;
}

// Pack life into the low 16 bits and recycle into the high 16 bits.
surfel_data.life_recycle = 0;
surfel_data.life_recycle |= life & 0xFFFF;
surfel_data.life_recycle |= (recycle & 0xFFFF) << 16u;

surfelDataBuffer[surfel_index] = surfel_data;
}

调参

ShaderInterop_SurfelGI.h文件开头有很多参数可以调。

surfel_integrateCS.hlsl中我添加了基于rgb变化速度的权重计算。其中MultiscaleMeanEstimator函数里面有很多权重可以微调,但是注意surfel更新过快会造成闪烁。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
// Folds a new radiance sample into a surfel's running statistics.
//   y                : the new sample; it is clamped against fireflies before use
//   data             : statistics (mean, shortMean, vbbr, variance, inconsistency),
//                      read and then overwritten in place
//   shortWindowBlend : lerp factor of the short-term mean; the variance uses half of it
void MultiscaleMeanEstimator(
	float3 y,
	inout SurfelData data,
	float shortWindowBlend
)
{
	const float3 lumaWeights = float3(0.299, 0.587, 0.114);

	// Incoming state that later steps still need after the fields are refreshed.
	const float3 prevShortMean = data.shortMean;
	const float prevVbbr = data.vbbr;

	// Suppress fireflies: anything above the short-term mean plus 8 deviations (+0.1) is cut off.
	const float3 prevDev = sqrt(max(1e-5, data.variance));
	const float3 fireflyLimit = 0.1 + prevShortMean + prevDev * 8;
	y -= max(0, y - fireflyLimit);

	// Short-term mean, with the sample's distance to it before and after the update.
	const float3 deltaBefore = y - prevShortMean;
	const float3 newShortMean = lerp(prevShortMean, y, shortWindowBlend);
	const float3 deltaAfter = y - newShortMean;

	// This should be a longer window than shortWindowBlend to avoid bias
	// from the variance getting smaller when the short-term mean does.
	const float3 newVariance = lerp(data.variance, deltaBefore * deltaAfter, shortWindowBlend * 0.5);
	const float3 newDev = sqrt(max(1e-5, newVariance));

	// Luma-weighted gap between the long- and short-term means, in units of deviation.
	const float relativeDiff = dot(lumaWeights,
		abs(data.mean - newShortMean) / max(1e-5, newDev));
	const float newInconsistency = lerp(data.inconsistency, relativeDiff, 0.08);

	// Target for the variance-based blend reduction (vbbr), kept inside [1/32, 1].
	const float vbbrTarget =
		clamp(dot(lumaWeights,
			0.5 * newShortMean / max(1e-5, newDev)), 1.0 / 32, 1);

	// The catch-up rate uses the refreshed inconsistency but the vbbr from before this call.
	float3 catchUpBlend = clamp(smoothstep(0, 1,
		relativeDiff * max(0.02, newInconsistency - 0.2)), 1.0 / 256, 1);
	catchUpBlend *= prevVbbr;

	// Output
	data.mean = lerp(data.mean, y, saturate(catchUpBlend));
	data.shortMean = newShortMean;
	data.vbbr = lerp(prevVbbr, vbbrTarget, 0.1);
	data.variance = newVariance;
	data.inconsistency = newInconsistency;
}