voxcity 1.0.2__py3-none-any.whl → 1.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. voxcity/downloader/ocean.py +559 -0
  2. voxcity/generator/api.py +6 -0
  3. voxcity/generator/grids.py +45 -32
  4. voxcity/generator/pipeline.py +327 -27
  5. voxcity/geoprocessor/draw.py +14 -8
  6. voxcity/geoprocessor/raster/__init__.py +2 -0
  7. voxcity/geoprocessor/raster/core.py +31 -0
  8. voxcity/geoprocessor/raster/landcover.py +173 -49
  9. voxcity/geoprocessor/raster/raster.py +1 -1
  10. voxcity/models.py +2 -0
  11. voxcity/simulator_gpu/__init__.py +115 -0
  12. voxcity/simulator_gpu/common/__init__.py +9 -0
  13. voxcity/simulator_gpu/common/geometry.py +11 -0
  14. voxcity/simulator_gpu/core.py +322 -0
  15. voxcity/simulator_gpu/domain.py +262 -0
  16. voxcity/simulator_gpu/environment.yml +11 -0
  17. voxcity/simulator_gpu/init_taichi.py +154 -0
  18. voxcity/simulator_gpu/integration.py +15 -0
  19. voxcity/simulator_gpu/kernels.py +56 -0
  20. voxcity/simulator_gpu/radiation.py +28 -0
  21. voxcity/simulator_gpu/raytracing.py +623 -0
  22. voxcity/simulator_gpu/sky.py +9 -0
  23. voxcity/simulator_gpu/solar/__init__.py +178 -0
  24. voxcity/simulator_gpu/solar/core.py +66 -0
  25. voxcity/simulator_gpu/solar/csf.py +1249 -0
  26. voxcity/simulator_gpu/solar/domain.py +561 -0
  27. voxcity/simulator_gpu/solar/epw.py +421 -0
  28. voxcity/simulator_gpu/solar/integration.py +2953 -0
  29. voxcity/simulator_gpu/solar/radiation.py +3019 -0
  30. voxcity/simulator_gpu/solar/raytracing.py +686 -0
  31. voxcity/simulator_gpu/solar/reflection.py +533 -0
  32. voxcity/simulator_gpu/solar/sky.py +907 -0
  33. voxcity/simulator_gpu/solar/solar.py +337 -0
  34. voxcity/simulator_gpu/solar/svf.py +446 -0
  35. voxcity/simulator_gpu/solar/volumetric.py +1151 -0
  36. voxcity/simulator_gpu/solar/voxcity.py +2953 -0
  37. voxcity/simulator_gpu/temporal.py +13 -0
  38. voxcity/simulator_gpu/utils.py +25 -0
  39. voxcity/simulator_gpu/view.py +32 -0
  40. voxcity/simulator_gpu/visibility/__init__.py +109 -0
  41. voxcity/simulator_gpu/visibility/geometry.py +278 -0
  42. voxcity/simulator_gpu/visibility/integration.py +808 -0
  43. voxcity/simulator_gpu/visibility/landmark.py +753 -0
  44. voxcity/simulator_gpu/visibility/view.py +944 -0
  45. voxcity/visualizer/renderer.py +2 -1
  46. {voxcity-1.0.2.dist-info → voxcity-1.0.13.dist-info}/METADATA +16 -53
  47. {voxcity-1.0.2.dist-info → voxcity-1.0.13.dist-info}/RECORD +50 -15
  48. {voxcity-1.0.2.dist-info → voxcity-1.0.13.dist-info}/WHEEL +0 -0
  49. {voxcity-1.0.2.dist-info → voxcity-1.0.13.dist-info}/licenses/AUTHORS.rst +0 -0
  50. {voxcity-1.0.2.dist-info → voxcity-1.0.13.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,533 @@
1
+ """
2
+ Optimized Radiation Computation for GPU
3
+
4
+ This module provides optimized GPU kernels for radiation computation
5
+ that minimize kernel launches and synchronization overhead.
6
+
7
+ Key optimizations:
8
+ 1. Fused kernels - combine multiple operations into single kernel launches
9
+ 2. Reduced synchronization - batch operations to minimize ti.sync() calls
10
+ 3. Better memory access patterns - coalesced memory access
11
+ 4. Reduced atomic operations - use local accumulation where possible
12
+ """
13
+
14
+ import taichi as ti
15
+ import numpy as np
16
+ from typing import Optional
17
+
18
+ # Vector type
19
+ Vector3 = ti.math.vec3
20
+
21
+
22
@ti.data_oriented
class OptimizedReflectionSolver:
    """
    GPU solver for multi-bounce radiation reflections.

    All working buffers are allocated once in ``__init__``. Each bounce is
    one kernel launch made of three parallel phases (outgoing radiation,
    sparse view-factor distribution, urban view-factor scaling), and the
    bounces themselves are sequenced from Python scope.

    The bounce loop must NOT live inside a kernel as a runtime ``range``
    loop: Taichi parallelises every for-loop at the outermost scope of a
    kernel, so the bounces would execute concurrently (racing on the
    ping-pong buffers) while the per-surface loops nested inside would run
    serially.
    """

    def __init__(
        self,
        n_surfaces: int,
        max_svf_entries: int,
        n_reflection_steps: int = 3
    ):
        """
        Allocate the solver's buffers.

        Args:
            n_surfaces: Number of surface elements; sizes every buffer.
            max_svf_entries: Stored on the instance; not used to size any
                buffer allocated here.
            n_reflection_steps: Stored on the instance; the number of
                bounces actually run is the ``n_steps`` argument of
                ``solve_reflections_fused``.
        """
        self.n_surfaces = n_surfaces
        self.max_svf_entries = max_svf_entries
        self.n_reflection_steps = n_reflection_steps

        # Ping-pong buffers: one holds the incoming radiation of the
        # current bounce, the other receives the next bounce.
        self._surfins_a = ti.field(dtype=ti.f32, shape=(n_surfaces,))
        self._surfins_b = ti.field(dtype=ti.f32, shape=(n_surfaces,))
        # Outgoing (reflected) radiation of the current bounce.
        self._surfout = ti.field(dtype=ti.f32, shape=(n_surfaces,))

        # Accumulated totals over the initial pass and all bounces.
        self._total_incoming = ti.field(dtype=ti.f32, shape=(n_surfaces,))
        self._total_outgoing = ti.field(dtype=ti.f32, shape=(n_surfaces,))

    @ti.kernel
    def _init_buffers(self, initial_sw: ti.template()):
        """Seed buffer A and the incoming total with ``initial_sw``; zero the outgoing total."""
        n_surf = self.n_surfaces
        for i in range(n_surf):
            self._surfins_a[i] = initial_sw[i]
            self._total_incoming[i] = initial_sw[i]
            self._total_outgoing[i] = 0.0

    @ti.kernel
    def _reflection_step(
        self,
        surfins_in: ti.template(),
        surfins_out: ti.template(),
        albedo: ti.template(),
        svf: ti.template(),
        svf_source: ti.template(),
        svf_target: ti.template(),
        svf_vf: ti.template(),
        svf_trans: ti.template(),
        svf_nnz: ti.i32
    ):
        """
        Run one reflection bounce from ``surfins_in`` into ``surfins_out``.

        Every loop below is at the outermost kernel scope, so each one is
        parallelised and they run one after another in source order.
        """
        n_surf = self.n_surfaces

        # Phase 1: outgoing = albedo * incoming; clear the receiving buffer.
        for i in range(n_surf):
            out = albedo[i] * surfins_in[i]
            self._surfout[i] = out
            self._total_outgoing[i] += out
            surfins_out[i] = 0.0

        # Phase 2: sparse (COO) matrix-vector product. Several entries may
        # share a target, hence the atomic accumulation.
        for idx in range(svf_nnz):
            outgoing = self._surfout[svf_source[idx]]
            if outgoing > 0.01:
                ti.atomic_add(
                    surfins_out[svf_target[idx]],
                    outgoing * svf_vf[idx] * svf_trans[idx]
                )

        # Phase 3: scale by the urban view factor (1 - svf) and accumulate.
        for i in range(n_surf):
            urban_vf = 1.0 - svf[i]
            if urban_vf < 0.01:
                surfins_out[i] = 0.0
            else:
                surfins_out[i] *= urban_vf
                self._total_incoming[i] += surfins_out[i]

    def solve_reflections_fused(
        self,
        # Initial radiation
        initial_sw,
        # Surface properties
        albedo,
        svf,
        # Cached SVF matrix (sparse COO)
        svf_source,
        svf_target,
        svf_vf,
        svf_trans,
        svf_nnz,
        # Number of reflection steps
        n_steps
    ):
        """
        Compute the complete multi-bounce reflection on the GPU.

        Args:
            initial_sw: Field with the initial radiation per surface.
            albedo: Field with the albedo per surface.
            svf: Field with the sky view factor per surface.
            svf_source: Field of source-surface indices, one per entry.
            svf_target: Field of target-surface indices, one per entry.
            svf_vf: Field of view factors, one per entry.
            svf_trans: Field of transmissivities, one per entry.
            svf_nnz: Number of entries to read from the four fields above.
            n_steps: Number of reflection bounces to run.

        Results are left in the accumulated totals; read them with
        ``get_results``. No host synchronisation is forced between
        launches; reading the results back synchronises as needed.
        """
        self._init_buffers(initial_sw)

        # Bounce 0 reads A and writes B, bounce 1 reads B and writes A, ...
        current, upcoming = self._surfins_a, self._surfins_b
        for _ in range(int(n_steps)):
            self._reflection_step(
                current, upcoming,
                albedo, svf,
                svf_source, svf_target, svf_vf, svf_trans,
                svf_nnz
            )
            current, upcoming = upcoming, current

    def get_results(self):
        """Get accumulated totals as numpy arrays."""
        return {
            'total_incoming': self._total_incoming.to_numpy(),
            'total_outgoing': self._total_outgoing.to_numpy()
        }
152
+
153
+
154
@ti.kernel
def fused_reflection_step_kernel(
    # Current incoming radiation (input)
    surfins_in: ti.template(),
    # Next incoming radiation (output, will be accumulated)
    surfins_out: ti.template(),
    # Outgoing buffer (temporary)
    surfout: ti.template(),
    # Surface properties
    albedo: ti.template(),
    svf: ti.template(),
    # Accumulated totals
    total_incoming: ti.template(),
    total_outgoing: ti.template(),
    # Cached SVF matrix
    svf_source: ti.template(),
    svf_target: ti.template(),
    svf_vf: ti.template(),
    svf_trans: ti.template(),
    svf_nnz: ti.i32,
    n_surfaces: ti.i32
):
    """
    Single fused kernel for one reflection step.

    Combines outgoing computation, distribution through the sparse
    view-factor entries, and accumulation in one launch.

    The three loops below are all at the outermost kernel scope: each is
    parallelised and they execute in source order, so a later phase sees
    the completed writes of the earlier one. No explicit barrier is placed
    between them; ``ti.sync()`` is a host-side call and is not a device
    barrier when written inside a kernel.

    ``surfins_in`` and ``surfins_out`` must be different fields, because
    phase 1 clears ``surfins_out`` while reading ``surfins_in``.
    """
    # Phase 1: Compute outgoing and reset output buffer
    for i in range(n_surfaces):
        out = albedo[i] * surfins_in[i]
        surfout[i] = out
        total_outgoing[i] += out
        surfins_out[i] = 0.0

    # Phase 2: Sparse matrix-vector multiply. Several entries may share a
    # target surface, hence the atomic accumulation.
    for idx in range(svf_nnz):
        source = svf_source[idx]
        target = svf_target[idx]
        vf = svf_vf[idx]
        trans = svf_trans[idx]

        outgoing = surfout[source]
        # Contributions from sources at or below 0.01 are skipped.
        if outgoing > 0.01:
            ti.atomic_add(surfins_out[target], outgoing * vf * trans)

    # Phase 3: Apply urban VF scaling (1 - svf) and accumulate
    for i in range(n_surfaces):
        urban_vf = 1.0 - svf[i]
        if urban_vf < 0.01:
            surfins_out[i] = 0.0
        else:
            surfins_out[i] *= urban_vf
            total_incoming[i] += surfins_out[i]
212
+
213
+
214
@ti.kernel
def _initial_radiation_kernel(
    surf_direction: ti.template(),
    surf_svf: ti.template(),
    surf_shadow: ti.template(),
    surf_canopy_trans: ti.template(),
    sun_dir_x: ti.f32,
    sun_dir_y: ti.f32,
    sun_dir_z: ti.f32,
    cos_zenith: ti.f32,
    sw_direct: ti.f32,
    sw_diffuse: ti.f32,
    n_surfaces: ti.i32,
    sw_in_direct: ti.template(),
    sw_in_diffuse: ti.template(),
    sw_in_reflected: ti.template(),
    sw_out_total: ti.template(),
    surfins_a: ti.template()
):
    """Initial pass: direct + diffuse radiation per surface; seeds ``surfins_a``."""
    min_stable_coszen = 0.0262

    for i in range(n_surfaces):
        direction = surf_direction[i]
        svf_val = surf_svf[i]
        shadow = surf_shadow[i]
        canopy_trans = surf_canopy_trans[i]

        # Axis-aligned surface normal selected by the direction code.
        normal_x, normal_y, normal_z = 0.0, 0.0, 0.0
        if direction == 0:  # Up
            normal_z = 1.0
        elif direction == 1:  # Down
            normal_z = -1.0
        elif direction == 2:  # North
            normal_y = 1.0
        elif direction == 3:  # South
            normal_y = -1.0
        elif direction == 4:  # East
            normal_x = 1.0
        elif direction == 5:  # West
            normal_x = -1.0

        # Cosine of incidence, clamped so back-facing surfaces get zero.
        cos_inc = sun_dir_x * normal_x + sun_dir_y * normal_y + sun_dir_z * normal_z
        cos_inc = ti.max(0.0, cos_inc)

        # Direct radiation: only when the sun is above the stability
        # threshold and the shadow value is below 0.5.
        sw_dir = 0.0
        if cos_zenith > min_stable_coszen and shadow < 0.5:
            sw_dir = sw_direct * cos_inc * canopy_trans

        # Diffuse radiation: none on downward-facing surfaces.
        sw_dif = 0.0
        if direction != 1:
            sw_dif = sw_diffuse * svf_val

        sw_in_direct[i] = sw_dir
        sw_in_diffuse[i] = sw_dif
        sw_in_reflected[i] = 0.0
        sw_out_total[i] = 0.0

        # Input of the first reflection bounce.
        surfins_a[i] = sw_dir + sw_dif


def compute_initial_and_reflections_fused(
    # Surface properties
    surf_direction,
    surf_svf,
    surf_shadow,
    surf_canopy_trans,
    surf_albedo,
    surf_normal,
    # Sun properties
    sun_dir_x,
    sun_dir_y,
    sun_dir_z,
    cos_zenith,
    # Radiation inputs
    sw_direct,
    sw_diffuse,
    # SVF matrix
    svf_source,
    svf_target,
    svf_vf,
    svf_trans,
    svf_nnz,
    # Number of surfaces and reflection steps
    n_surfaces,
    n_ref_steps,
    # Output arrays (preallocated)
    sw_in_direct,
    sw_in_diffuse,
    sw_in_reflected,
    sw_out_total,
    # Temporary buffers (ping-pong)
    surfins_a,
    surfins_b,
    surfout
):
    """
    Initial radiation pass followed by all reflection iterations.

    The initial pass is one kernel launch and every bounce is one launch of
    ``fused_reflection_step_kernel``. The bounce loop is driven from Python
    scope on purpose: a runtime ``for step in range(...)`` at the outermost
    scope of a Taichi kernel is parallelised over ``step``, which would run
    the bounces concurrently and race on the ping-pong buffers.

    Args:
        surf_direction: Field of direction codes (0 up, 1 down, 2 north,
            3 south, 4 east, 5 west).
        surf_svf: Field of sky view factors.
        surf_shadow: Field of shadow values; direct radiation is applied
            only where the value is below 0.5.
        surf_canopy_trans: Field of canopy transmissivities for direct
            radiation.
        surf_albedo: Field of surface albedos.
        surf_normal: Accepted for interface compatibility; not read (the
            normal is derived from ``surf_direction``).
        sun_dir_x, sun_dir_y, sun_dir_z: Sun direction components.
        cos_zenith: Cosine of the solar zenith angle.
        sw_direct, sw_diffuse: Direct and diffuse radiation inputs.
        svf_source, svf_target, svf_vf, svf_trans: Sparse view-factor
            entries (source index, target index, view factor,
            transmissivity).
        svf_nnz: Number of sparse entries to process.
        n_surfaces: Number of surfaces to process.
        n_ref_steps: Number of reflection bounces.
        sw_in_direct, sw_in_diffuse, sw_in_reflected, sw_out_total:
            Preallocated output fields, overwritten by this call.
        surfins_a, surfins_b, surfout: Preallocated scratch fields.
    """
    # ========== Phase 1: Initial radiation pass ==========
    _initial_radiation_kernel(
        surf_direction, surf_svf, surf_shadow, surf_canopy_trans,
        sun_dir_x, sun_dir_y, sun_dir_z, cos_zenith,
        sw_direct, sw_diffuse,
        n_surfaces,
        sw_in_direct, sw_in_diffuse, sw_in_reflected, sw_out_total,
        surfins_a
    )

    # ========== Phase 2: Reflection iterations ==========
    # Bounce 0 reads A and writes B, bounce 1 reads B and writes A, ...
    # Reflected input accumulates into sw_in_reflected and outgoing
    # radiation into sw_out_total.
    current, upcoming = surfins_a, surfins_b
    for _ in range(int(n_ref_steps)):
        fused_reflection_step_kernel(
            current, upcoming, surfout,
            surf_albedo, surf_svf,
            sw_in_reflected, sw_out_total,
            svf_source, svf_target, svf_vf, svf_trans,
            svf_nnz, n_surfaces
        )
        current, upcoming = upcoming, current
354
+
355
+
356
def benchmark_reflections():
    """
    Time the reflection loop two ways on random test data.

    First the loop is run as separate kernel launches per phase, then
    through ``OptimizedReflectionSolver.solve_reflections_fused``. Each
    variant gets one warm-up run followed by 20 timed runs, and the mean
    and minimum times are printed.

    Returns:
        Tuple ``(times_separate, times_fused)``: lists of per-run
        wall-clock durations in seconds.
    """
    import time

    print("Creating test data...")
    n_surfaces = 10000
    svf_nnz = 500000  # 5% sparse
    n_ref_steps = 3
    n_iterations = 20

    def new_field(dtype, length):
        # 1-D Taichi field of the given element type.
        return ti.field(dtype=dtype, shape=(length,))

    # Per-surface inputs
    initial_sw = new_field(ti.f32, n_surfaces)
    albedo = new_field(ti.f32, n_surfaces)
    svf = new_field(ti.f32, n_surfaces)

    # Sparse view-factor entries
    svf_source = new_field(ti.i32, svf_nnz)
    svf_target = new_field(ti.i32, svf_nnz)
    svf_vf = new_field(ti.f32, svf_nnz)
    svf_trans = new_field(ti.f32, svf_nnz)

    # Working and output buffers for the separate-kernel variant
    surfins_a = new_field(ti.f32, n_surfaces)
    surfins_b = new_field(ti.f32, n_surfaces)
    surfout = new_field(ti.f32, n_surfaces)
    total_incoming = new_field(ti.f32, n_surfaces)
    total_outgoing = new_field(ti.f32, n_surfaces)

    # Fill with seeded random data (draw order matters for reproducibility)
    np.random.seed(42)

    def rand_f32(count):
        return np.random.rand(count).astype(np.float32)

    def rand_index(count):
        return np.random.randint(0, n_surfaces, count).astype(np.int32)

    initial_sw.from_numpy(rand_f32(n_surfaces) * 500)
    albedo.from_numpy(rand_f32(n_surfaces) * 0.3 + 0.1)
    svf.from_numpy(rand_f32(n_surfaces) * 0.5 + 0.3)

    svf_source.from_numpy(rand_index(svf_nnz))
    svf_target.from_numpy(rand_index(svf_nnz))
    svf_vf.from_numpy(rand_f32(svf_nnz) * 0.1)
    svf_trans.from_numpy(rand_f32(svf_nnz) * 0.5 + 0.5)

    print("Warming up...")

    @ti.kernel
    def init_step(initial: ti.template(), ins_a: ti.template(), tot_in: ti.template(), tot_out: ti.template(), n: ti.i32):
        for k in range(n):
            ins_a[k] = initial[k]
            tot_in[k] = initial[k]
            tot_out[k] = 0.0

    @ti.kernel
    def compute_outgoing_step(ins: ti.template(), out: ti.template(), alb: ti.template(), tot_out: ti.template(), n: ti.i32):
        for k in range(n):
            reflected = alb[k] * ins[k]
            out[k] = reflected
            tot_out[k] += reflected

    @ti.kernel
    def reset_buffer(buf: ti.template(), n: ti.i32):
        for k in range(n):
            buf[k] = 0.0

    @ti.kernel
    def sparse_matmul_step(
        out: ti.template(),
        ins_next: ti.template(),
        src: ti.template(),
        tgt: ti.template(),
        vf: ti.template(),
        trans: ti.template(),
        nnz: ti.i32
    ):
        for e in range(nnz):
            source_idx = src[e]
            target_idx = tgt[e]
            factor = vf[e]
            transmit = trans[e]
            emitted = out[source_idx]
            if emitted > 0.01:
                ti.atomic_add(ins_next[target_idx], emitted * factor * transmit)

    @ti.kernel
    def scale_and_accumulate(ins: ti.template(), svf_arr: ti.template(), tot_in: ti.template(), n: ti.i32):
        for k in range(n):
            urban_vf = 1.0 - svf_arr[k]
            if urban_vf < 0.01:
                ins[k] = 0.0
            else:
                ins[k] *= urban_vf
                tot_in[k] += ins[k]

    def run_separate():
        # One full solve using four kernel launches per bounce.
        init_step(initial_sw, surfins_a, total_incoming, total_outgoing, n_surfaces)
        reading, writing = surfins_a, surfins_b
        for _ in range(n_ref_steps):
            compute_outgoing_step(reading, surfout, albedo, total_outgoing, n_surfaces)
            reset_buffer(writing, n_surfaces)
            sparse_matmul_step(surfout, writing, svf_source, svf_target, svf_vf, svf_trans, svf_nnz)
            scale_and_accumulate(writing, svf, total_incoming, n_surfaces)
            reading, writing = writing, reading
        ti.sync()

    def timed(run_once):
        # Wall-clock duration of each of n_iterations runs, in seconds.
        durations = []
        for _ in range(n_iterations):
            started = time.perf_counter()
            run_once()
            durations.append(time.perf_counter() - started)
        return durations

    # Warm-up run, so compilation is not part of the measurement
    run_separate()

    # Benchmark with separate kernel launches
    print(f"\nBenchmarking SEPARATE KERNELS ({n_ref_steps} reflection steps)...")
    times_separate = timed(run_separate)

    mean_sep = np.mean(times_separate) * 1000
    min_sep = np.min(times_separate) * 1000
    print(f" Mean time: {mean_sep:.2f}ms")
    print(f" Min time: {min_sep:.2f}ms")

    # Compare with the solver's fused entry point
    print(f"\nBenchmarking FUSED KERNEL ({n_ref_steps} reflection steps)...")
    solver = OptimizedReflectionSolver(n_surfaces, svf_nnz, n_ref_steps)

    def run_fused():
        solver.solve_reflections_fused(
            initial_sw, albedo, svf,
            svf_source, svf_target, svf_vf, svf_trans,
            svf_nnz, n_ref_steps
        )
        ti.sync()

    # Warm-up run for the fused variant
    run_fused()

    times_fused = timed(run_fused)

    mean_fused = np.mean(times_fused) * 1000
    min_fused = np.min(times_fused) * 1000
    print(f" Mean time: {mean_fused:.2f}ms")
    print(f" Min time: {min_fused:.2f}ms")

    print(f"\n Surfaces: {n_surfaces}, SVF entries: {svf_nnz}")
    print(f"\n Comparison: Separate={min_sep:.2f}ms, Fused={min_fused:.2f}ms")
    if min_fused < min_sep:
        print(f" Fused is {min_sep/min_fused:.2f}x faster")
    else:
        print(f" Separate is {min_fused/min_sep:.2f}x faster")

    return times_separate, times_fused
522
+
523
+
524
if __name__ == "__main__":
    # Script entry point: run the benchmark once on the GPU backend.
    print("=" * 60, "Testing Reflection Solver on GPU", "=" * 60, sep="\n")
    ti.init(arch=ti.gpu, default_fp=ti.f32)
    gpu_times = benchmark_reflections()

    # Only one backend is measured per run; per the original author's note,
    # Taichi is not reinitialised in the same process for a CPU comparison.
    print("\nNote: To compare with CPU, run with ti.init(arch=ti.cpu)")