voxcity 0.7.0__py3-none-any.whl → 1.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. voxcity/__init__.py +14 -14
  2. voxcity/downloader/ocean.py +559 -0
  3. voxcity/exporter/__init__.py +12 -12
  4. voxcity/exporter/cityles.py +633 -633
  5. voxcity/exporter/envimet.py +733 -728
  6. voxcity/exporter/magicavoxel.py +333 -333
  7. voxcity/exporter/netcdf.py +238 -238
  8. voxcity/exporter/obj.py +1480 -1480
  9. voxcity/generator/__init__.py +47 -44
  10. voxcity/generator/api.py +727 -675
  11. voxcity/generator/grids.py +394 -379
  12. voxcity/generator/io.py +94 -94
  13. voxcity/generator/pipeline.py +582 -282
  14. voxcity/generator/update.py +429 -0
  15. voxcity/generator/voxelizer.py +18 -6
  16. voxcity/geoprocessor/__init__.py +75 -75
  17. voxcity/geoprocessor/draw.py +1494 -1219
  18. voxcity/geoprocessor/merge_utils.py +91 -91
  19. voxcity/geoprocessor/mesh.py +806 -806
  20. voxcity/geoprocessor/network.py +708 -708
  21. voxcity/geoprocessor/raster/__init__.py +2 -0
  22. voxcity/geoprocessor/raster/buildings.py +435 -428
  23. voxcity/geoprocessor/raster/core.py +31 -0
  24. voxcity/geoprocessor/raster/export.py +93 -93
  25. voxcity/geoprocessor/raster/landcover.py +178 -51
  26. voxcity/geoprocessor/raster/raster.py +1 -1
  27. voxcity/geoprocessor/utils.py +824 -824
  28. voxcity/models.py +115 -113
  29. voxcity/simulator/solar/__init__.py +66 -43
  30. voxcity/simulator/solar/integration.py +336 -336
  31. voxcity/simulator/solar/sky.py +668 -0
  32. voxcity/simulator/solar/temporal.py +792 -434
  33. voxcity/simulator_gpu/__init__.py +115 -0
  34. voxcity/simulator_gpu/common/__init__.py +9 -0
  35. voxcity/simulator_gpu/common/geometry.py +11 -0
  36. voxcity/simulator_gpu/core.py +322 -0
  37. voxcity/simulator_gpu/domain.py +262 -0
  38. voxcity/simulator_gpu/environment.yml +11 -0
  39. voxcity/simulator_gpu/init_taichi.py +154 -0
  40. voxcity/simulator_gpu/integration.py +15 -0
  41. voxcity/simulator_gpu/kernels.py +56 -0
  42. voxcity/simulator_gpu/radiation.py +28 -0
  43. voxcity/simulator_gpu/raytracing.py +623 -0
  44. voxcity/simulator_gpu/sky.py +9 -0
  45. voxcity/simulator_gpu/solar/__init__.py +178 -0
  46. voxcity/simulator_gpu/solar/core.py +66 -0
  47. voxcity/simulator_gpu/solar/csf.py +1249 -0
  48. voxcity/simulator_gpu/solar/domain.py +561 -0
  49. voxcity/simulator_gpu/solar/epw.py +421 -0
  50. voxcity/simulator_gpu/solar/integration.py +2953 -0
  51. voxcity/simulator_gpu/solar/radiation.py +3019 -0
  52. voxcity/simulator_gpu/solar/raytracing.py +686 -0
  53. voxcity/simulator_gpu/solar/reflection.py +533 -0
  54. voxcity/simulator_gpu/solar/sky.py +907 -0
  55. voxcity/simulator_gpu/solar/solar.py +337 -0
  56. voxcity/simulator_gpu/solar/svf.py +446 -0
  57. voxcity/simulator_gpu/solar/volumetric.py +1151 -0
  58. voxcity/simulator_gpu/solar/voxcity.py +2953 -0
  59. voxcity/simulator_gpu/temporal.py +13 -0
  60. voxcity/simulator_gpu/utils.py +25 -0
  61. voxcity/simulator_gpu/view.py +32 -0
  62. voxcity/simulator_gpu/visibility/__init__.py +109 -0
  63. voxcity/simulator_gpu/visibility/geometry.py +278 -0
  64. voxcity/simulator_gpu/visibility/integration.py +808 -0
  65. voxcity/simulator_gpu/visibility/landmark.py +753 -0
  66. voxcity/simulator_gpu/visibility/view.py +944 -0
  67. voxcity/utils/__init__.py +11 -0
  68. voxcity/utils/classes.py +194 -0
  69. voxcity/utils/lc.py +80 -39
  70. voxcity/utils/shape.py +230 -0
  71. voxcity/visualizer/__init__.py +24 -24
  72. voxcity/visualizer/builder.py +43 -43
  73. voxcity/visualizer/grids.py +141 -141
  74. voxcity/visualizer/maps.py +187 -187
  75. voxcity/visualizer/renderer.py +1146 -928
  76. {voxcity-0.7.0.dist-info → voxcity-1.0.13.dist-info}/METADATA +56 -52
  77. voxcity-1.0.13.dist-info/RECORD +116 -0
  78. voxcity-0.7.0.dist-info/RECORD +0 -77
  79. {voxcity-0.7.0.dist-info → voxcity-1.0.13.dist-info}/WHEEL +0 -0
  80. {voxcity-0.7.0.dist-info → voxcity-1.0.13.dist-info}/licenses/AUTHORS.rst +0 -0
  81. {voxcity-0.7.0.dist-info → voxcity-1.0.13.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,3019 @@
1
+ """Radiation solver for palm-solar.
2
+
3
+ Main module that integrates all components to compute
4
+ shortwave (solar) radiation fluxes with multi-bounce reflections
5
+ following PALM's RTM (Radiative Transfer Model) methodology.
6
+
7
+ PALM Alignment Notes:
8
+ - Solar position: Uses PALM's calc_zenith formula exactly (solar.py)
9
+ - SVF calculation: Uses PALM's vffrac_up formula with proper weighting (svf.py)
10
+ - Reflection steps: Default nrefsteps=3 matches PALM
11
+ - Extinction coefficient: Default ext_coef=0.6 matches PALM
12
+ - Beer-Lambert law: Same exponential attenuation through canopy
13
+ - Direction indices: IUP=0, IDOWN=1, etc. match PALM convention
14
+
15
+ Key differences from PALM:
16
+ - GPU-accelerated via Taichi (PALM uses Fortran+MPI)
17
+ - View factors computed at run time on the GPU and, by default, cached as a sparse matrix for reuse across timesteps (PALM pre-computes sparse matrix)
18
+ - Shortwave only (PALM includes longwave radiation)
19
+ - Axis-aligned surfaces only (PALM supports slant surfaces)
20
+
21
+ Input convention:
22
+ - sw_direct: Direct Normal Irradiance (DNI) in W/m²
23
+ - sw_diffuse: Diffuse Horizontal Irradiance (DHI) in W/m²
24
+ """
25
+
26
+ import taichi as ti
27
+ import numpy as np
28
+ from typing import Optional, Tuple
29
+ from dataclasses import dataclass, field
30
+
31
+ from .core import (
32
+ Vector3, Point3, SOLAR_CONSTANT, EXT_COEF,
33
+ PI, TWO_PI
34
+ )
35
+ from .domain import Domain, Surfaces, extract_surfaces_from_domain
36
+ from .solar import SolarPosition, calc_zenith, SolarCalculator
37
+ from .raytracing import RayTracer, ray_point_to_point_transmissivity, ray_voxel_first_hit
38
+ from .svf import SVFCalculator
39
+ from .csf import CSFCalculator
40
+ from .volumetric import VolumetricFluxCalculator
41
+
42
+
43
# Face-direction codes, numbered as in PALM's radiation_model_mod.f90:
# iup=0, idown=1, inorth=2, isouth=3, ieast=4, iwest=5.
IUP, IDOWN, INORTH, ISOUTH, IEAST, IWEST = range(6)
51
+
52
+
53
@dataclass
class RadiationConfig:
    """
    Tunable settings for the radiation model.

    Attributes:
        albedo_ground: Default albedo assigned to ground surfaces.
        albedo_wall: Default albedo assigned to wall surfaces.
        albedo_roof: Default albedo assigned to roof surfaces.
        albedo_leaf: Albedo of tree/leaf material (PALM default: 0.15).
        n_azimuth: Azimuthal divisions used for SVF ray casting.
        n_elevation: Elevation divisions used for SVF ray casting.
        ext_coef: Canopy extinction coefficient.
        skip_svf: When set, SVF is not computed and 1.0 is used instead.
        n_reflection_steps: Reflection iterations (PALM default: 3).
        surface_reflections: Turn surface-to-surface reflections on/off.
        canopy_reflections: Turn attenuation of reflections through the
            canopy on/off.
        volumetric_flux: Turn the volumetric flux calculation on/off.
        volumetric_n_azimuth: Azimuth count for volumetric horizon tracing.
        min_opaque_lad: Smallest LAD treated as opaque for volumetric
            shadows.
        canopy_radiation: Turn plant-canopy absorption (CSF) on/off.
        canopy_to_canopy: Turn canopy-to-canopy scattering on/off. This is
            not part of PALM; it improves accuracy in dense vegetation
            where leaves scatter light onto neighbouring leaves.
        cache_svf_matrix: Pre-compute the surface-to-surface SVF matrix so
            reflections are cheap across many timesteps.
        svf_min_threshold: Smallest view factor kept in the cached matrix
            (sparsity threshold).
        use_fp16_intermediate: Store intermediate reflection buffers as
            float16 instead of float32.
    """
    # --- Surface and vegetation albedos ---
    albedo_ground: float = 0.2
    albedo_wall: float = 0.3
    albedo_roof: float = 0.3
    albedo_leaf: float = 0.15  # tree albedo default taken from PALM

    # --- Ray-casting resolution and canopy extinction ---
    n_azimuth: int = 80  # mirrors PALM raytrace_discrete_azims = 80
    n_elevation: int = 40  # mirrors PALM raytrace_discrete_elevs = 40
    ext_coef: float = EXT_COEF  # mirrors PALM ext_coef = 0.6
    skip_svf: bool = False

    # --- Reflection handling ---
    n_reflection_steps: int = 3  # mirrors PALM nrefsteps = 3
    surface_reflections: bool = True
    canopy_reflections: bool = True  # apply LAD attenuation to reflections

    # --- Volumetric flux ---
    volumetric_flux: bool = False
    volumetric_n_azimuth: int = 36
    min_opaque_lad: float = 0.5

    # --- Canopy absorption and scattering ---
    canopy_radiation: bool = True  # CSF-based canopy absorption
    canopy_to_canopy: bool = True  # helps accuracy in dense vegetation

    # --- SVF matrix caching for multi-timestep runs (PALM-like approach) ---
    cache_svf_matrix: bool = True  # build the SVF matrix once, reuse it
    svf_min_threshold: float = 0.01  # 0.01 is enough for ~1% accuracy

    # --- Reduced-precision intermediates ---
    # float16 spans roughly +/-65,504 with about 3 decimal digits, which the
    # original author judged sufficient for W/m² irradiance values while
    # cutting memory bandwidth.
    use_fp16_intermediate: bool = True
100
+
101
+
102
+ @ti.data_oriented
103
+ class RadiationModel:
104
+ """
105
+ GPU-accelerated solar radiation transfer model.
106
+
107
+ Computes shortwave (direct and diffuse) radiation
108
+ for all surface elements in the domain.
109
+ """
110
+
111
def __init__(
    self,
    domain: Domain,
    config: Optional[RadiationConfig] = None
):
    """
    Initialize radiation model.

    Extracts the surface elements from ``domain``, builds the solar,
    ray-tracing, SVF and CSF helpers, and pre-allocates every Taichi field
    the shortwave solver writes to (per-surface flux buffers, per-voxel
    canopy buffers and, when caching is enabled, the sparse SVF/CSF
    storage).

    Args:
        domain: Domain object with geometry
        config: Radiation configuration (uses defaults if None)
    """
    # Only needed for the optional GPU-memory probe further down.
    import subprocess
    import warnings

    self.domain = domain
    self.config = config or RadiationConfig()
    # Every setting below is read from the resolved object. The raw
    # `config` argument may be None, and reading attributes from it would
    # raise AttributeError for callers relying on the documented default.
    cfg = self.config

    # Extract surfaces from domain
    print("Extracting surfaces from domain...")
    self.surfaces = extract_surfaces_from_domain(domain)
    self.n_surfaces = self.surfaces.count
    print(f"Found {self.n_surfaces} surface elements")

    # Initialize sub-components
    self.solar_calc = SolarCalculator(
        domain.origin_lat or 0.0,
        domain.origin_lon or 0.0
    )

    self.ray_tracer = RayTracer(domain)
    self.ray_tracer.ext_coef = cfg.ext_coef

    self.svf_calc = SVFCalculator(
        domain,
        cfg.n_azimuth,
        cfg.n_elevation
    )

    self.csf_calc = CSFCalculator(
        domain,
        cfg.n_azimuth,
        cfg.n_elevation
    )

    # Set default surface properties based on direction
    self._set_default_properties()

    # dtype for intermediate buffers: float16 when requested, else float32.
    inter_dtype = ti.f16 if cfg.use_fp16_intermediate else ti.f32

    n_surf = self.n_surfaces
    grid_shape = (domain.nx, domain.ny, domain.nz)

    # Per-reflection-step surface buffers (intermediate precision)
    self._surfins = ti.field(dtype=inter_dtype, shape=(n_surf,))  # Incoming SW per reflection step
    self._surfinl = ti.field(dtype=inter_dtype, shape=(n_surf,))  # Incoming LW per reflection step
    self._surfoutsl = ti.field(dtype=inter_dtype, shape=(n_surf,))  # Outgoing SW per reflection step
    self._surfoutll = ti.field(dtype=inter_dtype, shape=(n_surf,))  # Outgoing LW per reflection step

    # Ping-pong buffers for the optimized reflection iterations
    self._surfins_ping = ti.field(dtype=inter_dtype, shape=(n_surf,))
    self._surfins_pong = ti.field(dtype=inter_dtype, shape=(n_surf,))

    # Accumulated totals stay f32 for precision in cumulative sums
    self._surfinsw = ti.field(dtype=ti.f32, shape=(n_surf,))  # Total incoming SW
    self._surfinlw = ti.field(dtype=ti.f32, shape=(n_surf,))  # Total incoming LW
    self._surfoutsw = ti.field(dtype=ti.f32, shape=(n_surf,))  # Total outgoing SW
    self._surfoutlw = ti.field(dtype=ti.f32, shape=(n_surf,))  # Total outgoing LW

    # Direct and diffuse components stay f32 for final output precision
    self._surfinswdir = ti.field(dtype=ti.f32, shape=(n_surf,))  # Direct SW
    self._surfinswdif = ti.field(dtype=ti.f32, shape=(n_surf,))  # Diffuse SW from sky

    # Scalar accumulators for the optimized reflection computation
    self._total_reflected_flux = ti.field(dtype=ti.f32, shape=())  # Sum of (surfout * area)
    self._total_reflecting_area = ti.field(dtype=ti.f32, shape=())  # Sum of areas

    # Flags: nothing has been computed yet
    self._svf_matrix_computed = False
    self._svf_computed = False

    # Initialize volumetric flux calculator if enabled
    self.volumetric_calc = None
    if cfg.volumetric_flux:
        self.volumetric_calc = VolumetricFluxCalculator(
            domain,
            n_azimuth=cfg.volumetric_n_azimuth,
            min_opaque_lad=cfg.min_opaque_lad
        )

    # Plant canopy absorption arrays (like PALM's pcbinsw, etc.),
    # indexed by (i, j, k) grid coordinates, in W/m³.
    self._pcbinsw = ti.field(dtype=ti.f32, shape=grid_shape)  # Total absorbed SW
    self._pcbinswdir = ti.field(dtype=ti.f32, shape=grid_shape)  # Direct SW absorbed
    self._pcbinswdif = ti.field(dtype=ti.f32, shape=grid_shape)  # Diffuse SW absorbed

    # Received radiation (before absorption) in W/m²
    self._pcinsw = ti.field(dtype=ti.f32, shape=grid_shape)  # Total received SW
    self._pcinswdir = ti.field(dtype=ti.f32, shape=grid_shape)  # Direct SW received
    self._pcinswdif = ti.field(dtype=ti.f32, shape=grid_shape)  # Diffuse SW received

    # Canopy scattered/reflected radiation (W/m³), intermediate precision
    self._pcbinswref = ti.field(dtype=inter_dtype, shape=grid_shape)  # Reflected SW from canopy

    # Canopy-to-canopy scattering: per-iteration scratch plus f32 running total
    self._pcbinswc2c = ti.field(dtype=inter_dtype, shape=grid_shape)  # Canopy-to-canopy SW (temp)
    self._pcbinswc2c_total = ti.field(dtype=ti.f32, shape=grid_shape)  # Canopy-to-canopy SW (total)

    # Canopy-to-surface contribution per reflection step
    self._surfinswpc = ti.field(dtype=inter_dtype, shape=(n_surf,))  # SW from plant canopy

    # ========== SVF Matrix Caching (PALM-like approach) ==========
    # Sparse COO storage: (source_idx, target_idx, vf_value, transmissivity)
    self._svf_matrix_cached = False
    self._svf_nnz = 0  # Number of non-zero entries

    # Entries-per-surface heuristic keyed on the sparsity threshold
    # (the original author derived these from a 250k-surface domain).
    threshold = cfg.svf_min_threshold
    if threshold >= 0.01:
        entries_per_surface = 8
    elif threshold >= 0.001:
        entries_per_surface = 85
    elif threshold >= 0.0001:
        entries_per_surface = 600
    else:
        entries_per_surface = 2500  # Full density

    estimated = n_surf * entries_per_surface

    if n_surf < 5000:
        # For small domains, allow full N*N
        self._max_svf_entries = min(n_surf * n_surf, estimated * 2)
    else:
        # Add 50% buffer for safety
        self._max_svf_entries = int(estimated * 1.5)

    # Best-effort sanity check against free GPU memory. Each entry is
    # 12 bytes (2 int32 + 2 float16); at most 50% of free memory is used.
    memory_based_limit = None
    try:
        result = subprocess.run(
            ['nvidia-smi', '--query-gpu=memory.free', '--format=csv,noheader,nounits'],
            capture_output=True, text=True, timeout=5
        )
        if result.returncode == 0:
            free_mb = int(result.stdout.strip().split('\n')[0])
            available_bytes = free_mb * 1024 * 1024 * 0.5
            memory_based_limit = int(available_bytes / 12)
    except (OSError, subprocess.SubprocessError, ValueError):
        # nvidia-smi missing, timed out, or printed something unparsable:
        # keep the estimated size.
        memory_based_limit = None

    if memory_based_limit is not None and self._max_svf_entries > memory_based_limit:
        warnings.warn(
            f"SVF buffer limited to {memory_based_limit:,} entries due to GPU memory. "
            f"Estimated {self._max_svf_entries:,} entries needed.",
            RuntimeWarning
        )
        self._max_svf_entries = memory_based_limit

    # Pre-allocate sparse COO arrays up front (done in __init__ rather than
    # lazily; the original author cites CUDA issues with dynamic allocation).
    if cfg.cache_svf_matrix and cfg.surface_reflections:
        self._svf_source = ti.field(dtype=ti.i32, shape=(self._max_svf_entries,))
        self._svf_target = ti.field(dtype=ti.i32, shape=(self._max_svf_entries,))
        # float16 halves the storage for view-factor and transmissivity values
        self._svf_vf = ti.field(dtype=ti.f16, shape=(self._max_svf_entries,))
        self._svf_trans = ti.field(dtype=ti.f16, shape=(self._max_svf_entries,))
        self._svf_count = ti.field(dtype=ti.i32, shape=())
    else:
        self._svf_source = None
        self._svf_target = None
        self._svf_vf = None
        self._svf_trans = None
        self._svf_count = None

    # CSR storage is currently disabled in both configurations; the
    # attributes are kept so other code can test them.
    self._svf_csr_row_ptr = None
    self._svf_csr_col_idx = None
    self._svf_csr_val = None
    self._svf_csr_ready = False

    # ========== CSF Matrix Caching (Canopy-Surface Factors) ==========
    # Sparse COO storage: (canopy_idx, surface_idx, csf_value)
    self._csf_matrix_cached = False
    self._csf_nnz = 0

    # Fixed capacity; a rough heuristic from the original author.
    self._max_csf_entries = 10_000_000

    if cfg.cache_svf_matrix and cfg.canopy_radiation and domain.lad is not None:
        self._csf_canopy_idx = ti.field(dtype=ti.i32, shape=(self._max_csf_entries,))
        self._csf_surface_idx = ti.field(dtype=ti.i32, shape=(self._max_csf_entries,))
        self._csf_val = ti.field(dtype=ti.f16, shape=(self._max_csf_entries,))
        self._csf_count = ti.field(dtype=ti.i32, shape=())

        # Lookup table: (nx, ny, nz, 6) -> surface index
        self._grid_to_surf = ti.field(dtype=ti.i32, shape=(domain.nx, domain.ny, domain.nz, 6))
    else:
        self._csf_canopy_idx = None
        self._csf_surface_idx = None
        self._csf_val = None
        self._csf_count = None
        self._grid_to_surf = None

    # Canopy radiation computed flag
    self._canopy_radiation_computed = False
350
+
351
def _set_default_properties(self):
    """Fill the per-surface albedo field from the configured defaults."""
    cfg = self.config
    surfaces = self.surfaces
    self._set_defaults_kernel(
        surfaces.direction,
        surfaces.albedo,
        self.n_surfaces,
        cfg.albedo_ground,
        cfg.albedo_wall,
        cfg.albedo_roof,
    )
361
+
362
@ti.kernel
def _set_defaults_kernel(
    self,
    direction: ti.template(),
    albedo: ti.template(),
    n_surf: ti.i32,
    alb_ground: ti.f32,
    alb_wall: ti.f32,
    alb_roof: ti.f32
):
    """
    Write a default albedo into ``albedo`` for the first ``n_surf`` surfaces.

    The value is chosen from the surface's direction code: 0 gets
    ``alb_roof``, 1 gets ``alb_ground``, anything else gets ``alb_wall``.
    """
    # NOTE(review): upward faces (code 0) all receive the roof albedo, so
    # the ground albedo only reaches downward faces (code 1) — confirm
    # this mapping is intended.
    for s in range(n_surf):
        facing = direction[s]
        value = alb_wall  # side walls unless overridden below
        if facing == 0:
            value = alb_roof
        elif facing == 1:
            value = alb_ground
        albedo[s] = value
380
+
381
def compute_svf(self):
    """
    Populate the sky view factor fields for every surface element.

    With ``skip_svf`` set, both SVF fields are filled with 1.0 and nothing
    else runs. Otherwise the per-surface SVF is computed (canopy-aware
    when the domain carries LAD), followed by the optional cached SVF/CSF
    matrices and the cached CSF sky term.

    Expensive; intended to be called once per domain setup.
    """
    cfg = self.config
    if cfg.skip_svf:
        # Shortcut: treat every surface as fully open to the sky.
        self._set_svf_one()
        self._svf_computed = True
        return

    print("Computing per-surface Sky View Factors (for diffuse sky radiation)...")

    domain = self.domain
    surfaces = self.surfaces
    if domain.lad is None:
        # No vegetation: plain SVF, then mirror it into svf_urban.
        self.svf_calc.compute_svf(
            surfaces.center,  # world coordinates, not grid indices
            surfaces.direction,
            domain.is_solid,
            self.n_surfaces,
            surfaces.svf
        )
        self._copy_svf()
    else:
        self.svf_calc.compute_svf_with_canopy(
            surfaces.center,  # world coordinates, not grid indices
            surfaces.direction,
            domain.is_solid,
            domain.lad,
            self.n_surfaces,
            cfg.ext_coef,
            surfaces.svf,
            surfaces.svf_urban
        )

    self._svf_computed = True
    print("Per-surface SVF complete.")

    # Cached matrices make repeated reflection passes cheap.
    if cfg.cache_svf_matrix and cfg.surface_reflections:
        self.compute_svf_matrix()

        # The CSF matrix is only built alongside the SVF matrix.
        if cfg.canopy_radiation and domain.lad is not None:
            self.compute_csf_matrix()

    # The CSF sky term depends on geometry only, so it is computed once
    # here whenever canopy radiation is active and LAD is present.
    if not (cfg.canopy_radiation and domain.lad is not None):
        return

    print("Pre-computing CSF sky (geometry-dependent, computed once)...")
    self.csf_calc.compute_csf_sky_cached(
        domain.is_solid,
        domain.lad,
        cfg.n_azimuth,
        cfg.n_elevation
    )
    print("CSF sky computation complete.")
440
+
441
@ti.kernel
def _set_svf_one(self):
    """Set both SVF fields to 1.0 for every surface."""
    for s in range(self.n_surfaces):
        self.surfaces.svf_urban[s] = 1.0
        self.surfaces.svf[s] = 1.0
446
+
447
@ti.kernel
def _copy_svf(self):
    """Copy each surface's ``svf`` value into ``svf_urban``."""
    for s in range(self.n_surfaces):
        self.surfaces.svf_urban[s] = self.surfaces.svf[s]
451
+
452
def update_solar_position(self, day_of_year: int, second_of_day: float):
    """
    Forward a new date/time to the solar calculator.

    Args:
        day_of_year: Day number (1-365)
        second_of_day: Seconds since midnight UTC
    """
    solar = self.solar_calc
    solar.update(day_of_year, second_of_day)
461
+
462
def compute_shortwave_radiation(
    self,
    sw_direct: float,
    sw_diffuse: float
):
    """
    Run one shortwave solve for the current solar position.

    Steps, in order: ensure SVF exists, trace direct-beam shadows (or
    clear the direct term when the sun is not above the horizon), run the
    surface flux / reflection solve, finish the canopy bookkeeping, and
    optionally compute volumetric fluxes.

    Args:
        sw_direct: Direct normal irradiance (W/m²)
        sw_diffuse: Diffuse horizontal irradiance (W/m²)
    """
    if not self._svf_computed:
        print("Warning: SVF not computed, computing now...")
        self.compute_svf()

    domain = self.domain
    surfaces = self.surfaces

    # Current sun state as held by the solar calculator
    sun_dir = self.solar_calc.sun_direction[None]
    cos_zenith = self.solar_calc.cos_zenith[None]

    sun_is_up = cos_zenith > 0
    if not sun_is_up:
        # Night time - no direct radiation
        self._clear_direct_radiation()
    elif domain.lad is None:
        self.ray_tracer.compute_direct_shadows(
            surfaces.center,  # world coordinates, not grid indices
            surfaces.direction,
            sun_dir,
            domain.is_solid,
            self.n_surfaces,
            surfaces.shadow_factor
        )
        # No canopy present, so nothing attenuates the beam
        self._set_canopy_trans_one()
    else:
        self.ray_tracer.compute_direct_with_canopy(
            surfaces.center,  # world coordinates, not grid indices
            surfaces.direction,
            sun_dir,
            domain.is_solid,
            domain.lad,
            self.n_surfaces,
            surfaces.shadow_factor,
            surfaces.canopy_transmissivity
        )

    # Surface fluxes, including the reflection loop
    self._compute_sw_fluxes(sw_direct, sw_diffuse, cos_zenith)

    # Canopy bookkeeping after the flux solve
    cfg = self.config
    if cfg.canopy_radiation and domain.lad is not None:
        if not cfg.canopy_reflections:
            # Legacy path: compute canopy radiation the old way
            self._compute_canopy_radiation(
                sw_direct,
                sw_diffuse,
                sun_dir,
                cos_zenith
            )
        else:
            # The flux solve already handled initial absorption; only the
            # received-radiation fields remain to be filled in.
            grid_volume = domain.dx * domain.dy * domain.dz
            self._compute_received_radiation(sw_direct, sw_diffuse, cos_zenith, grid_volume)
            self._canopy_radiation_computed = True

    # Optional volumetric fluxes
    volumetric = self.volumetric_calc
    if volumetric is not None:
        sun_dir_tuple = (float(sun_dir[0]), float(sun_dir[1]), float(sun_dir[2]))
        volumetric.compute_swflux_vol(
            sw_direct,
            sw_diffuse,
            cos_zenith,
            sun_dir_tuple,
            domain.lad  # LAD is passed along for attenuation
        )
548
+
549
@ti.kernel
def _set_canopy_trans_one(self):
    """Set canopy transmissivity to 1.0 for every surface."""
    for s in range(self.n_surfaces):
        self.surfaces.canopy_transmissivity[s] = 1.0
553
+
554
@ti.kernel
def _clear_direct_radiation(self):
    """
    Reset the direct-beam state of every surface.

    Writes shadow_factor = 1.0, canopy_transmissivity = 0.0 and
    sw_in_direct = 0.0.
    """
    for s in range(self.n_surfaces):
        self.surfaces.sw_in_direct[s] = 0.0
        self.surfaces.canopy_transmissivity[s] = 0.0
        self.surfaces.shadow_factor[s] = 1.0
560
+
561
def _compute_sw_fluxes(
    self,
    sw_direct: float,
    sw_diffuse: float,
    cos_zenith: float
):
    """
    Solve shortwave surface fluxes, including multi-bounce reflections.

    Mirrors the structure of PALM's RTM (radiation_model_mod.f90):

    1. First pass, sky only:
       - PALM: surfinswdir = rad_sw_in_dir * surf_costh * dsitrans * sun_direct_factor
       - PALM: surfinswdif = rad_sw_in_diff * skyvft

    2. Reflection loop (DO refstep = 1, nrefsteps):
       - PALM: surfoutsl = albedo_surf * surfins
       - PALM: surfins(isurf) += svf(1,isvf) * svf(2,isvf) * surfoutsl(isurfsrc)
       - PALM: pcbinsw += csf * surfoutsl(isurfsrc) * asrc * grid_volume_inverse

    3. Totals:
       - PALM: surfinsw = surfinsw + surfins
       - PALM: surfoutsw = surfoutsw + surfoutsl

    Args:
        sw_direct: Direct normal irradiance (W/m²)
        sw_diffuse: Diffuse horizontal irradiance (W/m²)
        cos_zenith: Cosine of the solar zenith angle
    """
    cfg = self.config
    domain = self.domain

    # Start from zeroed buffers
    self._init_flux_arrays()
    self._init_canopy_arrays()

    # First pass: direct + diffuse from the sky, no reflections yet
    self._compute_initial_sw_pass(sw_direct, sw_diffuse, cos_zenith)

    # Initial canopy absorption happens before the reflection loop so that
    # canopy scattering can take part in the reflections.
    if domain.lad is not None and cfg.canopy_radiation:
        sun_dir = self.solar_calc.sun_direction[None]
        self._compute_canopy_radiation_initial(
            sw_direct,
            sw_diffuse,
            sun_dir,
            cos_zenith
        )
        if cfg.canopy_reflections:
            # Seed the scattered-radiation field for the loop below
            self._update_canopy_scattered_radiation(
                domain.lad,
                cfg.albedo_leaf
            )

    n_steps = cfg.n_reflection_steps
    if not (cfg.surface_reflections and n_steps > 0):
        # Reflections disabled: single-bounce outgoing only
        self._compute_surface_outgoing_no_exchange()
        self._copy_final_fluxes()
        return

    # Pick the reflection strategy. The sparse fast paths need the cached
    # matrices; otherwise fall back to the step-by-step loop.
    has_canopy = domain.lad is not None and cfg.canopy_radiation
    svf_cached = self._svf_matrix_cached

    if svf_cached and not has_canopy:
        # Fast path without canopy
        self._run_optimized_reflection_loop(n_steps)
    elif svf_cached and has_canopy and self._csf_matrix_cached:
        # Fast path with canopy: sparse SVF and CSF matrices
        self._run_optimized_reflection_loop_with_canopy(n_steps)
    else:
        # Standard path. One iteration covers, in order:
        #   surface outgoing + accumulation, surface -> canopy absorption,
        #   surface -> surface exchange, canopy scattering, and finally
        #   accumulation of incoming flux.
        for _ in range(n_steps):
            # PALM: surfoutsl = albedo_surf * surfins
            # PALM: surfoutsw = surfoutsw + surfoutsl
            self._compute_outgoing_and_accumulate()

            # PALM: pcbinsw += csf * surfoutsl(isurfsrc) * asrc * grid_volume_inverse
            if has_canopy:
                self._accumulate_canopy_absorption_from_reflections(
                    domain.lad,
                    domain.is_solid,
                    cfg.ext_coef,
                    self._pcbinsw
                )

            # PALM: surfins(isurf) += svf(1,isvf) * svf(2,isvf) * surfoutsl(isurfsrc)
            self._compute_surface_reflections()

            # Canopy scattering inside the loop
            if domain.lad is not None and cfg.canopy_reflections:
                self._update_canopy_scattered_radiation(
                    domain.lad,
                    cfg.albedo_leaf
                )
                self._compute_canopy_to_surface_scattering(
                    domain.lad,
                    domain.is_solid,
                    cfg.ext_coef
                )
                if cfg.canopy_to_canopy:
                    self._compute_canopy_to_canopy_scattering(
                        domain.lad,
                        domain.is_solid,
                        cfg.albedo_leaf,
                        cfg.ext_coef
                    )
                    self._accumulate_canopy_to_canopy(
                        domain.lad,
                        cfg.albedo_leaf
                    )

            # PALM: surfinsw = surfinsw + surfins
            self._accumulate_incoming()

    # Publish the results to the surface arrays
    self._copy_final_fluxes()
688
+
689
@ti.kernel
def _init_flux_arrays(self):
    """Zero every per-surface flux field ahead of a new radiation pass."""
    # The fp16 fields are written with an explicitly cast zero so Taichi does
    # not emit precision-loss warnings; the other fields take a plain 0.0.
    f16_zero = ti.cast(0.0, ti.f16)
    for s in range(self.n_surfaces):
        # Per-step working buffers
        self._surfins[s] = f16_zero
        self._surfinl[s] = f16_zero
        self._surfoutsl[s] = f16_zero
        self._surfoutll[s] = f16_zero
        # Accumulated totals
        self._surfinsw[s] = 0.0
        self._surfinlw[s] = 0.0
        self._surfoutsw[s] = 0.0
        self._surfoutlw[s] = 0.0
        # Initial-pass components
        self._surfinswdir[s] = 0.0
        self._surfinswdif[s] = 0.0
        # Contribution received from the plant canopy
        self._surfinswpc[s] = f16_zero
706
+
707
@ti.kernel
def _init_canopy_arrays(self):
    """Clear the per-cell canopy scattering fields over the whole grid."""
    f16_zero = ti.cast(0.0, ti.f16)
    for ci, cj, ck in ti.ndrange(self.domain.nx, self.domain.ny, self.domain.nz):
        self._pcbinswc2c_total[ci, cj, ck] = 0.0
        self._pcbinswref[ci, cj, ck] = f16_zero
        self._pcbinswc2c[ci, cj, ck] = f16_zero
715
+
716
@ti.kernel
def _compute_initial_sw_pass(
    self,
    sw_direct: ti.f32,
    sw_diffuse: ti.f32,
    cos_zenith: ti.f32
):
    """
    Compute initial radiation pass: direct solar + diffuse sky radiation.

    This is the first pass before any surface reflections. For every
    surface it writes the direct and diffuse components, seeds the fp16
    reflection buffer ``_surfins`` and initialises the running total
    ``_surfinsw``.

    Args:
        sw_direct: Direct shortwave input, treated as Direct Normal
            Irradiance (it is only multiplied by the incidence cosine).
        sw_diffuse: Diffuse horizontal irradiance from the sky.
        cos_zenith: Cosine of the solar zenith angle; direct radiation is
            dropped when it is not above ``min_stable_coszen``.
    """
    # Minimum stable cosine of zenith angle (PALM default)
    min_stable_coszen = 0.0262

    for i in range(self.n_surfaces):
        direction = self.surfaces.direction[i]
        svf = self.surfaces.svf[i]
        shadow = self.surfaces.shadow_factor[i]
        canopy_trans = self.surfaces.canopy_transmissivity[i]

        # Axis-aligned surface normal selected by the direction code
        normal = Vector3(0.0, 0.0, 0.0)
        if direction == 0:  # Up
            normal = Vector3(0.0, 0.0, 1.0)
        elif direction == 1:  # Down
            normal = Vector3(0.0, 0.0, -1.0)
        elif direction == 2:  # North
            normal = Vector3(0.0, 1.0, 0.0)
        elif direction == 3:  # South
            normal = Vector3(0.0, -1.0, 0.0)
        elif direction == 4:  # East
            normal = Vector3(1.0, 0.0, 0.0)
        elif direction == 5:  # West
            normal = Vector3(-1.0, 0.0, 0.0)

        # Sun direction
        sun_dir = self.solar_calc.sun_direction[None]

        # Cosine of incidence angle (angle between sun and surface normal),
        # clamped so surfaces facing away from the sun receive nothing.
        cos_incidence = (sun_dir[0] * normal[0] +
                         sun_dir[1] * normal[1] +
                         sun_dir[2] * normal[2])
        cos_incidence = ti.max(0.0, cos_incidence)

        # Direct radiation
        # PALM formula: surfinswdir = rad_sw_in_dir * surf_costh * dsitrans * sun_direct_factor
        # where sun_direct_factor = 1 / max(min_stable_coszen, cos_zenith)
        #
        # PALM's rad_sw_in_dir is Direct Horizontal Irradiance, so it multiplies by
        # sun_direct_factor to convert to DNI, then by surf_costh for surface projection.
        #
        # Here sw_direct is assumed to already be DNI, so only
        # sw_direct * cos_incidence * canopy_trans is needed. This matches
        # PALM when sw_direct = rad_sw_in_dir * sun_direct_factor.
        sw_in_dir = 0.0
        if cos_zenith > min_stable_coszen and shadow < 0.5:
            sw_in_dir = sw_direct * cos_incidence * canopy_trans

        # Diffuse radiation from sky
        # PALM formula: surfinswdif = rad_sw_in_diff * skyvft
        # Upward-facing surfaces and vertical walls both use sw_diffuse * svf;
        # only downward-facing surfaces (direction 1) receive no sky diffuse.
        sw_in_dif = 0.0
        if direction != 1:
            sw_in_dif = sw_diffuse * svf

        # Store initial pass results
        self._surfinswdir[i] = sw_in_dir
        self._surfinswdif[i] = sw_in_dif

        sw_in_total = sw_in_dir + sw_in_dif

        # Initial incoming for reflection loop (cast to fp16)
        self._surfins[i] = ti.cast(sw_in_total, ti.f16)

        # Seed the running total with the unrounded sum rather than the
        # fp16-rounded reflection buffer, so the total agrees with
        # _surfinswdir + _surfinswdif.
        self._surfinsw[i] = sw_in_total
808
+
809
@ti.kernel
def _compute_surface_outgoing(self):
    """
    Set each surface's reflected flux for the current step.

    PALM formula: surfoutsl = albedo * surfins
    """
    for s in range(self.n_surfaces):
        reflected = self.surfaces.albedo[s] * self._surfins[s]
        self._surfoutsl[s] = ti.cast(reflected, ti.f16)
819
+
820
@ti.kernel
def _compute_outgoing_and_accumulate(self):
    """
    Compute the reflected flux per surface and add it to the running total.

    One pass writes the per-step value (``_surfoutsl``, cast to fp16) and
    accumulates the uncast value into ``_surfoutsw``.
    """
    for s in range(self.n_surfaces):
        reflected = self.surfaces.albedo[s] * self._surfins[s]
        self._surfoutsw[s] += reflected
        self._surfoutsl[s] = ti.cast(reflected, ti.f16)
833
+
834
@ti.kernel
def _compute_total_reflected(self):
    """
    Sum area-weighted reflected flux and total surface area.

    Results are left in ``_total_reflected_flux`` and
    ``_total_reflecting_area`` (both reset to zero first).
    """
    self._total_reflecting_area[None] = 0.0
    self._total_reflected_flux[None] = 0.0

    for s in range(self.n_surfaces):
        patch_area = self.surfaces.area[s]
        # Augmented assignment on a field inside a Taichi parallel loop is
        # an atomic add, same as the explicit ti.atomic_add call.
        self._total_reflected_flux[None] += self._surfoutsl[s] * patch_area
        self._total_reflecting_area[None] += patch_area
849
+
850
@ti.kernel
def _distribute_reflected_radiation(self):
    """
    Distribute reflected radiation based on distance-weighted view factors.

    PALM uses pre-computed sparse SVF matrix:
        surfins(isurf) += svf(1,isvf) * svf(2,isvf) * surfoutsl(isurfsrc)
    where svf(1,isvf) is the geometric view factor and svf(2,isvf) is transmissivity.

    This kernel instead evaluates a point-to-point view factor for every
    ordered surface pair (i receives, j emits):
    - Distance between surface centres (inverse square law)
    - Orientation (dot product of normals with connection vector)
    - Visibility is simplified: a pair contributes whenever both surfaces
      face each other; no occlusion test is performed here.

    The result overwrites ``_surfins[i]`` (fp16) with the incoming reflected
    flux for this step, scaled by ``1 - svf[i]``.
    The pairwise loop is O(n^2) in the number of surfaces.
    """
    PI = 3.14159265359

    for i in range(self.n_surfaces):
        # Receiving surface properties
        pos_i = self.surfaces.center[i]
        normal_i = self.surfaces.normal[i]
        urban_vf_i = 1.0 - self.surfaces.svf[i]

        # Skip if surface sees (almost) only sky: nothing to receive
        if urban_vf_i < 0.01:
            self._surfins[i] = ti.cast(0.0, ti.f16)
            continue

        # Accumulate contributions from all emitting surfaces
        total_incoming = 0.0

        for j in range(self.n_surfaces):
            if i == j:
                continue

            # Emitters below this outgoing flux are ignored
            outgoing_j = self._surfoutsl[j]
            if outgoing_j < 0.01:
                continue

            # Emitting surface properties
            pos_j = self.surfaces.center[j]
            normal_j = self.surfaces.normal[j]
            area_j = self.surfaces.area[j]

            # Vector from j to i
            dx = pos_i[0] - pos_j[0]
            dy = pos_i[1] - pos_j[1]
            dz = pos_i[2] - pos_j[2]
            dist_sq = dx*dx + dy*dy + dz*dz

            # Skip (near-)coincident centres to avoid a blow-up in 1/dist^2
            if dist_sq < 0.1:
                continue

            dist = ti.sqrt(dist_sq)

            # Unit vector from j to i
            dir_x = dx / dist
            dir_y = dy / dist
            dir_z = dz / dist

            # Cosine of angle at emitting surface (must be positive - facing towards i)
            cos_emit = normal_j[0]*dir_x + normal_j[1]*dir_y + normal_j[2]*dir_z

            # Cosine of angle at receiving surface (must be positive - facing towards j)
            cos_recv = -(normal_i[0]*dir_x + normal_i[1]*dir_y + normal_i[2]*dir_z)

            # Both surfaces must face each other
            if cos_emit > 0.0 and cos_recv > 0.0:
                # Radiative view factor formula:
                # F_ij = (cos_emit * cos_recv * area_j) / (pi * dist^2)
                # Incoming irradiance from j = outgoing_j * F_ij
                view_factor = (cos_emit * cos_recv * area_j) / (PI * dist_sq)

                # Clamp: the point approximation can exceed 1 for close pairs
                view_factor = ti.min(view_factor, 1.0)

                total_incoming += outgoing_j * view_factor

        # Scale by urban view factor (1 - sky view factor) and store as fp16
        self._surfins[i] = ti.cast(total_incoming * urban_vf_i, ti.f16)
932
+
933
@ti.kernel
def _distribute_reflected_radiation_with_canopy(
    self,
    lad: ti.template(),
    is_solid: ti.template(),
    ext_coef: ti.f32,
    albedo_leaf: ti.f32
):
    """
    Distribute reflected radiation with LAD transmissivity.

    For each pair of mutually facing surfaces, a ray is traced between the
    two centres with ``ray_point_to_point_transmissivity``; pairs blocked
    by a solid cell are dropped and the remaining contribution is
    multiplied by the returned transmissivity. The result overwrites
    ``_surfins[i]`` (fp16), scaled by ``1 - svf[i]``.

    This kernel only attenuates; it does not scatter absorbed radiation
    back to surfaces.

    Args:
        lad: 3D field of Leaf Area Density
        is_solid: 3D field of solid cells
        ext_coef: Extinction coefficient for canopy
        albedo_leaf: Leaf/tree albedo. Currently unused by this kernel;
            kept so the call signature stays unchanged.
    """
    PI = 3.14159265359
    nx = self.domain.nx
    ny = self.domain.ny
    nz = self.domain.nz
    dx = self.domain.dx
    dy = self.domain.dy
    dz = self.domain.dz

    for i in range(self.n_surfaces):
        # Receiving surface properties
        pos_i = self.surfaces.center[i]
        normal_i = self.surfaces.normal[i]
        urban_vf_i = 1.0 - self.surfaces.svf[i]

        # Skip if surface sees only sky
        if urban_vf_i < 0.01:
            # Explicit fp16 cast, consistent with the non-canopy kernel
            self._surfins[i] = ti.cast(0.0, ti.f16)
            continue

        # Accumulate contributions from all emitting surfaces
        total_incoming = 0.0

        for j in range(self.n_surfaces):
            if i == j:
                continue

            # Emitters below this outgoing flux are ignored
            outgoing_j = self._surfoutsl[j]
            if outgoing_j < 0.01:
                continue

            # Emitting surface properties
            pos_j = self.surfaces.center[j]
            normal_j = self.surfaces.normal[j]
            area_j = self.surfaces.area[j]

            # Vector from j to i
            diff_x = pos_i[0] - pos_j[0]
            diff_y = pos_i[1] - pos_j[1]
            diff_z = pos_i[2] - pos_j[2]
            dist_sq = diff_x*diff_x + diff_y*diff_y + diff_z*diff_z

            # Skip (near-)coincident centres to avoid a blow-up in 1/dist^2
            if dist_sq < 0.1:
                continue

            dist = ti.sqrt(dist_sq)

            # Unit vector from j to i
            dir_x = diff_x / dist
            dir_y = diff_y / dist
            dir_z = diff_z / dist

            # Cosine of angle at emitting surface
            cos_emit = normal_j[0]*dir_x + normal_j[1]*dir_y + normal_j[2]*dir_z

            # Cosine of angle at receiving surface
            cos_recv = -(normal_i[0]*dir_x + normal_i[1]*dir_y + normal_i[2]*dir_z)

            # Both surfaces must face each other
            if cos_emit > 0.0 and cos_recv > 0.0:
                # Transmissivity along the segment j -> i
                trans, blocked = ray_point_to_point_transmissivity(
                    pos_j, pos_i,
                    lad, is_solid,
                    nx, ny, nz,
                    dx, dy, dz,
                    ext_coef
                )

                # Only unobstructed pairs exchange radiation
                if blocked != 1:
                    # View factor, clamped because the point approximation
                    # can exceed 1 for close pairs
                    view_factor = (cos_emit * cos_recv * area_j) / (PI * dist_sq)
                    view_factor = ti.min(view_factor, 1.0)

                    # Apply canopy transmissivity
                    total_incoming += outgoing_j * view_factor * trans

        # Scale by urban view factor and store as fp16
        self._surfins[i] = ti.cast(total_incoming * urban_vf_i, ti.f16)
1035
+
1036
+ def _compute_surface_reflections(self):
1037
+ """
1038
+ Compute radiation exchange between surfaces for one reflection step.
1039
+
1040
+ If canopy_reflections is enabled and LAD exists, uses ray tracing
1041
+ through vegetation with Beer-Lambert attenuation. Otherwise uses
1042
+ simplified geometry-based distribution.
1043
+
1044
+ Uses two-pass algorithm:
1045
+ 1. Parallel reduction to compute total reflected flux
1046
+ 2. Parallel distribution based on view factors (with optional LAD)
1047
+
1048
+ Note: For optimized multi-step reflection, use _compute_surface_reflections_optimized()
1049
+ which uses separate kernels with ping-pong buffers for ~100x GPU speedup.
1050
+ """
1051
+ # Pass 1: Compute total reflected flux (parallel reduction)
1052
+ self._compute_total_reflected()
1053
+
1054
+ # Pass 2: Distribute to each surface
1055
+ if self.config.canopy_reflections and self.domain.lad is not None:
1056
+ # Use canopy-aware version with LAD transmissivity
1057
+ if self.config.cache_svf_matrix and self._svf_matrix_cached:
1058
+ # Use cached SVF matrix with optimized separate kernels
1059
+ self._distribute_reflected_cached_single_step()
1060
+ else:
1061
+ # Compute dynamically (O(n²))
1062
+ self._distribute_reflected_radiation_with_canopy(
1063
+ self.domain.lad,
1064
+ self.domain.is_solid,
1065
+ self.config.ext_coef,
1066
+ self.config.albedo_leaf
1067
+ )
1068
+ else:
1069
+ # Use simple version without canopy
1070
+ if self.config.cache_svf_matrix and self._svf_matrix_cached:
1071
+ self._distribute_reflected_cached_single_step()
1072
+ else:
1073
+ self._distribute_reflected_radiation()
1074
+
1075
+ def _distribute_reflected_cached_single_step(self):
1076
+ """
1077
+ Single-step reflection distribution using optimized separate kernels.
1078
+
1079
+ Uses separate kernels instead of fused kernel with internal ti.sync()
1080
+ for ~100x GPU speedup. Each kernel is fully parallel without barriers.
1081
+ """
1082
+ n = self.n_surfaces
1083
+ svf_nnz = self._svf_nnz
1084
+
1085
+ # Reset incoming buffer
1086
+ self._reset_buffer(self._surfins, n)
1087
+
1088
+ # Sparse matmul for reflection distribution
1089
+ self._sparse_matmul_step(self._surfoutsl, self._surfins, svf_nnz)
1090
+
1091
+ # Scale by urban view factor
1092
+ self._scale_by_urban_vf(self._surfins, n)
1093
+
1094
def compute_svf_matrix(self):
    """
    Pre-compute surface-to-surface view factor matrix (PALM-like approach).

    This is expensive O(n²) but only needs to be done once for fixed geometry.
    Subsequent reflection iterations become O(nnz) instead of O(n²).

    Call this before running multi-timestep simulations for efficiency.

    The method is a no-op when the matrix is already cached or when the
    SVF storage was never allocated. On success it sets ``_svf_nnz``
    (clamped to ``_max_svf_entries``), marks the matrix as cached and, if
    CSR storage exists, builds the CSR representation.
    """
    if self._svf_matrix_cached:
        print("SVF matrix already cached, skipping recomputation.")
        return

    # Check if arrays were allocated in __init__
    if self._svf_source is None:
        print("Warning: SVF caching not enabled in config, skipping matrix computation.")
        return

    print(f"Pre-computing SVF matrix for {self.n_surfaces} surfaces...")
    print(" This is O(n²) but only runs once for fixed geometry.")

    # Compute the matrix
    if self.domain.lad is not None and self.config.canopy_reflections:
        self._compute_svf_matrix_with_canopy(
            self.domain.lad,
            self.domain.is_solid,
            self.config.ext_coef,
            self.config.svf_min_threshold
        )
    else:
        self._compute_svf_matrix_simple(self.config.svf_min_threshold)

    # The kernels keep counting past the buffer size, so clamp nnz to the
    # number of entries actually stored to avoid out-of-bounds reads.
    computed_nnz = int(self._svf_count[None])
    if computed_nnz > self._max_svf_entries:
        truncated_pct = (computed_nnz - self._max_svf_entries) / computed_nnz * 100
        print(f"Warning: SVF matrix truncated! Computed {computed_nnz:,} entries but buffer size is {self._max_svf_entries:,}.")
        print(f" {truncated_pct:.1f}% of surface-to-surface view factors are being discarded.")
        print(" This may affect reflection accuracy. To fix: clear_all_caches() before creating this model,")
        print(" or increase svf_min_threshold in RadiationConfig to reduce entries.")
        self._svf_nnz = self._max_svf_entries
    else:
        self._svf_nnz = computed_nnz

    self._svf_matrix_cached = True

    # Guard the report against an empty surface set (n_surfaces == 0),
    # which would otherwise divide by zero.
    n_pairs = self.n_surfaces * self.n_surfaces
    sparsity = self._svf_nnz / n_pairs * 100 if n_pairs else 0.0
    memory_mb = self._svf_nnz * 12 / 1e6  # 12 bytes per entry (2 int32 + 2 float16)
    print(f" SVF matrix computed: {self._svf_nnz:,} non-zero entries ({memory_mb:.1f} MB)")
    print(f" Sparsity: {sparsity:.2f}% of full matrix")
    print(f" Speedup factor: ~{n_pairs / max(1, self._svf_nnz):.1f}x per reflection step")

    # Build CSR format for optimized sparse matmul
    if self._svf_csr_row_ptr is not None:
        self._build_csr_format()
1149
+
1150
+ def _build_csr_format(self):
1151
+ """
1152
+ Convert COO format to CSR format for optimized sparse matmul.
1153
+
1154
+ CSR (Compressed Sparse Row by target) provides:
1155
+ 1. O(n_surfaces) parallelism with one thread per row
1156
+ 2. Local accumulation instead of atomic operations
1157
+ 3. Better cache locality for reading source values
1158
+
1159
+ Reduces sparse matmul time by ~2-5x on GPU.
1160
+ """
1161
+ import numpy as np
1162
+
1163
+ print(" Building CSR format for optimized sparse matmul...")
1164
+
1165
+ # Copy COO data to numpy for sorting
1166
+ coo_target = self._svf_target.to_numpy()[:self._svf_nnz]
1167
+ coo_source = self._svf_source.to_numpy()[:self._svf_nnz]
1168
+ coo_vf = self._svf_vf.to_numpy()[:self._svf_nnz]
1169
+ coo_trans = self._svf_trans.to_numpy()[:self._svf_nnz]
1170
+
1171
+ # Pre-multiply vf * trans
1172
+ coo_val = coo_vf * coo_trans
1173
+
1174
+ # Sort by target (row) for CSR format
1175
+ sort_idx = np.argsort(coo_target)
1176
+ sorted_target = coo_target[sort_idx]
1177
+ sorted_source = coo_source[sort_idx]
1178
+ sorted_val = coo_val[sort_idx]
1179
+
1180
+ # Build row pointers
1181
+ row_ptr = np.zeros(self.n_surfaces + 1, dtype=np.int32)
1182
+ for t in sorted_target:
1183
+ row_ptr[t + 1] += 1
1184
+ row_ptr = np.cumsum(row_ptr)
1185
+
1186
+ # Pad arrays to match Taichi field shape (required for from_numpy)
1187
+ # The fields are allocated to _max_svf_entries, but we only use _svf_nnz
1188
+ padded_col_idx = np.zeros(self._max_svf_entries, dtype=np.int32)
1189
+ padded_val = np.zeros(self._max_svf_entries, dtype=np.float32)
1190
+ padded_col_idx[:self._svf_nnz] = sorted_source
1191
+ padded_val[:self._svf_nnz] = sorted_val
1192
+
1193
+ # Copy to Taichi fields
1194
+ self._svf_csr_row_ptr.from_numpy(row_ptr)
1195
+ self._svf_csr_col_idx.from_numpy(padded_col_idx)
1196
+ self._svf_csr_val.from_numpy(padded_val)
1197
+
1198
+ self._svf_csr_ready = True
1199
+ print(f" CSR format ready: {self.n_surfaces} rows, {self._svf_nnz:,} entries")
1200
+
1201
def compute_csf_matrix(self):
    """
    Build and cache the Canopy-Surface Factor (CSF) matrix.

    Does nothing when the matrix is already cached or when the CSF storage
    (``_csf_canopy_idx``) is missing. Otherwise runs the CSF kernel with a
    fixed 1e-7 threshold, records the number of stored entries in
    ``_csf_nnz`` (capped at ``_max_csf_entries``) and marks the matrix as
    cached.
    """
    if self._csf_matrix_cached or self._csf_canopy_idx is None:
        return

    print("Pre-computing CSF matrix for canopy-surface interactions...")

    threshold = 1e-7
    self._compute_csf_matrix_kernel(
        self.domain.lad,
        self.domain.is_solid,
        self.config.ext_coef,
        threshold,
    )

    # The kernel counter may exceed the buffer capacity; only the first
    # _max_csf_entries entries are actually stored.
    found = int(self._csf_count[None])
    capacity = self._max_csf_entries
    if found > capacity:
        print(f"Warning: CSF matrix truncated! {found} > {capacity}")
    self._csf_nnz = capacity if found > capacity else found

    self._csf_matrix_cached = True
    print(f" CSF matrix computed: {self._csf_nnz:,} entries")
1233
+
1234
@ti.kernel
def _compute_csf_matrix_kernel(
    self,
    lad: ti.template(),
    is_solid: ti.template(),
    ext_coef: ti.f32,
    min_threshold: ti.f32
):
    """
    Compute CSF matrix entries.

    For every grid cell with positive LAD and every surface facing that
    cell, stores (canopy_idx, surface_idx, final_factor) where:
        base_factor  = (dx * dy * cos_emit) / (4 * PI * dist_sq)
        final_factor = base_factor * trans
    with cos_emit the cosine between the surface normal and the direction
    from the surface centre to the cell centre, and trans the
    transmissivity returned by ray_point_to_point_transmissivity.

    Per the original design notes this factor is used for:
    1. Surface->Canopy: absorbed = outgoing * area_surf * base_factor * abs_frac / grid_vol
    2. Canopy->Surface: incoming = scattered_power * base_factor / area_surf

    Args:
        lad: 3D field of Leaf Area Density
        is_solid: 3D field of solid cells
        ext_coef: Extinction coefficient passed to the ray tracer
        min_threshold: Entries are kept only if both base_factor and
            final_factor exceed this value

    ``_csf_count`` ends up holding the number of qualifying entries, which
    may exceed ``_max_csf_entries``; only indices below that capacity are
    written.
    """
    PI = 3.14159265359
    nx = self.domain.nx
    ny = self.domain.ny
    nz = self.domain.nz
    dx = self.domain.dx
    dy = self.domain.dy
    dz = self.domain.dz

    # Reset counter
    self._csf_count[None] = 0

    # Iterate over all grid cells; only cells with vegetation are kept
    for ci, cj, ck in ti.ndrange(nx, ny, nz):
        cell_lad = lad[ci, cj, ck]
        if cell_lad <= 0.0:
            continue

        # Cell center
        pos_cell = Vector3(
            (ci + 0.5) * dx,
            (cj + 0.5) * dy,
            (ck + 0.5) * dz
        )

        # Linear index for canopy cell (k fastest, then j, then i)
        canopy_idx = ci * (ny * nz) + cj * nz + ck

        # Iterate over all surfaces
        for surf_i in range(self.n_surfaces):
            pos_surf = self.surfaces.center[surf_i]
            normal_surf = self.surfaces.normal[surf_i]

            # Vector from surface to cell
            diff = pos_cell - pos_surf
            dist_sq = diff[0]*diff[0] + diff[1]*diff[1] + diff[2]*diff[2]

            # Skip (near-)coincident points to avoid a blow-up in 1/dist^2
            if dist_sq < 0.1:
                continue

            dist = ti.sqrt(dist_sq)

            # Direction from surface to cell
            dir_x = diff[0] / dist
            dir_y = diff[1] / dist
            dir_z = diff[2] / dist

            # Check if surface faces the cell
            cos_emit = (normal_surf[0]*dir_x + normal_surf[1]*dir_y + normal_surf[2]*dir_z)

            if cos_emit > 0.0:
                # Geometric factor (solid angle / 4pi * cos)
                # cell_solid_angle = (dx * dy) / dist_sq (approx)
                # factor = (cell_solid_angle * cos_emit) / (4 * PI)
                # factor = (dx * dy * cos_emit) / (4 * PI * dist_sq)

                base_factor = (dx * dy * cos_emit) / (4.0 * PI * dist_sq)

                # Cheap geometric filter before the ray trace
                if base_factor > min_threshold:
                    # Check transmissivity
                    trans, blocked = ray_point_to_point_transmissivity(
                        pos_surf, pos_cell,
                        lad, is_solid,
                        nx, ny, nz,
                        dx, dy, dz,
                        ext_coef
                    )

                    if blocked == 0 and trans > 0.01:
                        final_factor = base_factor * trans

                        if final_factor > min_threshold:
                            # Atomic increment reserves a unique slot; the
                            # counter keeps growing even when the buffer
                            # is full so the caller can detect truncation.
                            idx = ti.atomic_add(self._csf_count[None], 1)
                            if idx < self._max_csf_entries:
                                self._csf_canopy_idx[idx] = canopy_idx
                                self._csf_surface_idx[idx] = surf_i
                                self._csf_val[idx] = final_factor
1328
+
1329
@ti.kernel
def _compute_svf_matrix_simple(self, min_threshold: ti.f32):
    """
    Compute SVF matrix without canopy (simple geometry).

    Stores entries where view factor > min_threshold in sparse COO format:
    ``_svf_source`` (emitter j), ``_svf_target`` (receiver i), ``_svf_vf``
    and ``_svf_trans`` (always 1.0 here). No occlusion test is done; a
    pair qualifies when both surfaces face each other.

    Args:
        min_threshold: Minimum view factor for an entry to be stored.

    ``_svf_count`` ends up holding the number of qualifying pairs, which
    may exceed ``_max_svf_entries``; only indices below that capacity are
    written.
    """
    PI = 3.14159265359

    # Reset counter
    self._svf_count[None] = 0

    # Compute all pairwise view factors (i receives, j emits)
    for i in range(self.n_surfaces):
        pos_i = self.surfaces.center[i]
        normal_i = self.surfaces.normal[i]

        for j in range(self.n_surfaces):
            if i == j:
                continue

            pos_j = self.surfaces.center[j]
            normal_j = self.surfaces.normal[j]
            area_j = self.surfaces.area[j]

            # Vector from j to i
            dx = pos_i[0] - pos_j[0]
            dy = pos_i[1] - pos_j[1]
            dz = pos_i[2] - pos_j[2]
            dist_sq = dx*dx + dy*dy + dz*dz

            # Skip (near-)coincident centres to avoid a blow-up in 1/dist^2
            if dist_sq < 0.1:
                continue

            dist = ti.sqrt(dist_sq)
            dir_x = dx / dist
            dir_y = dy / dist
            dir_z = dz / dist

            # Cosines at the emitter (towards i) and at the receiver (towards j)
            cos_emit = normal_j[0]*dir_x + normal_j[1]*dir_y + normal_j[2]*dir_z
            cos_recv = -(normal_i[0]*dir_x + normal_i[1]*dir_y + normal_i[2]*dir_z)

            if cos_emit > 0.0 and cos_recv > 0.0:
                # Point-to-point view factor, clamped to 1
                view_factor = (cos_emit * cos_recv * area_j) / (PI * dist_sq)
                view_factor = ti.min(view_factor, 1.0)

                if view_factor > min_threshold:
                    # Atomic increment to get unique index; the counter
                    # keeps growing past capacity so truncation is visible
                    idx = ti.atomic_add(self._svf_count[None], 1)
                    if idx < self._max_svf_entries:
                        self._svf_source[idx] = j
                        self._svf_target[idx] = i
                        self._svf_vf[idx] = view_factor
                        self._svf_trans[idx] = 1.0  # No canopy
1384
+
1385
@ti.kernel
def _compute_svf_matrix_with_canopy(
    self,
    lad: ti.template(),
    is_solid: ti.template(),
    ext_coef: ti.f32,
    min_threshold: ti.f32
):
    """
    Compute SVF matrix with canopy transmissivity.

    For each pair of mutually facing surfaces, computes the point-to-point
    view factor and the transmissivity along the connecting segment (via
    ``ray_point_to_point_transmissivity``). Pairs blocked by a solid cell
    or with transmissivity not above 0.001 are dropped.

    Stores entries where (vf * trans) > min_threshold in COO format:
    ``_svf_source`` (emitter j), ``_svf_target`` (receiver i), ``_svf_vf``
    and ``_svf_trans``.

    Args:
        lad: 3D field of Leaf Area Density
        is_solid: 3D field of solid cells
        ext_coef: Extinction coefficient passed to the ray tracer
        min_threshold: Minimum effective view factor for an entry

    ``_svf_count`` ends up holding the number of qualifying pairs, which
    may exceed ``_max_svf_entries``; only indices below that capacity are
    written.
    """
    PI = 3.14159265359
    nx = self.domain.nx
    ny = self.domain.ny
    nz = self.domain.nz
    dx = self.domain.dx
    dy = self.domain.dy
    dz = self.domain.dz

    # Reset counter
    self._svf_count[None] = 0

    for i in range(self.n_surfaces):
        pos_i = self.surfaces.center[i]
        normal_i = self.surfaces.normal[i]

        for j in range(self.n_surfaces):
            if i == j:
                continue

            pos_j = self.surfaces.center[j]
            normal_j = self.surfaces.normal[j]
            area_j = self.surfaces.area[j]

            # Vector from j to i
            diff_x = pos_i[0] - pos_j[0]
            diff_y = pos_i[1] - pos_j[1]
            diff_z = pos_i[2] - pos_j[2]
            dist_sq = diff_x*diff_x + diff_y*diff_y + diff_z*diff_z

            # Skip (near-)coincident centres to avoid a blow-up in 1/dist^2
            if dist_sq < 0.1:
                continue

            dist = ti.sqrt(dist_sq)
            dir_x = diff_x / dist
            dir_y = diff_y / dist
            dir_z = diff_z / dist

            # Cosines at the emitter (towards i) and at the receiver (towards j)
            cos_emit = normal_j[0]*dir_x + normal_j[1]*dir_y + normal_j[2]*dir_z
            cos_recv = -(normal_i[0]*dir_x + normal_i[1]*dir_y + normal_i[2]*dir_z)

            if cos_emit > 0.0 and cos_recv > 0.0:
                # Point-to-point view factor, clamped to 1
                view_factor = (cos_emit * cos_recv * area_j) / (PI * dist_sq)
                view_factor = ti.min(view_factor, 1.0)

                # Cheap geometric pre-filter, mirroring the CSF kernel:
                # since vf * trans <= vf, a pair whose bare view factor is
                # not above the threshold can never be stored, so the ray
                # trace is skipped for it.
                # NOTE(review): relies on trans <= 1 from
                # ray_point_to_point_transmissivity - confirm.
                if view_factor > min_threshold:
                    # Compute transmissivity through canopy
                    trans, blocked = ray_point_to_point_transmissivity(
                        pos_j, pos_i,
                        lad, is_solid,
                        nx, ny, nz,
                        dx, dy, dz,
                        ext_coef
                    )

                    if blocked == 0 and trans > 0.001:
                        effective_vf = view_factor * trans

                        if effective_vf > min_threshold:
                            # Atomic increment reserves a unique slot; the
                            # counter keeps growing past capacity so the
                            # caller can detect truncation.
                            idx = ti.atomic_add(self._svf_count[None], 1)
                            if idx < self._max_svf_entries:
                                self._svf_source[idx] = j
                                self._svf_target[idx] = i
                                self._svf_vf[idx] = view_factor
                                self._svf_trans[idx] = trans
1464
+
1465
+ # ========== Optimized Reflection Kernels (Separate for GPU efficiency) ==========
1466
+ # Using separate kernels without internal ti.sync() is ~100x faster than fused kernels
1467
+ # with ti.sync() inside. Taichi handles synchronization between kernel calls efficiently.
1468
+
1469
@ti.kernel
def _reset_buffer(self, buf: ti.template(), n: ti.i32):
    """Write 0.0 into the first ``n`` elements of ``buf``."""
    for slot in range(n):
        buf[slot] = 0.0
1474
+
1475
@ti.kernel
def _compute_outgoing_step(self, surfins: ti.template(), surfout: ti.template(), n: ti.i32):
    """Fill ``surfout`` with the reflected part of ``surfins`` (albedo * surfins)."""
    for s in range(n):
        reflectance = self.surfaces.albedo[s]
        surfout[s] = reflectance * surfins[s]
1480
+
1481
@ti.kernel
def _sparse_matmul_step(
    self,
    surfout: ti.template(),
    surfins_next: ti.template(),
    svf_nnz: ti.i32
):
    """
    COO sparse matrix-vector product for reflection distribution.

    PALM equivalent: surfins(isurf) += svf(1,isvf) * svf(2,isvf) * surfoutsl(isurfsrc)

    Each of the first ``svf_nnz`` stored entries adds
    ``surfout[source] * vf * trans`` to ``surfins_next[target]`` with an
    atomic add. Contributions not above 0.001 are skipped.
    """
    for entry in range(svf_nnz):
        src = self._svf_source[entry]
        dst = self._svf_target[entry]

        # Effective view factor for this pair
        vf_trans = self._svf_vf[entry] * self._svf_trans[entry]
        outgoing = surfout[src]
        contribution = outgoing * vf_trans

        # Negligible contributions are dropped
        if contribution > 0.001:
            ti.atomic_add(surfins_next[dst], contribution)
1511
+
1512
@ti.kernel
def _sparse_matmul_step_batched(
    self,
    surfout: ti.template(),
    surfins_next: ti.template(),
    svf_nnz: ti.i32,
    batch_size: ti.i32
):
    """
    Batched COO sparse matrix-vector product.

    The ``svf_nnz`` entries are split into consecutive batches of
    ``batch_size``; the outer loop runs over batches and an inner loop
    walks the entries of one batch. Every qualifying entry (contribution
    above 0.001) is still added to ``surfins_next[target]`` with its own
    atomic add.
    """
    batch_count = (svf_nnz + batch_size - 1) // batch_size

    for b in range(batch_count):
        first = b * batch_size
        stop = ti.min(first + batch_size, svf_nnz)

        for entry in range(first, stop):
            src = self._svf_source[entry]
            dst = self._svf_target[entry]
            vf_trans = self._svf_vf[entry] * self._svf_trans[entry]
            outgoing = surfout[src]
            contribution = outgoing * vf_trans

            if contribution > 0.001:
                ti.atomic_add(surfins_next[dst], contribution)
1541
+
1542
@ti.kernel
def _scale_by_urban_vf(self, surfins: ti.template(), n: ti.i32):
    """
    Multiply incoming flux by the urban view factor (1 - SVF).

    Surfaces whose urban view factor is below 0.01 are set to zero instead.
    """
    for s in range(n):
        urban_fraction = 1.0 - self.surfaces.svf[s]
        if urban_fraction < 0.01:
            # Surface sees essentially only sky
            surfins[s] = 0.0
        else:
            surfins[s] *= urban_fraction
1551
+
1552
@ti.kernel
def _sparse_matmul_csr(
    self,
    surfout: ti.template(),
    surfins_next: ti.template(),
    n_surfaces: ti.i32
):
    """
    CSR sparse matrix-vector product for reflection distribution.

    The outer loop runs over target surfaces (rows). Each row sums its own
    entries ``surfout[col] * val`` in a local variable and writes the
    result once, so no atomic add is used. ``_svf_csr_val`` holds the
    pre-multiplied vf * trans. Contributions not above 0.001 are skipped.

    Note that ``surfins_next[row]`` is overwritten, not accumulated.
    """
    for row in range(n_surfaces):
        first = self._svf_csr_row_ptr[row]
        stop = self._svf_csr_row_ptr[row + 1]

        # Row-local accumulator
        row_total = 0.0

        for entry in range(first, stop):
            col = self._svf_csr_col_idx[entry]
            vf_trans = self._svf_csr_val[entry]
            outgoing = surfout[col]
            contribution = outgoing * vf_trans

            # Negligible contributions are dropped
            if contribution > 0.001:
                row_total += contribution

        # One plain write per row
        surfins_next[row] = row_total
1589
+
1590
@ti.kernel
def _accumulate_reflected(self, surfins: ti.template(), n: ti.i32):
    """Add the first ``n`` entries of ``surfins`` onto ``self._surfins``.

    Args:
        surfins: Field holding the reflected radiation of one step.
        n: Number of leading entries to accumulate.
    """
    for surf in range(n):
        self._surfins[surf] += surfins[surf]
1595
+
1596
def _distribute_reflected_cached_optimized(self, svf_nnz: int, use_ping: bool):
    """
    Run one reflection step as a sequence of separate kernel launches.

    The step reads incoming radiation from one ping-pong buffer and writes
    the reflected radiation into the other one:

    1. compute outgoing radiation from the read buffer,
    2. zero the write buffer,
    3. distribute outgoing radiation through the sparse SVF matrix,
    4. scale the result by the urban view factor,
    5. accumulate it into the reflected total,

    and finally adds the step's outgoing radiation to the outgoing total.

    Args:
        svf_nnz: Number of non-zero entries in the SVF matrix.
        use_ping: If True, read from the ping buffer and write to the pong
            buffer; otherwise the roles are swapped.
    """
    surface_count = self.n_surfaces

    # Select the read/write roles once instead of duplicating the sequence.
    if use_ping:
        incoming = self._surfins_ping
        reflected = self._surfins_pong
    else:
        incoming = self._surfins_pong
        reflected = self._surfins_ping

    self._compute_outgoing_step(incoming, self._surfoutsl, surface_count)
    self._reset_buffer(reflected, surface_count)
    self._sparse_matmul_step(self._surfoutsl, reflected, svf_nnz)
    self._scale_by_urban_vf(reflected, surface_count)
    self._accumulate_reflected(reflected, surface_count)

    # Outgoing totals are updated once per step, independent of direction.
    self._accumulate_outgoing()
1628
+
1629
@ti.kernel
def _init_ping_buffer(self, n: ti.i32):
    """Prepare the buffers used by the reflection loop.

    For the first ``n`` surfaces the ping buffer is set to the sum of the
    direct and diffuse shortwave input, and ``self._surfins`` is cleared.

    Args:
        n: Number of leading surface entries to initialise.
    """
    for surf in range(n):
        # Clear the running reflected value before the loop starts.
        self._surfins[surf] = 0.0
        self._surfins_ping[surf] = self._surfinswdir[surf] + self._surfinswdif[surf]
1635
+
1636
def _run_optimized_reflection_loop(self, n_steps: int):
    """
    Run the surface-only reflection loop (no canopy interaction).

    For up to five steps the work is delegated to
    ``_run_reflection_loop_ultra_fused``. For longer runs each step:

    1. computes outgoing radiation from ``self._surfins`` (and adds it to
       the outgoing total),
    2. zeroes the destination ping-pong buffer,
    3. distributes outgoing radiation through the sparse SVF matrix,
    4. scales by the urban view factor and adds to the incoming total,
    5. copies the destination buffer back into ``self._surfins``.

    Even-numbered steps write to the pong buffer, odd-numbered steps to the
    ping buffer.

    Args:
        n_steps: Number of reflection iterations.
    """
    surface_count = self.n_surfaces
    nnz = self._svf_nnz

    # Short runs take the path with fewer kernel launches.
    if n_steps <= 5:
        self._run_reflection_loop_ultra_fused(n_steps, surface_count, nnz)
        return

    for step in range(n_steps):
        if step % 2 == 0:
            target = self._surfins_pong
        else:
            target = self._surfins_ping

        self._compute_outgoing_fused(self._surfins, surface_count)
        self._reset_buffer(target, surface_count)
        self._sparse_matmul_step(self._surfoutsl, target, nnz)
        self._scale_and_accumulate_incoming(target, surface_count)

        # The next step always reads its input from _surfins.
        self._copy_buffer(target, self._surfins, surface_count)
1686
+
1687
def _run_reflection_loop_ultra_fused(self, n_steps: int, n: int, svf_nnz: int):
    """
    Reflection loop with three kernel launches per step on the CSR path.

    Each step computes outgoing radiation from the current input buffer,
    distributes it through the sparse SVF matrix into the other ping-pong
    buffer, then scales by the urban view factor and accumulates it into
    the incoming total. The first step reads ``self._surfins``; every later
    step reads the buffer written by the step before it.

    The previous implementation selected the buffers of the fourth and
    later steps with the wrong parity, so step 3 re-read the ping buffer
    (the input of step 2) instead of the pong buffer that step 2 had just
    written. The buffers now alternate strictly. A non-positive ``n_steps``
    performs no work (previously one reflection was always executed).

    Args:
        n_steps: Number of reflection iterations to run.
        n: Number of surfaces.
        svf_nnz: Number of non-zero SVF entries (used by the COO fallback).
    """
    # Choose sparse matmul method: CSR (no atomics) or COO (fallback).
    use_csr = self._svf_csr_ready

    def do_sparse_matmul(surfout, surfins_next):
        if use_csr:
            # CSR kernel overwrites every row, so no reset is required.
            self._sparse_matmul_csr(surfout, surfins_next, n)
        else:
            # COO kernel accumulates with atomics and needs a zeroed target.
            self._reset_buffer(surfins_next, n)
            self._sparse_matmul_step(surfout, surfins_next, svf_nnz)

    src = self._surfins
    for step in range(n_steps):
        # Even steps write pong, odd steps write ping.
        dst = self._surfins_pong if step % 2 == 0 else self._surfins_ping

        if step == 0:
            self._compute_outgoing_fused(src, n)
        else:
            self._compute_outgoing_fused_from_buffer(src, n)
        do_sparse_matmul(self._surfoutsl, dst)
        self._scale_and_accumulate_incoming(dst, n)

        # The buffer just written is the input of the next reflection.
        src = dst
1735
+
1736
@ti.kernel
def _compute_outgoing_fused_from_buffer(self, surfins: ti.template(), n: ti.i32):
    """Compute reflected (outgoing) radiation from an explicit input buffer.

    For each of the first ``n`` surfaces the outgoing value is
    ``albedo * surfins``; it is stored in ``self._surfoutsl`` and added to
    the running total ``self._surfoutsw``.

    Args:
        surfins: Field holding the incoming radiation of the current step.
        n: Number of leading surface entries to process.
    """
    for surf in range(n):
        reflected = self.surfaces.albedo[surf] * surfins[surf]
        self._surfoutsl[surf] = reflected
        self._surfoutsw[surf] += reflected
1743
+
1744
def _run_optimized_reflection_loop_with_canopy(self, n_steps: int):
    """
    Run the reflection loop including surface/canopy exchange.

    Surface-to-surface transfer uses the sparse SVF matrix and
    surface/canopy transfer uses the sparse CSF matrix. The ping buffer is
    initialised first; step ``s`` then reads ping when ``s`` is even and
    pong when ``s`` is odd, writing to the other buffer.

    Args:
        n_steps: Number of reflection iterations.
    """
    surface_count = self.n_surfaces
    svf_entries = self._svf_nnz
    csf_entries = self._csf_nnz

    # Seed the ping buffer with the initial shortwave input.
    self._init_ping_buffer(surface_count)

    buffers = (self._surfins_ping, self._surfins_pong)

    for step in range(n_steps):
        source = buffers[step % 2]          # incoming radiation of this step
        target = buffers[(step + 1) % 2]    # receives this step's reflections

        # 1. Outgoing radiation from surfaces (also added to outgoing total).
        self._compute_outgoing_fused(source, surface_count)

        # 2. Surface -> canopy absorption through the CSF matrix.
        self._csf_absorb_step(self._surfoutsl, csf_entries)

        # 3. Refresh the canopy scattered field from the absorbed field.
        self._update_canopy_scattered_optimized(self.domain.lad, self.config.albedo_leaf)

        # 4. Surface -> surface distribution through the SVF matrix.
        if self._svf_csr_ready:
            # CSR kernel overwrites each row, so no explicit reset is needed.
            self._sparse_matmul_csr(self._surfoutsl, target, surface_count)
        else:
            # COO fallback accumulates into a zeroed buffer.
            self._reset_buffer(target, surface_count)
            self._sparse_matmul_step(self._surfoutsl, target, svf_entries)

        # 5. Canopy -> surface scattering (CSF matrix used in reverse).
        self._csf_scatter_step(target, csf_entries)

        # 6. Scale by urban view factor and add to the incoming total.
        self._scale_and_accumulate_incoming(target, surface_count)

        # 7. Mirror the result into _surfins (not read by this loop).
        self._copy_buffer(target, self._surfins, surface_count)
1792
+
1793
@ti.kernel
def _csf_absorb_step(self, surfout: ti.template(), csf_nnz: ti.i32):
    """
    Surface -> canopy absorption using the sparse CSF matrix.

    Every stored (canopy cell, surface, factor) entry whose source surface
    emits more than 0.01 adds
    ``outgoing * area * factor * abs_frac / cell_volume`` to the absorbed
    field ``self._pcbinsw`` of the canopy cell, where ``abs_frac`` is the
    Beer-Lambert absorption over a path of length ``dz``.

    Args:
        surfout: Outgoing radiation per surface.
        csf_nnz: Number of stored CSF entries to process.
    """
    inv_cell_volume = 1.0 / (self.domain.dx * self.domain.dy * self.domain.dz)
    extinction = self.config.ext_coef

    for entry in range(csf_nnz):
        cell_linear = self._csf_canopy_idx[entry]
        surface = self._csf_surface_idx[entry]
        weight = self._csf_val[entry]

        emitted = surfout[surface]
        if emitted > 0.01:
            # Unpack cell_linear = i * (ny * nz) + j * nz + k.
            k = cell_linear % self.domain.nz
            ij = cell_linear // self.domain.nz
            j = ij % self.domain.ny
            i = ij // self.domain.ny

            leaf_density = self.domain.lad[i, j, k]
            surface_area = self.surfaces.area[surface]

            # Absorbed fraction, approximating the in-cell path by dz.
            abs_frac = 1.0 - ti.exp(-extinction * leaf_density * self.domain.dz)

            absorbed = emitted * surface_area * weight * abs_frac * inv_cell_volume

            # Several entries may target the same cell, hence the atomic add.
            ti.atomic_add(self._pcbinsw[i, j, k], absorbed)
1826
+
1827
@ti.kernel
def _csf_scatter_step(self, surfins_next: ti.template(), csf_nnz: ti.i32):
    """
    Canopy -> surface scattering using the sparse CSF matrix.

    For every stored (canopy cell, surface, factor) entry whose cell has a
    scattered density above 0.001 in ``self._pcbinswref``, the value
    ``density * cell_volume * factor / surface_area`` is added both to
    ``surfins_next`` and to ``self._surfinswpc`` for that surface.

    Args:
        surfins_next: Incoming-radiation buffer that receives the scattered
            contribution (accumulated in place).
        csf_nnz: Number of stored CSF entries to process.
    """
    cell_volume = self.domain.dx * self.domain.dy * self.domain.dz

    for entry in range(csf_nnz):
        cell_linear = self._csf_canopy_idx[entry]
        surface = self._csf_surface_idx[entry]
        weight = self._csf_val[entry]

        # Unpack the linear canopy index into (i, j, k).
        k = cell_linear % self.domain.nz
        ij = cell_linear // self.domain.nz
        j = ij % self.domain.ny
        i = ij // self.domain.ny

        scattered_density = self._pcbinswref[i, j, k]
        if scattered_density > 0.001:
            # Volumetric density times cell volume gives the scattered power.
            scattered_power = scattered_density * cell_volume
            surface_area = self.surfaces.area[surface]
            received = scattered_power * weight / surface_area

            ti.atomic_add(surfins_next[surface], received)
            ti.atomic_add(self._surfinswpc[surface], received)
1858
+
1859
@ti.kernel
def _update_canopy_scattered_optimized(self, lad: ti.template(), albedo_leaf: ti.f32):
    """Set the canopy scattered field from the absorbed field.

    For every grid cell with positive leaf area density,
    ``self._pcbinswref`` becomes ``self._pcbinsw * albedo_leaf``; other
    cells are left untouched.

    Args:
        lad: 3D leaf area density field.
        albedo_leaf: Leaf albedo applied to the absorbed value.
    """
    for ci, cj, ck in ti.ndrange(self.domain.nx, self.domain.ny, self.domain.nz):
        if lad[ci, cj, ck] > 0.0:
            absorbed = self._pcbinsw[ci, cj, ck]
            self._pcbinswref[ci, cj, ck] = absorbed * albedo_leaf
1864
+
1865
@ti.kernel
def _compute_outgoing_fused(self, surfins: ti.template(), n: ti.i32):
    """Compute outgoing radiation and add it to the outgoing total.

    For each of the first ``n`` surfaces, ``albedo * surfins`` is written
    to ``self._surfoutsl`` and accumulated into ``self._surfoutsw``.

    Args:
        surfins: Field holding the incoming radiation of the current step.
        n: Number of leading surface entries to process.
    """
    for surf in range(n):
        reflected = self.surfaces.albedo[surf] * surfins[surf]
        self._surfoutsl[surf] = reflected
        self._surfoutsw[surf] += reflected
1872
+
1873
@ti.kernel
def _scale_and_accumulate_incoming(self, surfins: ti.template(), n: ti.i32):
    """Scale by the urban view factor and add to the incoming total.

    Each of the first ``n`` entries of ``surfins`` is multiplied in place
    by ``1 - svf`` (or zeroed when that factor is below 0.01), and the
    resulting value is accumulated into ``self._surfinsw``.

    Args:
        surfins: Field of incoming reflected radiation, modified in place.
        n: Number of leading entries to process.
    """
    for surf in range(n):
        non_sky_fraction = 1.0 - self.surfaces.svf[surf]
        scaled = surfins[surf] * non_sky_fraction
        # Surfaces that see (almost) only sky get no reflected share.
        if non_sky_fraction < 0.01:
            scaled = 0.0
        surfins[surf] = scaled
        self._surfinsw[surf] += surfins[surf]
1883
+
1884
@ti.kernel
def _copy_buffer(self, src: ti.template(), dst: ti.template(), n: ti.i32):
    """Copy the first ``n`` entries of ``src`` into ``dst``.

    Args:
        src: Field to read from.
        dst: Field to write to.
        n: Number of leading entries to copy.
    """
    for slot in range(n):
        dst[slot] = src[slot]
1889
+
1890
@ti.kernel
def _distribute_reflected_cached(self, svf_nnz: ti.i32):
    """
    Distribute reflected radiation using cached SVF matrix (PALM-like).

    NOTE: This is the legacy single-kernel version kept for compatibility.
    The optimized version using separate kernels (_distribute_reflected_cached_optimized)
    is described as ~100x faster and should be preferred.

    The kernel runs three phases over ``self._surfins``: reset to zero,
    accumulate ``surfoutsl[source] * vf * trans`` for every stored COO
    entry whose source emits more than 0.01, then scale by the urban view
    factor ``1 - svf`` (zeroing surfaces whose factor is below 0.01).

    This is O(nnz) instead of O(n²), providing major speedup for
    multi-timestep simulations with fixed geometry.

    PALM equivalent: surfins(isurf) += svf(1,isvf) * svf(2,isvf) * surfoutsl(isurfsrc)

    Args:
        svf_nnz: Number of non-zero entries in the cached SVF matrix.
    """
    # Reset incoming first
    for i in range(self.n_surfaces):
        self._surfins[i] = 0.0

    # Sync point - ensure reset completes before sparse matmul
    # NOTE(review): ti.sync() is called from inside a kernel here; confirm
    # how Taichi treats this call in kernel scope.
    ti.sync()

    # Apply sparse matrix-vector multiply
    for idx in range(svf_nnz):
        source = self._svf_source[idx]
        target = self._svf_target[idx]
        vf = self._svf_vf[idx]
        trans = self._svf_trans[idx]

        outgoing = self._surfoutsl[source]
        # Use same threshold as non-cached version (0.01)
        if outgoing > 0.01:
            contribution = outgoing * vf * trans
            # Several entries can share a target, hence the atomic add.
            ti.atomic_add(self._surfins[target], contribution)

    # Sync before scaling
    ti.sync()

    # Scale by urban view factor
    for i in range(self.n_surfaces):
        urban_vf_i = 1.0 - self.surfaces.svf[i]
        # Skip surfaces that only see sky (match non-cached behavior)
        if urban_vf_i < 0.01:
            self._surfins[i] = 0.0
        else:
            self._surfins[i] *= urban_vf_i
1935
+
1936
def invalidate_svf_cache(self):
    """
    Mark the cached SVF matrix as stale.

    Intended to be called after the geometry (buildings, terrain,
    vegetation) has changed. Clears the cached flag, resets the stored
    non-zero count to 0 and prints a notice.
    """
    self._svf_nnz = 0
    self._svf_matrix_cached = False
    print("SVF matrix cache invalidated. Will recompute on next compute_svf() call.")
1946
+
1947
@property
def svf_matrix_cached(self) -> bool:
    """Whether an SVF matrix is currently marked as cached."""
    cached = self._svf_matrix_cached
    return cached
1951
+
1952
@property
def svf_matrix_entries(self) -> int:
    """Number of non-zero entries recorded for the cached SVF matrix."""
    entry_count = self._svf_nnz
    return entry_count
1956
+
1957
@ti.kernel
def _accumulate_outgoing(self):
    """Add the current step's outgoing radiation to the outgoing total.

    For every surface, ``self._surfoutsl`` is added onto ``self._surfoutsw``.
    """
    for surf in range(self.n_surfaces):
        self._surfoutsw[surf] += self._surfoutsl[surf]
1962
+
1963
@ti.kernel
def _accumulate_incoming(self):
    """Add the current step's incoming reflected radiation to the total.

    For every surface, ``self._surfins`` is added onto ``self._surfinsw``.
    """
    for surf in range(self.n_surfaces):
        self._surfinsw[surf] += self._surfins[surf]
1968
+
1969
@ti.kernel
def _accumulate_canopy_absorption_from_reflections(
    self,
    lad: ti.template(),
    is_solid: ti.template(),
    ext_coef: ti.f32,
    pcbinsw: ti.template()
):
    """
    Accumulate canopy absorption from surface reflections (PALM's reflection loop).

    PALM formula: pcbinsw(ipcgb) += csf * surfoutsl(isurfsrc) * asrc * grid_volume_inverse

    For each canopy cell (positive LAD), loops over all surfaces and sums
    the absorption from those that emit at least 0.01, face the cell and
    have an unblocked ray to the cell centre. The CSF from surface to
    canopy is built from:
    - View factor (cell cross-section dx*dy over 4*pi*dist^2, times the
      emission cosine)
    - Transmissivity through intervening canopy (Beer-Lambert)
    - Absorption fraction in target canopy cell (path approximated by dz)

    This is a dense cells x surfaces loop.

    Args:
        lad: 3D LAD field
        is_solid: 3D solid field
        ext_coef: Extinction coefficient
        pcbinsw: Output array for accumulated absorption (W/m³)
    """
    PI = 3.14159265359
    nx = self.domain.nx
    ny = self.domain.ny
    nz = self.domain.nz
    dx = self.domain.dx
    dy = self.domain.dy
    dz = self.domain.dz
    grid_volume = dx * dy * dz
    grid_volume_inverse = 1.0 / grid_volume

    # For each canopy cell, accumulate absorption from all reflecting surfaces
    for ci, cj, ck in ti.ndrange(nx, ny, nz):
        cell_lad = lad[ci, cj, ck]
        if cell_lad <= 0.0:
            continue

        # Cell center position
        pos_cell = Vector3(
            (ci + 0.5) * dx,
            (cj + 0.5) * dy,
            (ck + 0.5) * dz
        )

        # Per-cell running sum; written back once after the surface loop
        total_absorbed = 0.0

        # Loop over all surfaces with outgoing radiation
        for surf_i in range(self.n_surfaces):
            outgoing = self._surfoutsl[surf_i]
            if outgoing < 0.01:
                continue

            # Surface properties
            pos_surf = self.surfaces.center[surf_i]
            normal_surf = self.surfaces.normal[surf_i]
            area_surf = self.surfaces.area[surf_i]

            # Vector from surface to canopy cell
            diff = pos_cell - pos_surf
            dist_sq = diff[0]*diff[0] + diff[1]*diff[1] + diff[2]*diff[2]

            # Skip (near-)coincident points to avoid dividing by ~0
            if dist_sq < 0.01:
                continue

            dist = ti.sqrt(dist_sq)

            # Direction from surface to canopy
            dir_to_cell_x = diff[0] / dist
            dir_to_cell_y = diff[1] / dist
            dir_to_cell_z = diff[2] / dist

            # Check if surface faces the canopy cell
            cos_emit = (normal_surf[0]*dir_to_cell_x +
                       normal_surf[1]*dir_to_cell_y +
                       normal_surf[2]*dir_to_cell_z)

            if cos_emit > 0.0:
                # Compute transmissivity from surface to canopy cell (through intervening canopy)
                trans, blocked = ray_point_to_point_transmissivity(
                    pos_surf, pos_cell,
                    lad, is_solid,
                    nx, ny, nz,
                    dx, dy, dz,
                    ext_coef
                )

                if blocked == 0:
                    # Approximate path length through target cell
                    path_in_cell = dz  # Simplified; could be more accurate

                    # Absorption fraction in this cell
                    abs_frac = 1.0 - ti.exp(-ext_coef * cell_lad * path_in_cell)

                    # View factor from surface to canopy cell (simplified)
                    # CSF = view_factor * transmissivity * absorption_fraction
                    cell_solid_angle = (dx * dy) / (4.0 * PI * dist_sq)
                    csf_factor = cell_solid_angle * cos_emit * trans * abs_frac
                    csf_factor = ti.min(csf_factor, 1.0)  # Clamp

                    # PALM formula: pcbinsw += csf * surfoutsl * asrc * grid_volume_inverse
                    absorbed = csf_factor * outgoing * area_surf * grid_volume_inverse
                    total_absorbed += absorbed

        ti.atomic_add(pcbinsw[ci, cj, ck], total_absorbed)
2077
+
2078
@ti.kernel
def _update_canopy_scattered_radiation(self, lad: ti.template(), albedo_leaf: ti.f32):
    """
    Refresh the scattered-radiation field from the absorbed field.

    For every cell with positive leaf area density:
        pcbinswref = albedo_leaf * pcbinsw

    Cells without vegetation are left unchanged.

    Args:
        lad: 3D leaf area density field.
        albedo_leaf: Leaf albedo applied to the absorbed value.
    """
    for ci, cj, ck in ti.ndrange(self.domain.nx, self.domain.ny, self.domain.nz):
        if lad[ci, cj, ck] > 0.0:
            self._pcbinswref[ci, cj, ck] = albedo_leaf * self._pcbinsw[ci, cj, ck]
2091
+
2092
@ti.kernel
def _compute_canopy_to_surface_scattering(
    self,
    lad: ti.template(),
    is_solid: ti.template(),
    ext_coef: ti.f32
):
    """
    Compute radiation scattered from canopy cells toward surfaces.

    For each canopy cell with scattered radiation (pcbinswref), distribute
    to surfaces based on solid angle, receiving cosine and transmissivity.
    The contribution is added to ``_surfins`` and also tracked separately
    in ``_surfinswpc``.

    The receiving cosine is taken between the surface normal and the
    direction from the surface toward the cell, the same convention used by
    ``_compute_canopy_scattering`` and
    ``_accumulate_canopy_absorption_from_reflections``. The previous code
    used the cell -> surface direction, which selected surfaces facing away
    from the scattering cell.

    Args:
        lad: 3D field of Leaf Area Density
        is_solid: 3D field of solid cells
        ext_coef: Extinction coefficient
    """
    PI = 3.14159265359
    nx = self.domain.nx
    ny = self.domain.ny
    nz = self.domain.nz
    dx = self.domain.dx
    dy = self.domain.dy
    dz = self.domain.dz
    grid_volume = dx * dy * dz

    # For each canopy cell with scattered radiation
    for ci, cj, ck in ti.ndrange(nx, ny, nz):
        cell_lad = lad[ci, cj, ck]
        if cell_lad <= 0.0:
            continue

        # Volumetric density (W/m^3) times cell volume gives power (W)
        scattered_power = self._pcbinswref[ci, cj, ck] * grid_volume
        if scattered_power <= 0.01:
            continue

        # Cell center position
        pos_cell = Vector3(
            (ci + 0.5) * dx,
            (cj + 0.5) * dy,
            (ck + 0.5) * dz
        )

        # Distribute to all surfaces
        for surf_i in range(self.n_surfaces):
            pos_surf = self.surfaces.center[surf_i]
            normal_surf = self.surfaces.normal[surf_i]

            # Vector from cell to surface
            diff = pos_surf - pos_cell
            dist_sq = diff[0]*diff[0] + diff[1]*diff[1] + diff[2]*diff[2]

            # Skip (near-)coincident points to avoid dividing by ~0
            if dist_sq < 0.01:
                continue

            dist = ti.sqrt(dist_sq)

            # Direction from surface to cell (negated cell -> surface vector)
            dir_to_cell_x = -diff[0] / dist
            dir_to_cell_y = -diff[1] / dist
            dir_to_cell_z = -diff[2] / dist

            # Positive when the cell lies in the hemisphere the surface faces
            cos_recv = (normal_surf[0]*dir_to_cell_x +
                        normal_surf[1]*dir_to_cell_y +
                        normal_surf[2]*dir_to_cell_z)

            if cos_recv > 0.0:
                # Solid angle factor, capped for very close cells
                cell_cross = dx * dy
                solid_angle_factor = cell_cross / (4.0 * PI * dist_sq)
                solid_angle_factor = ti.min(solid_angle_factor, 0.25)

                # Transmissivity through intervening canopy
                trans, blocked = ray_point_to_point_transmissivity(
                    pos_cell, pos_surf,
                    lad, is_solid,
                    nx, ny, nz,
                    dx, dy, dz,
                    ext_coef
                )

                if blocked == 0:
                    contribution = scattered_power * solid_angle_factor * cos_recv * trans
                    contribution /= self.surfaces.area[surf_i]
                    # Add to surfins so it participates in next reflection step
                    ti.atomic_add(self._surfins[surf_i], contribution)
                    # Also track separately for output
                    ti.atomic_add(self._surfinswpc[surf_i], contribution)
2178
+
2179
@ti.kernel
def _compute_surface_outgoing_no_exchange(self):
    """Compute outgoing radiation when no inter-surface exchange is modelled.

    For every surface, ``albedo * _surfins`` is stored in ``_surfoutsl``
    and the outgoing total ``_surfoutsw`` is overwritten with that value.
    """
    for surf in range(self.n_surfaces):
        reflectivity = self.surfaces.albedo[surf]
        self._surfoutsl[surf] = reflectivity * self._surfins[surf]
        # Total is set (not accumulated): there is only one contribution.
        self._surfoutsw[surf] = self._surfoutsl[surf]
2186
+
2187
@ti.kernel
def _copy_final_fluxes(self):
    """Write the final flux components to the surface arrays.

    ``sw_in_direct`` receives the direct input; ``sw_in_diffuse`` receives
    the diffuse input plus everything in the incoming total that is neither
    direct nor diffuse, plus the canopy-scattered term; ``sw_out`` receives
    the outgoing total.
    """
    for surf in range(self.n_surfaces):
        direct = self._surfinswdir[surf]
        diffuse = self._surfinswdif[surf]

        # Remainder of the incoming total after removing direct and diffuse.
        reflected = self._surfinsw[surf] - direct - diffuse

        self.surfaces.sw_in_direct[surf] = direct
        # Reflected and canopy-scattered radiation are reported as diffuse.
        self.surfaces.sw_in_diffuse[surf] = diffuse + reflected + self._surfinswpc[surf]
        self.surfaces.sw_out[surf] = self._surfoutsw[surf]
2196
+
2197
@ti.kernel
def _compute_canopy_scattering(
    self,
    lad: ti.template(),
    is_solid: ti.template(),
    albedo_leaf: ti.f32,
    ext_coef: ti.f32
):
    """
    Compute radiation scattered from canopy cells toward surfaces.

    Runs two passes. The first sets ``_pcbinswref`` to ``albedo_leaf``
    times the absorbed field for every vegetated cell. The second loops
    over every vegetated cell with positive scattered power and over all
    surfaces, adding the received share to ``_surfinswpc`` for surfaces
    that face the cell and have an unblocked ray to it.

    Following PALM's methodology:
    - pcrad = absorbed_radiation * albedo_leaf (what gets scattered)
    - Distribution based on CSF-like view factors to surfaces

    Args:
        lad: 3D field of Leaf Area Density
        is_solid: 3D field of solid cells
        albedo_leaf: Leaf albedo (fraction scattered vs absorbed)
        ext_coef: Extinction coefficient
    """
    PI = 3.14159265359
    nx = self.domain.nx
    ny = self.domain.ny
    nz = self.domain.nz
    dx = self.domain.dx
    dy = self.domain.dy
    dz = self.domain.dz
    grid_volume = dx * dy * dz

    # First: compute scattered radiation for each canopy cell
    for i, j, k in ti.ndrange(nx, ny, nz):
        cell_lad = lad[i, j, k]
        if cell_lad > 0.0:
            # Converted to power and back to a volumetric density
            absorbed = self._pcbinsw[i, j, k] * grid_volume
            self._pcbinswref[i, j, k] = albedo_leaf * absorbed / grid_volume

    # Second: for each canopy cell, contribute to nearby surfaces
    # Iterate over canopy cells (outer) and surfaces (inner)
    for ci, cj, ck in ti.ndrange(nx, ny, nz):
        cell_lad = lad[ci, cj, ck]
        if cell_lad <= 0.0:
            continue

        scattered_power = self._pcbinswref[ci, cj, ck] * grid_volume
        if scattered_power <= 0.0:
            continue

        # Cell center position
        pos_cell = Vector3(
            (ci + 0.5) * dx,
            (cj + 0.5) * dy,
            (ck + 0.5) * dz
        )

        # Distribute to all surfaces
        for surf_i in range(self.n_surfaces):
            pos_surf = self.surfaces.center[surf_i]
            normal_surf = self.surfaces.normal[surf_i]

            # Vector from cell to surface
            diff = pos_surf - pos_cell
            dist_sq = diff[0]*diff[0] + diff[1]*diff[1] + diff[2]*diff[2]

            # Skip (near-)coincident points to avoid dividing by ~0
            if dist_sq < 0.01:
                continue

            dist = ti.sqrt(dist_sq)

            # Direction from surface to cell (for checking if surface "sees" the cell)
            dir_to_cell_x = -diff[0] / dist
            dir_to_cell_y = -diff[1] / dist
            dir_to_cell_z = -diff[2] / dist

            # Check if surface faces the cell (cell is in hemisphere surface faces)
            cos_recv = (normal_surf[0]*dir_to_cell_x +
                       normal_surf[1]*dir_to_cell_y +
                       normal_surf[2]*dir_to_cell_z)

            if cos_recv > 0.0:
                # Solid angle factor (capped at 0.25 for very close cells)
                cell_cross = dx * dy
                solid_angle_factor = cell_cross / (4.0 * PI * dist_sq)
                solid_angle_factor = ti.min(solid_angle_factor, 0.25)

                # Transmissivity through intervening canopy
                trans, blocked = ray_point_to_point_transmissivity(
                    pos_cell, pos_surf,
                    lad, is_solid,
                    nx, ny, nz,
                    dx, dy, dz,
                    ext_coef
                )

                if blocked == 0:
                    # Power reaching the surface, converted to flux per unit area
                    contribution = scattered_power * solid_angle_factor * cos_recv * trans
                    contribution /= self.surfaces.area[surf_i]
                    ti.atomic_add(self._surfinswpc[surf_i], contribution)
2299
+
2300
@ti.kernel
def _compute_canopy_to_canopy_scattering(
    self,
    lad: ti.template(),
    is_solid: ti.template(),
    albedo_leaf: ti.f32,
    ext_coef: ti.f32
):
    """
    Compute radiation scattered from one canopy cell to another.

    For each canopy cell that scatters radiation (pcbinswref), distribute
    that scattered radiation to neighboring canopy cells based on:
    - Solid angle (distance-based)
    - Transmissivity through intervening canopy
    - Absorption fraction in target cell

    Only target cells within 5 cells of the source in each index direction
    are considered. Results are accumulated into ``_pcbinswc2c``.

    This implements canopy-to-canopy scattering which PALM does not
    explicitly model but is important for dense vegetation canopies.

    The formula follows the same CSF methodology:
        pcbinswc2c[target] += scattered[source] × view_factor × trans × abs_frac

    Args:
        lad: 3D field of Leaf Area Density
        is_solid: 3D field of solid cells
        albedo_leaf: Leaf albedo (fraction scattered vs absorbed);
            not referenced in the kernel body
        ext_coef: Extinction coefficient
    """
    PI = 3.14159265359
    nx = self.domain.nx
    ny = self.domain.ny
    nz = self.domain.nz
    dx = self.domain.dx
    dy = self.domain.dy
    dz = self.domain.dz
    grid_volume = dx * dy * dz

    # For each source canopy cell with scattered radiation
    for si, sj, sk in ti.ndrange(nx, ny, nz):
        source_lad = lad[si, sj, sk]
        if source_lad <= 0.0:
            continue

        scattered_power = self._pcbinswref[si, sj, sk] * grid_volume
        if scattered_power <= 0.01:
            continue

        # Source cell center position
        pos_source = Vector3(
            (si + 0.5) * dx,
            (sj + 0.5) * dy,
            (sk + 0.5) * dz
        )

        # Distribute to nearby canopy cells (limit search radius for efficiency)
        # Use a search radius based on typical canopy interaction distance
        search_radius_cells = 5  # Cells in each direction

        # Half-open index window clamped to the grid
        i_min = ti.max(0, si - search_radius_cells)
        i_max = ti.min(nx, si + search_radius_cells + 1)
        j_min = ti.max(0, sj - search_radius_cells)
        j_max = ti.min(ny, sj + search_radius_cells + 1)
        k_min = ti.max(0, sk - search_radius_cells)
        k_max = ti.min(nz, sk + search_radius_cells + 1)

        for ti_idx in range(i_min, i_max):
            for tj_idx in range(j_min, j_max):
                for tk_idx in range(k_min, k_max):
                    # Skip self
                    if ti_idx == si and tj_idx == sj and tk_idx == sk:
                        continue

                    target_lad = lad[ti_idx, tj_idx, tk_idx]
                    if target_lad <= 0.0:
                        continue

                    # Target cell center position
                    pos_target = Vector3(
                        (ti_idx + 0.5) * dx,
                        (tj_idx + 0.5) * dy,
                        (tk_idx + 0.5) * dz
                    )

                    # Distance between cells
                    diff = pos_target - pos_source
                    dist_sq = diff[0]*diff[0] + diff[1]*diff[1] + diff[2]*diff[2]

                    if dist_sq < 0.01:
                        continue

                    dist = ti.sqrt(dist_sq)

                    # Solid angle factor (cell cross-section / distance²)
                    # Using cell cross-sectional area perpendicular to ray
                    cell_cross = dx * dy  # Simplified; could use projection
                    solid_angle_factor = cell_cross / (4.0 * PI * dist_sq)
                    solid_angle_factor = ti.min(solid_angle_factor, 0.25)

                    # Compute transmissivity from source to target through intervening canopy
                    trans, blocked = ray_point_to_point_transmissivity(
                        pos_source, pos_target,
                        lad, is_solid,
                        nx, ny, nz,
                        dx, dy, dz,
                        ext_coef
                    )

                    if blocked == 0 and trans > 0.01:
                        # Absorption fraction in target cell
                        path_in_cell = dz  # Simplified path length
                        abs_frac = 1.0 - ti.exp(-ext_coef * target_lad * path_in_cell)

                        # Contribution to target cell (W/m³)
                        contribution = scattered_power * solid_angle_factor * trans * abs_frac / grid_volume
                        # Different source cells may hit the same target concurrently
                        ti.atomic_add(self._pcbinswc2c[ti_idx, tj_idx, tk_idx], contribution)
2416
+
2417
@ti.kernel
def _accumulate_canopy_to_canopy(self, lad: ti.template(), albedo_leaf: ti.f32):
    """
    Fold the canopy-to-canopy contribution into the canopy totals.

    For every vegetated cell with a positive pending value in
    ``_pcbinswc2c``: the value is added to the absorbed field ``_pcbinsw``
    and to the cumulative ``_pcbinswc2c_total``, ``albedo_leaf`` times the
    value is added to the scattered field ``_pcbinswref``, and the pending
    value is cleared for the next iteration.

    Args:
        lad: 3D leaf area density field.
        albedo_leaf: Fraction of the contribution that is scattered again.
    """
    for ci, cj, ck in ti.ndrange(self.domain.nx, self.domain.ny, self.domain.nz):
        if lad[ci, cj, ck] > 0.0:
            pending = self._pcbinswc2c[ci, cj, ck]
            if pending > 0.0:
                # Total absorbed in this cell.
                self._pcbinsw[ci, cj, ck] += pending
                # Cumulative canopy-to-canopy share (kept for output).
                self._pcbinswc2c_total[ci, cj, ck] += pending
                # Part that is available for the next scattering pass.
                self._pcbinswref[ci, cj, ck] += albedo_leaf * pending
                # Clear the pending value for the next iteration.
                self._pcbinswc2c[ci, cj, ck] = 0.0
2438
+
2439
def _apply_canopy_scattering(self):
    """
    Seed the canopy scattered field before the reflection loop.

    When the domain has a leaf-area-density field, the scattered field is
    refreshed from the currently absorbed radiation using the configured
    leaf albedo. Without a LAD field this is a no-op.
    """
    leaf_density = self.domain.lad
    if leaf_density is not None:
        # Scattered = albedo_leaf * absorbed, evaluated by the kernel.
        self._update_canopy_scattered_radiation(leaf_density, self.config.albedo_leaf)
2455
+
2456
@ti.kernel
def _add_canopy_to_diffuse(self):
    """Fold the canopy-scattered term of each surface into its diffuse SW input."""
    for surf_idx in range(self.n_surfaces):
        self.surfaces.sw_in_diffuse[surf_idx] += self._surfinswpc[surf_idx]
2461
+
2462
def compute_radiation(
    self,
    day_of_year: int,
    second_of_day: float,
    sw_direct: float,
    sw_diffuse: float
):
    """
    Run one shortwave radiation step for a given time and forcing.

    The solar position is refreshed for the requested time, then the
    shortwave computation is run with the supplied irradiances.

    Args:
        day_of_year: Day number (1-365)
        second_of_day: Seconds since midnight UTC
        sw_direct: Direct normal irradiance (W/m²)
        sw_diffuse: Diffuse horizontal irradiance (W/m²)
    """
    when = (day_of_year, second_of_day)
    forcing = (sw_direct, sw_diffuse)

    # The sun position has to be current before the shortwave terms are computed.
    self.update_solar_position(*when)
    self.compute_shortwave_radiation(*forcing)
2480
+
2481
def get_surface_fluxes(self) -> dict:
    """
    Export per-surface radiation quantities as numpy arrays.

    Every array is trimmed to the first ``self.n_surfaces`` entries.

    Returns:
        Dictionary with the keys:
            - position: Grid indices (i, j, k)
            - direction: Surface direction index
            - area: Surface area (m²)
            - svf: Sky view factor
            - shadow_factor: Shadow factor (0=sunlit, 1=shaded)
            - sw_in_direct: Direct SW radiation (W/m²)
            - sw_in_diffuse: Diffuse SW radiation including reflections (W/m²)
            - sw_out: Outgoing (reflected) SW radiation (W/m²)
            - sw_in_total: Total incoming SW, direct + diffuse (W/m²)
            - sw_net: Net absorbed SW, total incoming minus outgoing (W/m²)
    """
    count = self.n_surfaces
    surf = self.surfaces

    direct = surf.sw_in_direct.to_numpy()[:count]
    diffuse = surf.sw_in_diffuse.to_numpy()[:count]
    outgoing = surf.sw_out.to_numpy()[:count]
    incoming = direct + diffuse

    # Geometry/visibility fields are exported unchanged, in this key order.
    fluxes = {
        name: getattr(surf, name).to_numpy()[:count]
        for name in ('position', 'direction', 'area', 'svf', 'shadow_factor')
    }
    fluxes['sw_in_direct'] = direct
    fluxes['sw_in_diffuse'] = diffuse
    fluxes['sw_out'] = outgoing
    fluxes['sw_in_total'] = incoming
    fluxes['sw_net'] = incoming - outgoing
    return fluxes
2516
+
2517
def get_total_absorbed_sw(self) -> float:
    """Return the net shortwave power absorbed by all surfaces (W).

    Computed as the sum over surfaces of ``sw_net * area`` taken from
    ``get_surface_fluxes()``.
    """
    data = self.get_surface_fluxes()
    absorbed_power = data['sw_net'] * data['area']
    return float(absorbed_power.sum())
2521
+
2522
def get_domain_shadow_map(self) -> np.ndarray:
    """
    Build a 2D sunlit/shaded map from ground-level surfaces.

    Only upward-facing surfaces (direction index 0) whose vertical index is
    0 contribute; every other cell of the map stays NaN (e.g. building
    footprints).

    Returns:
        2D array of shape (nx, ny) with 1=sunlit, 0=shadowed, NaN=no
        ground surface
    """
    nx, ny = self.domain.nx, self.domain.ny
    result = np.full((nx, ny), np.nan)

    data = self.get_surface_fluxes()
    directions = data['direction']
    positions = data['position']
    shade = data['shadow_factor']

    for idx in range(self.n_surfaces):
        if directions[idx] != 0:  # not upward-facing
            continue
        cell = positions[idx]  # already grid indices
        col, row, lev = int(cell[0]), int(cell[1]), int(cell[2])
        if lev != 0 or not (0 <= col < nx and 0 <= row < ny):
            continue
        # shadow_factor uses 0=sunlit, 1=shaded; the map uses the inverse.
        result[col, row] = 1.0 - shade[idx]

    return result
2550
+
2551
def get_irradiance_map(self) -> np.ndarray:
    """
    Build a 2D map of total incoming shortwave irradiance at ground level.

    Only upward-facing surfaces (direction index 0) whose vertical index is
    0 contribute; the value written is direct + diffuse incoming SW.

    Returns:
        2D array of irradiance values (W/m²), NaN for building footprints
    """
    nx, ny = self.domain.nx, self.domain.ny
    grid = np.full((nx, ny), np.nan)

    data = self.get_surface_fluxes()
    facing = data['direction']
    cells = data['position']
    direct = data['sw_in_direct']
    diffuse = data['sw_in_diffuse']

    for n in range(self.n_surfaces):
        if facing[n] != 0:  # keep upward-facing surfaces only
            continue
        cell = cells[n]
        gx, gy, gz = int(cell[0]), int(cell[1]), int(cell[2])
        inside = 0 <= gx < nx and 0 <= gy < ny
        if inside and gz == 0:
            grid[gx, gy] = direct[n] + diffuse[n]

    return grid
2575
+
2576
def get_net_sw_radiation_map(self) -> np.ndarray:
    """
    Build a 2D map of net shortwave radiation at ground level.

    Only upward-facing surfaces (direction index 0) whose vertical index is
    0 contribute; the value written is (direct + diffuse) minus outgoing SW.

    Returns:
        2D array of net SW radiation values (W/m²), NaN for building footprints
    """
    nx, ny = self.domain.nx, self.domain.ny
    grid = np.full((nx, ny), np.nan)

    data = self.get_surface_fluxes()
    facing = data['direction']
    cells = data['position']

    for n in range(self.n_surfaces):
        if facing[n] != 0:  # keep upward-facing surfaces only
            continue
        cell = cells[n]
        gx, gy, gz = int(cell[0]), int(cell[1]), int(cell[2])
        if not (0 <= gx < nx and 0 <= gy < ny):
            continue
        if gz != 0:
            continue
        incoming = data['sw_in_direct'][n] + data['sw_in_diffuse'][n]
        grid[gx, gy] = incoming - data['sw_out'][n]

    return grid
2599
+
2600
+ # =========================================================================
2601
+ # Volumetric flux methods
2602
+ # =========================================================================
2603
+
2604
def compute_volumetric_svf(self):
    """
    Trigger the volumetric sky-view-factor computation.

    This must be called before computing volumetric SW fluxes, and only
    needs repeating when the domain geometry changes.

    Raises:
        RuntimeError: If volumetric_flux is not enabled in config
    """
    calculator = self.volumetric_calc
    if calculator is not None:
        calculator.compute_skyvf_vol()
        return
    raise RuntimeError(
        "Volumetric flux not enabled. Set volumetric_flux=True in RadiationConfig."
    )
2619
+
2620
def get_volumetric_skyvf(self) -> np.ndarray:
    """
    Return the volumetric sky view factor field.

    Returns:
        3D array of shape (nx, ny, nz) with SVF values [0, 1]

    Raises:
        RuntimeError: If volumetric_flux is not enabled
    """
    calculator = self.volumetric_calc
    if calculator is not None:
        return calculator.get_skyvf_vol()
    raise RuntimeError(
        "Volumetric flux not enabled. Set volumetric_flux=True in RadiationConfig."
    )
2635
+
2636
def get_volumetric_swflux(self) -> np.ndarray:
    """
    Return the volumetric shortwave flux field.

    The value at each grid cell is the omnidirectional SW flux, i.e. the
    average irradiance onto an imaginary sphere (W/m²).

    Returns:
        3D array of shape (nx, ny, nz) with SW flux values

    Raises:
        RuntimeError: If volumetric_flux is not enabled
    """
    calculator = self.volumetric_calc
    if calculator is not None:
        return calculator.get_swflux_vol()
    raise RuntimeError(
        "Volumetric flux not enabled. Set volumetric_flux=True in RadiationConfig."
    )
2654
+
2655
def get_volumetric_shadow_top(self) -> np.ndarray:
    """
    Return the shadow-top level per column.

    Shadow top is the highest grid level that is in shadow for the current
    solar position.

    Returns:
        2D array of shape (nx, ny) with vertical indices

    Raises:
        RuntimeError: If volumetric_flux is not enabled
    """
    calculator = self.volumetric_calc
    if calculator is not None:
        return calculator.get_shadow_top()
    raise RuntimeError(
        "Volumetric flux not enabled. Set volumetric_flux=True in RadiationConfig."
    )
2673
+
2674
def get_volumetric_shadow_mask(self) -> np.ndarray:
    """
    Return the 3D shadow mask.

    Returns:
        3D boolean array where True indicates shadowed cells

    Raises:
        RuntimeError: If volumetric_flux is not enabled
    """
    calculator = self.volumetric_calc
    if calculator is not None:
        return calculator.get_shadow_mask_3d()
    raise RuntimeError(
        "Volumetric flux not enabled. Set volumetric_flux=True in RadiationConfig."
    )
2689
+
2690
def get_volumetric_swflux_slice(
    self,
    level: Optional[int] = None,
    axis: Optional[str] = None,
    index: Optional[int] = None
) -> np.ndarray:
    """
    Extract a 2D slice of the volumetric SW flux.

    A horizontal slice is returned whenever ``level`` is given (``axis`` and
    ``index`` are then ignored); otherwise both ``axis`` and ``index`` are
    required for a vertical slice.

    Args:
        level: Vertical level for horizontal slice (k index)
        axis: 'x' or 'y' for vertical slices
        index: Index along the axis for vertical slices

    Returns:
        2D array of SW flux values (W/m²)

    Raises:
        RuntimeError: If volumetric_flux is not enabled
        ValueError: If neither ``level`` nor both ``axis`` and ``index``
            are supplied
    """
    calculator = self.volumetric_calc
    if calculator is None:
        raise RuntimeError(
            "Volumetric flux not enabled. Set volumetric_flux=True in RadiationConfig."
        )

    if level is not None:
        return calculator.get_horizontal_slice(level, 'swflux')

    if axis is None or index is None:
        raise ValueError("Specify either 'level' or both 'axis' and 'index'")

    return calculator.get_vertical_slice(axis, index, 'swflux')
2722
+
2723
+ # =========================================================================
2724
+ # Plant Canopy Radiation Methods
2725
+ # =========================================================================
2726
+
2727
def _compute_canopy_radiation(
    self,
    sw_direct: float,
    sw_diffuse: float,
    sun_dir: Vector3,
    cos_zenith: float
):
    """
    Compute shortwave absorption in the plant canopy using CSF.

    Follows PALM's plant canopy radiation balance and fills:
        - pcbinswdir: Direct SW absorbed per canopy cell (W/m³)
        - pcbinswdif: Diffuse SW absorbed per canopy cell (W/m³)
        - pcbinsw: Total SW absorbed (W/m³) - direct, diffuse AND reflected

    pcbinsw is not cleared here: it may already hold reflection-step
    contributions from _compute_sw_fluxes, and the direct and diffuse terms
    are ADDED on top of them.

    Args:
        sw_direct: Direct normal irradiance (W/m²)
        sw_diffuse: Diffuse horizontal irradiance (W/m²)
        sun_dir: Sun direction vector
        cos_zenith: Cosine of solar zenith angle
    """
    min_stable_coszen = 0.0262
    domain = self.domain
    csf = self.csf_calc

    # Clear the CSF buffer and the direct/diffuse terms only; the total
    # (_pcbinsw) is left untouched on purpose.
    csf.reset_csf()
    self._reset_canopy_dir_dif_arrays()

    cell_volume = domain.dx * domain.dy * domain.dz

    # Direct beam: only when the sun is far enough above the horizon.
    # PALM's method: box_absorb + dsitransc + per-box absorption.
    if cos_zenith > min_stable_coszen:
        csf.compute_canopy_absorption_direct_palm(
            sun_dir,
            domain.is_solid,
            domain.lad,
            sw_direct,
        )
        # CSF now holds absorption in W/m³; copy it to pcbinswdir.
        self._copy_csf_to_pcbinswdir_direct()

    # Diffuse sky radiation (PALM's method): rays are traced from each
    # canopy cell to the sky hemisphere, not from surfaces.
    csf.compute_canopy_absorption_diffuse_palm(
        domain.is_solid,
        domain.lad,
        sw_diffuse,
        self._pcbinswdif,
        self.config.n_azimuth,
        self.config.n_elevation,
    )

    # Total absorbed = direct + diffuse + whatever pcbinsw already held.
    self._sum_canopy_absorption()

    # Received (pre-absorption) radiation.
    self._compute_received_radiation(sw_direct, sw_diffuse, cos_zenith, cell_volume)

    self._canopy_radiation_computed = True
2791
+
2792
def _compute_canopy_radiation_initial(
    self,
    sw_direct: float,
    sw_diffuse: float,
    sun_dir,  # Vector3
    cos_zenith: float
):
    """
    Compute the initial (direct + diffuse) canopy absorption BEFORE the reflection loop.

    Running this ahead of the surface reflection loop lets canopy-scattered
    radiation take part in reflections (paths such as C→S→C and S→C→S).
    Afterwards _update_canopy_scattered_radiation should be called to
    prepare pcbinswref for the reflection loop.

    Args:
        sw_direct: Direct normal irradiance (W/m²)
        sw_diffuse: Diffuse horizontal irradiance (W/m²)
        sun_dir: Sun direction vector
        cos_zenith: Cosine of solar zenith angle
    """
    min_stable_coszen = 0.0262
    domain = self.domain
    csf = self.csf_calc

    # Start from a clean slate: CSF buffer and every canopy array.
    csf.reset_csf()
    self._reset_canopy_arrays()

    # Direct beam: only when the sun is far enough above the horizon.
    if cos_zenith > min_stable_coszen:
        csf.compute_canopy_absorption_direct_palm(
            sun_dir,
            domain.is_solid,
            domain.lad,
            sw_direct,
        )
        # CSF now holds absorption in W/m³; copy it to pcbinswdir.
        self._copy_csf_to_pcbinswdir_direct()

    # Diffuse sky radiation (PALM's method).
    csf.compute_canopy_absorption_diffuse_palm(
        domain.is_solid,
        domain.lad,
        sw_diffuse,
        self._pcbinswdif,
        self.config.n_azimuth,
        self.config.n_elevation,
    )

    # Initial total = direct + diffuse.
    self._sum_canopy_absorption_initial()
2841
+
2842
@ti.kernel
def _sum_canopy_absorption_initial(self):
    """Overwrite total canopy absorption with direct + diffuse (any previous total is discarded)."""
    for ix, iy, iz in ti.ndrange(self.domain.nx, self.domain.ny, self.domain.nz):
        direct_part = self._pcbinswdir[ix, iy, iz]
        diffuse_part = self._pcbinswdif[ix, iy, iz]
        self._pcbinsw[ix, iy, iz] = direct_part + diffuse_part
2847
+
2848
@ti.kernel
def _reset_canopy_dir_dif_arrays(self):
    """Zero the direct and diffuse canopy absorption fields; the total (_pcbinsw) is left untouched."""
    for ix, iy, iz in ti.ndrange(self.domain.nx, self.domain.ny, self.domain.nz):
        self._pcbinswdif[ix, iy, iz] = 0.0
        self._pcbinswdir[ix, iy, iz] = 0.0
2854
+
2855
@ti.kernel
def _reset_canopy_arrays(self):
    """Reset the canopy absorption and received-radiation fields to zero.

    Clears the absorbed fields (_pcbinsw, _pcbinswdir, _pcbinswdif) and the
    received fields (_pcinsw, _pcinswdir, _pcinswdif) in every grid cell.
    """
    for i, j, k in ti.ndrange(self.domain.nx, self.domain.ny, self.domain.nz):
        # NOTE(review): the original stored 0.0 into _pcbinsw twice in a row.
        # The redundant store is dropped here; confirm the second line was
        # not meant to clear a different field.
        self._pcbinsw[i, j, k] = 0.0
        self._pcbinswdir[i, j, k] = 0.0
        self._pcbinswdif[i, j, k] = 0.0
        self._pcinsw[i, j, k] = 0.0
        self._pcinswdir[i, j, k] = 0.0
        self._pcinswdif[i, j, k] = 0.0
2866
+
2867
@ti.kernel
def _copy_csf_to_pcbinswdir(self, grid_volume: ti.f32):
    """Store the CSF field divided by ``grid_volume`` as direct absorption.

    The CSF field is taken to hold absorbed power in W, so the division
    yields W/m³.
    """
    for ix, iy, iz in ti.ndrange(self.domain.nx, self.domain.ny, self.domain.nz):
        absorbed_power = self.csf_calc.csf[ix, iy, iz]
        self._pcbinswdir[ix, iy, iz] = absorbed_power / grid_volume
2873
+
2874
@ti.kernel
def _copy_csf_to_pcbinswdir_direct(self):
    """Copy the CSF field unchanged into the direct absorption field.

    No unit conversion is applied: the PALM-style direct computation
    already leaves the CSF values in W/m³.
    """
    for ix, iy, iz in ti.ndrange(self.domain.nx, self.domain.ny, self.domain.nz):
        absorbed_density = self.csf_calc.csf[ix, iy, iz]
        self._pcbinswdir[ix, iy, iz] = absorbed_density
2879
+
2880
@ti.kernel
def _copy_csf_to_pcbinswdif(self, grid_volume: ti.f32):
    """Store the CSF field divided by ``grid_volume`` as diffuse absorption (W/m³)."""
    for ix, iy, iz in ti.ndrange(self.domain.nx, self.domain.ny, self.domain.nz):
        absorbed_power = self.csf_calc.csf[ix, iy, iz]
        self._pcbinswdif[ix, iy, iz] = absorbed_power / grid_volume
2885
+
2886
@ti.kernel
def _sum_canopy_absorption(self):
    """Add the direct and diffuse terms onto the total canopy absorption.

    The total is incremented rather than overwritten, because pcbinsw may
    already contain reflection contributions (from
    _accumulate_canopy_absorption_from_reflections).
    """
    for ix, iy, iz in ti.ndrange(self.domain.nx, self.domain.ny, self.domain.nz):
        direct_part = self._pcbinswdir[ix, iy, iz]
        diffuse_part = self._pcbinswdif[ix, iy, iz]
        # Increment: the existing value may carry reflection contributions.
        self._pcbinsw[ix, iy, iz] += direct_part + diffuse_part
2897
+
2898
@ti.kernel
def _compute_received_radiation(
    self,
    sw_direct: ti.f32,
    sw_diffuse: ti.f32,
    cos_zenith: ti.f32,
    grid_volume: ti.f32
):
    """
    Compute received (incident) radiation at each canopy cell.

    This is the radiation before absorption, useful for photosynthesis
    models. For every cell with LAD > 0 the absorbed direct and diffuse
    values are scaled by ``grid_volume / (lad * grid_volume)``, i.e. the
    absorbed value divided by the cell's LAD.

    The received fields are written for every canopy cell on every call,
    with 0 where nothing is absorbed. Writing only on positive absorption
    would leave values from an earlier call in place, for example once the
    direct term drops to zero. Cells with LAD <= 0 are not touched.

    Args:
        sw_direct: Not used by this kernel; kept for interface stability.
        sw_diffuse: Not used by this kernel; kept for interface stability.
        cos_zenith: Not used by this kernel; kept for interface stability.
        grid_volume: Volume of one grid cell, used to derive the leaf area
            in the cell.
    """
    for i, j, k in ti.ndrange(self.domain.nx, self.domain.ny, self.domain.nz):
        lad = self.domain.lad[i, j, k]
        if lad > 0.0:
            # Leaf area in this cell
            leaf_area = lad * grid_volume

            # Default to zero so stale values never survive a call.
            received_dir = 0.0
            received_dif = 0.0
            if self._pcbinswdir[i, j, k] > 0:
                received_dir = self._pcbinswdir[i, j, k] * grid_volume / leaf_area
            if self._pcbinswdif[i, j, k] > 0:
                received_dif = self._pcbinswdif[i, j, k] * grid_volume / leaf_area

            self._pcinswdir[i, j, k] = received_dir
            self._pcinswdif[i, j, k] = received_dif
            self._pcinsw[i, j, k] = received_dir + received_dif
2926
+
2927
def get_canopy_absorbed_sw(self) -> np.ndarray:
    """
    Export the total SW radiation absorbed in the plant canopy.

    Returns:
        3D array of absorbed SW radiation (W/m³)
    """
    total_absorbed = self._pcbinsw
    return total_absorbed.to_numpy()
2935
+
2936
def get_canopy_absorbed_sw_direct(self) -> np.ndarray:
    """
    Export the direct SW radiation absorbed in the plant canopy.

    Returns:
        3D array of absorbed direct SW radiation (W/m³)
    """
    direct_absorbed = self._pcbinswdir
    return direct_absorbed.to_numpy()
2944
+
2945
def get_canopy_absorbed_sw_diffuse(self) -> np.ndarray:
    """
    Export the diffuse SW radiation absorbed in the plant canopy.

    Returns:
        3D array of absorbed diffuse SW radiation (W/m³)
    """
    diffuse_absorbed = self._pcbinswdif
    return diffuse_absorbed.to_numpy()
2953
+
2954
def get_canopy_received_sw(self) -> np.ndarray:
    """
    Export the total SW radiation received at the plant canopy (before absorption).

    Returns:
        3D array of received SW radiation (W/m²)
    """
    received = self._pcinsw
    return received.to_numpy()
2962
+
2963
def get_total_canopy_absorption(self) -> float:
    """
    Integrate the absorbed SW density over the whole canopy.

    The per-cell absorbed density (W/m³) is summed over all cells and
    multiplied by the volume of one grid cell (dx * dy * dz).

    Returns:
        Total absorbed power in Watts
    """
    dom = self.domain
    cell_volume = dom.dx * dom.dy * dom.dz
    density_sum = self._pcbinsw.to_numpy().sum()
    return float(density_sum * cell_volume)
2973
+
2974
def get_canopy_absorption_profile(self) -> np.ndarray:
    """
    Compute the vertical profile of canopy-averaged SW absorption.

    For each vertical level the mean is taken over the cells with a
    strictly positive absorbed value; levels without any such cell stay 0.

    Returns:
        1D array of mean absorbed SW per level (W/m³)
    """
    absorbed = self._pcbinsw.to_numpy()
    n_levels = self.domain.nz
    level_means = np.zeros(n_levels)

    for level in range(n_levels):
        plane = absorbed[:, :, level]
        positive = plane[plane > 0]
        if positive.size:
            level_means[level] = positive.mean()

    return level_means
2990
+
2991
def get_canopy_scattered_sw(self) -> np.ndarray:
    """
    Export the scattered (reflected) SW radiation from canopy cells.

    Returns:
        3D array of scattered SW flux (W/m³) indexed by (i, j, k)
    """
    scattered = self._pcbinswref
    return scattered.to_numpy()
2999
+
3000
def get_canopy_to_canopy_sw(self) -> np.ndarray:
    """
    Export the SW radiation received from other canopy cells.

    This is radiation scattered by one canopy cell and absorbed by another
    (canopy-to-canopy scattering). It is only non-zero if
    canopy_to_canopy=True in config.

    Returns:
        3D array of canopy-to-canopy SW flux (W/m³) indexed by (i, j, k)
    """
    cumulative_c2c = self._pcbinswc2c_total
    return cumulative_c2c.to_numpy()
3011
+
3012
def get_surface_sw_from_canopy(self) -> np.ndarray:
    """
    Get SW radiation received by surfaces from canopy scattering.

    The result is trimmed to the first ``self.n_surfaces`` entries, like
    the arrays returned by ``get_surface_fluxes()``, so the two can be
    combined element-wise. If the underlying field already has exactly
    ``n_surfaces`` entries the trim is a no-op.

    Returns:
        1D array of SW from canopy (W/m²) per surface element
    """
    return self._surfinswpc.to_numpy()[:self.n_surfaces]