voxcity 0.6.26__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. voxcity/__init__.py +14 -8
  2. voxcity/downloader/__init__.py +2 -1
  3. voxcity/downloader/gba.py +210 -0
  4. voxcity/downloader/gee.py +5 -1
  5. voxcity/downloader/mbfp.py +1 -1
  6. voxcity/downloader/oemj.py +80 -8
  7. voxcity/downloader/utils.py +73 -73
  8. voxcity/errors.py +30 -0
  9. voxcity/exporter/__init__.py +13 -5
  10. voxcity/exporter/cityles.py +633 -538
  11. voxcity/exporter/envimet.py +728 -708
  12. voxcity/exporter/magicavoxel.py +334 -297
  13. voxcity/exporter/netcdf.py +238 -211
  14. voxcity/exporter/obj.py +1481 -1406
  15. voxcity/generator/__init__.py +44 -0
  16. voxcity/generator/api.py +675 -0
  17. voxcity/generator/grids.py +379 -0
  18. voxcity/generator/io.py +94 -0
  19. voxcity/generator/pipeline.py +282 -0
  20. voxcity/generator/voxelizer.py +380 -0
  21. voxcity/geoprocessor/__init__.py +75 -6
  22. voxcity/geoprocessor/conversion.py +153 -0
  23. voxcity/geoprocessor/draw.py +62 -12
  24. voxcity/geoprocessor/heights.py +199 -0
  25. voxcity/geoprocessor/io.py +101 -0
  26. voxcity/geoprocessor/merge_utils.py +91 -0
  27. voxcity/geoprocessor/mesh.py +806 -790
  28. voxcity/geoprocessor/network.py +708 -679
  29. voxcity/geoprocessor/overlap.py +84 -0
  30. voxcity/geoprocessor/raster/__init__.py +82 -0
  31. voxcity/geoprocessor/raster/buildings.py +428 -0
  32. voxcity/geoprocessor/raster/canopy.py +258 -0
  33. voxcity/geoprocessor/raster/core.py +150 -0
  34. voxcity/geoprocessor/raster/export.py +93 -0
  35. voxcity/geoprocessor/raster/landcover.py +156 -0
  36. voxcity/geoprocessor/raster/raster.py +110 -0
  37. voxcity/geoprocessor/selection.py +85 -0
  38. voxcity/geoprocessor/utils.py +18 -14
  39. voxcity/models.py +113 -0
  40. voxcity/simulator/common/__init__.py +22 -0
  41. voxcity/simulator/common/geometry.py +98 -0
  42. voxcity/simulator/common/raytracing.py +450 -0
  43. voxcity/simulator/solar/__init__.py +43 -0
  44. voxcity/simulator/solar/integration.py +336 -0
  45. voxcity/simulator/solar/kernels.py +62 -0
  46. voxcity/simulator/solar/radiation.py +648 -0
  47. voxcity/simulator/solar/temporal.py +434 -0
  48. voxcity/simulator/view.py +36 -2286
  49. voxcity/simulator/visibility/__init__.py +29 -0
  50. voxcity/simulator/visibility/landmark.py +392 -0
  51. voxcity/simulator/visibility/view.py +508 -0
  52. voxcity/utils/logging.py +61 -0
  53. voxcity/utils/orientation.py +51 -0
  54. voxcity/utils/weather/__init__.py +26 -0
  55. voxcity/utils/weather/epw.py +146 -0
  56. voxcity/utils/weather/files.py +36 -0
  57. voxcity/utils/weather/onebuilding.py +486 -0
  58. voxcity/visualizer/__init__.py +24 -0
  59. voxcity/visualizer/builder.py +43 -0
  60. voxcity/visualizer/grids.py +141 -0
  61. voxcity/visualizer/maps.py +187 -0
  62. voxcity/visualizer/palette.py +228 -0
  63. voxcity/visualizer/renderer.py +928 -0
  64. {voxcity-0.6.26.dist-info → voxcity-0.7.0.dist-info}/METADATA +107 -34
  65. voxcity-0.7.0.dist-info/RECORD +77 -0
  66. voxcity/generator.py +0 -1302
  67. voxcity/geoprocessor/grid.py +0 -1739
  68. voxcity/geoprocessor/polygon.py +0 -1344
  69. voxcity/simulator/solar.py +0 -2339
  70. voxcity/utils/visualization.py +0 -2849
  71. voxcity/utils/weather.py +0 -1038
  72. voxcity-0.6.26.dist-info/RECORD +0 -38
  73. {voxcity-0.6.26.dist-info → voxcity-0.7.0.dist-info}/WHEEL +0 -0
  74. {voxcity-0.6.26.dist-info → voxcity-0.7.0.dist-info}/licenses/AUTHORS.rst +0 -0
  75. {voxcity-0.6.26.dist-info → voxcity-0.7.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,1344 +0,0 @@
1
- """
2
- Module for handling GeoJSON data related to building footprints and heights.
3
-
4
- This module provides functionality for loading, filtering, transforming and saving GeoJSON data,
5
- with a focus on building footprints and their height information. It includes functions for
6
- coordinate transformations, spatial filtering, and height data extraction from various sources.
7
- """
8
-
9
- # Required imports for GIS operations, data manipulation and file handling
10
- import geopandas as gpd
11
- import json
12
- from shapely.geometry import Polygon, Point, shape
13
- from shapely.errors import GEOSException, ShapelyError
14
- import pandas as pd
15
- import numpy as np
16
- import gzip
17
- from typing import List, Dict
18
- from pyproj import Transformer, CRS
19
- import rasterio
20
- from rasterio.mask import mask
21
- import copy
22
- from rtree import index
23
-
24
- from .utils import validate_polygon_coordinates
25
-
26
def filter_and_convert_gdf_to_geojson(gdf, rectangle_vertices):
    """
    Clip a building GeoDataFrame to a bounding rectangle and emit GeoJSON features.

    The frame is reprojected to WGS84 when needed, spatially filtered against the
    rectangle via the GeoDataFrame's spatial index, and each surviving footprint is
    converted into a GeoJSON Feature dict. MultiPolygons are split so that every
    emitted feature carries a single Polygon geometry.

    Args:
        gdf (GeoDataFrame): Building data with 'geometry' and 'height' columns;
            any CRS is accepted and converted to EPSG:4326.
        rectangle_vertices (list): (lon, lat) tuples describing the bounding
            rectangle in WGS84.

    Returns:
        list: GeoJSON Feature dicts, each with 'height', 'confidence' and 'id'
        properties and Polygon geometry in WGS84.
    """
    # Normalize to WGS84 so the (lon, lat) rectangle and footprints share a CRS.
    if gdf.crs != 'EPSG:4326':
        gdf = gdf.to_crs(epsg=4326)

    # float32 heights halve the memory footprint for large national datasets.
    gdf['height'] = pd.to_numeric(gdf['height'], downcast='float')

    # Default reliability flag: -1.0 marks "confidence unknown".
    gdf['confidence'] = -1.0

    bbox = Polygon(rectangle_vertices)

    # Coarse pass via the R-tree (bounds only), then an exact intersects() pass.
    gdf.sindex  # force spatial index construction
    candidate_idx = list(gdf.sindex.intersection(bbox.bounds))
    candidates = gdf.iloc[candidate_idx]
    hits = candidates[candidates.intersects(bbox)]
    filtered = hits.copy()

    # Drop intermediate frames early to keep peak memory down.
    del gdf, candidates, hits

    features = []
    next_id = 1
    for _, row in filtered.iterrows():
        geom = row['geometry'].__geo_interface__
        props = {
            'height': row['height'],
            'confidence': row['confidence'],
            'id': next_id,
        }

        if geom['type'] == 'Polygon':
            features.append({
                'type': 'Feature',
                'properties': props,
                'geometry': geom,
            })
            next_id += 1
        elif geom['type'] == 'MultiPolygon':
            # Split each member polygon into its own feature; copy the
            # properties dict so the parts do not share one mutable object.
            for ring_set in geom['coordinates']:
                features.append({
                    'type': 'Feature',
                    'properties': props.copy(),
                    'geometry': {
                        'type': 'Polygon',
                        'coordinates': ring_set,
                    },
                })
                next_id += 1
        # Any other geometry type is silently skipped, as before.

    collection = {
        'type': 'FeatureCollection',
        'features': features,
    }

    # Unbind large locals; the list itself stays alive via `collection`.
    del filtered, features

    return collection["features"]
124
-
125
def get_geojson_from_gpkg(gpkg_path, rectangle_vertices):
    """
    Load a GeoPackage and return its buildings as GeoJSON features within a rectangle.

    Args:
        gpkg_path (str): Path to the GeoPackage file.
        rectangle_vertices (list): (lon, lat) tuples defining the bounding rectangle.

    Returns:
        list: GeoJSON Feature dicts clipped to the rectangle.
    """
    print(f"Opening GPKG file: {gpkg_path}")
    # Delegate filtering and conversion to the shared GDF -> GeoJSON helper.
    return filter_and_convert_gdf_to_geojson(gpd.read_file(gpkg_path), rectangle_vertices)
141
-
142
def extract_building_heights_from_gdf(gdf_0: gpd.GeoDataFrame, gdf_1: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """
    Fill missing building heights in one GeoDataFrame from a reference GeoDataFrame.

    For every building in the primary dataset whose height is missing or <= 0,
    an overlap-area-weighted average of the heights of intersecting reference
    buildings is computed and assigned. Buildings with no overlapping reference
    data get NaN.

    Args:
        gdf_0 (gpd.GeoDataFrame): Primary dataset to update; must have a
            'geometry' column. A 'height' column is created (0.0) if absent.
        gdf_1 (gpd.GeoDataFrame): Reference dataset supplying heights; must have
            a 'geometry' column. A 'height' column is created (0.0) if absent.

    Returns:
        gpd.GeoDataFrame: Copy of the primary dataset with heights filled in
        where overlapping reference data existed, NaN otherwise.

    Note:
        - Uses an R-tree index over the reference geometries for fast candidate
          lookup; invalid reference geometries are left out of the index.
        - GEOS errors during intersection are retried after buffer(0) repair.
        - NOTE(review): intersection areas are computed in the layers' current
          CRS — in a geographic CRS (degrees) the weights are not metric, though
          the *ratio* used for the weighted average is still area-proportional.
    """
    # Work on copies so the caller's frames are never mutated.
    gdf_primary = gdf_0.copy()
    gdf_ref = gdf_1.copy()

    # Guarantee a 'height' column on both sides (0.0 == "unknown").
    if 'height' not in gdf_primary.columns:
        gdf_primary['height'] = 0.0
    if 'height' not in gdf_ref.columns:
        gdf_ref['height'] = 0.0

    # Statistics reported at the end.
    count_0 = 0  # primary buildings lacking a usable height
    count_1 = 0  # of those, successfully filled from the reference data
    count_2 = 0  # of those, with no overlapping reference building

    # R-tree over reference footprints; entry i maps to gdf_ref positional row i.
    # Invalid geometries are skipped here, so they can never be matched below.
    spatial_index = index.Index()
    for i, geom in enumerate(gdf_ref.geometry):
        if geom.is_valid:
            spatial_index.insert(i, geom.bounds)

    # Visit every primary building that still needs a height.
    for idx_primary, row in gdf_primary.iterrows():
        # NaN <= 0 is False, so the pd.isna() check is required as well.
        if row['height'] <= 0 or pd.isna(row['height']):
            count_0 += 1
            geom = row.geometry

            # Accumulators for the area-weighted average height.
            overlapping_height_area = 0  # sum of height * overlap_area
            overlapping_area = 0         # sum of overlap_area

            # Coarse candidate set from bounding-box intersection.
            potential_matches = list(spatial_index.intersection(geom.bounds))

            for ref_idx in potential_matches:
                # Defensive guard against stale index entries.
                if ref_idx >= len(gdf_ref):
                    continue

                ref_row = gdf_ref.iloc[ref_idx]
                try:
                    # Exact test + overlap area for the weighted average.
                    if geom.intersects(ref_row.geometry):
                        overlap_area = geom.intersection(ref_row.geometry).area
                        overlapping_height_area += ref_row['height'] * overlap_area
                        overlapping_area += overlap_area
                except GEOSException:
                    # Retry once with buffer(0), the usual fix for
                    # self-intersecting polygons.
                    try:
                        fixed_ref_geom = ref_row.geometry.buffer(0)
                        if geom.intersects(fixed_ref_geom):
                            overlap_area = geom.intersection(fixed_ref_geom).area
                            overlapping_height_area += ref_row['height'] * overlap_area
                            overlapping_area += overlap_area
                    except Exception:
                        print(f"Failed to fix polygon")
                        continue

            # Assign the weighted average only when some positive height mass
            # was found; overlaps consisting solely of zero heights fall
            # through to the NaN branch.
            if overlapping_height_area > 0:
                count_1 += 1
                new_height = overlapping_height_area / overlapping_area
                gdf_primary.at[idx_primary, 'height'] = new_height
            else:
                count_2 += 1
                gdf_primary.at[idx_primary, 'height'] = np.nan

    # Summarize what was filled from the complementary source.
    if count_0 > 0:
        print(f"For {count_1} of these building footprints without height, values from the complementary source were assigned.")
        print(f"For {count_2} of these building footprints without height, no data exist in complementary data.")

    return gdf_primary
248
-
249
- # from typing import List, Dict
250
- # from shapely.geometry import shape
251
- # from shapely.errors import GEOSException
252
- # import numpy as np
253
-
254
- # def complement_building_heights_from_geojson(geojson_data_0: List[Dict], geojson_data_1: List[Dict]) -> List[Dict]:
255
- # """
256
- # Complement building heights in one GeoJSON dataset with data from another and add non-intersecting buildings.
257
-
258
- # Args:
259
- # geojson_data_0 (List[Dict]): Primary GeoJSON features to update with heights
260
- # geojson_data_1 (List[Dict]): Reference GeoJSON features containing height data
261
-
262
- # Returns:
263
- # List[Dict]: Updated GeoJSON features with complemented heights and additional buildings
264
- # """
265
- # # Convert primary dataset to Shapely polygons for intersection checking
266
- # existing_buildings = []
267
- # for feature in geojson_data_0:
268
- # geom = shape(feature['geometry'])
269
- # existing_buildings.append(geom)
270
-
271
- # # Convert reference dataset to Shapely polygons with height info
272
- # reference_buildings = []
273
- # for feature in geojson_data_1:
274
- # geom = shape(feature['geometry'])
275
- # height = feature['properties']['height']
276
- # reference_buildings.append((geom, height, feature))
277
-
278
- # # Initialize counters for statistics
279
- # count_0 = 0 # Buildings without height
280
- # count_1 = 0 # Buildings updated with height
281
- # count_2 = 0 # Buildings with no height data found
282
- # count_3 = 0 # New non-intersecting buildings added
283
-
284
- # # Process primary dataset and update heights where needed
285
- # updated_geojson_data_0 = []
286
- # for feature in geojson_data_0:
287
- # geom = shape(feature['geometry'])
288
- # height = feature['properties']['height']
289
- # if height == 0:
290
- # count_0 += 1
291
- # # Calculate weighted average height based on overlapping areas
292
- # overlapping_height_area = 0
293
- # overlapping_area = 0
294
- # for ref_geom, ref_height, _ in reference_buildings:
295
- # try:
296
- # if geom.intersects(ref_geom):
297
- # overlap_area = geom.intersection(ref_geom).area
298
- # overlapping_height_area += ref_height * overlap_area
299
- # overlapping_area += overlap_area
300
- # except GEOSException as e:
301
- # # Try to fix invalid geometries
302
- # try:
303
- # fixed_ref_geom = ref_geom.buffer(0)
304
- # if geom.intersects(fixed_ref_geom):
305
- # overlap_area = geom.intersection(ref_geom).area
306
- # overlapping_height_area += ref_height * overlap_area
307
- # overlapping_area += overlap_area
308
- # except Exception as fix_error:
309
- # print(f"Failed to fix polygon")
310
- # continue
311
-
312
- # # Update height if overlapping buildings found
313
- # if overlapping_height_area > 0:
314
- # count_1 += 1
315
- # new_height = overlapping_height_area / overlapping_area
316
- # feature['properties']['height'] = new_height
317
- # else:
318
- # count_2 += 1
319
- # feature['properties']['height'] = np.nan
320
-
321
- # updated_geojson_data_0.append(feature)
322
-
323
- # # Add non-intersecting buildings from reference dataset
324
- # for ref_geom, ref_height, ref_feature in reference_buildings:
325
- # has_intersection = False
326
- # try:
327
- # # Check if reference building intersects with any existing building
328
- # for existing_geom in existing_buildings:
329
- # if ref_geom.intersects(existing_geom):
330
- # has_intersection = True
331
- # break
332
-
333
- # # Add building if it doesn't intersect with any existing ones
334
- # if not has_intersection:
335
- # updated_geojson_data_0.append(ref_feature)
336
- # count_3 += 1
337
-
338
- # except GEOSException as e:
339
- # # Try to fix invalid geometries
340
- # try:
341
- # fixed_ref_geom = ref_geom.buffer(0)
342
- # for existing_geom in existing_buildings:
343
- # if fixed_ref_geom.intersects(existing_geom):
344
- # has_intersection = True
345
- # break
346
-
347
- # if not has_intersection:
348
- # updated_geojson_data_0.append(ref_feature)
349
- # count_3 += 1
350
- # except Exception as fix_error:
351
- # print(f"Failed to process non-intersecting building")
352
- # continue
353
-
354
- # # Print statistics about updates
355
- # if count_0 > 0:
356
- # print(f"{count_0} of the total {len(geojson_data_0)} building footprint from base source did not have height data.")
357
- # print(f"For {count_1} of these building footprints without height, values from complement source were assigned.")
358
- # print(f"{count_3} non-intersecting buildings from Microsoft Building Footprints were added to the output.")
359
-
360
- # return updated_geojson_data_0
361
-
362
- import numpy as np
363
- import geopandas as gpd
364
- import pandas as pd
365
- from shapely.geometry import shape
366
- from shapely.errors import GEOSException
367
-
368
def geojson_to_gdf(geojson_data, id_col='id'):
    """
    Convert a list of GeoJSON-like feature dicts into a GeoDataFrame.

    Each feature's geometry is converted to a Shapely object and its properties
    become columns. Features missing the identifier property receive a
    sequential fallback ID.

    Args:
        geojson_data (List[Dict]): Feature dicts (Fiona-like). Each may carry a
            'geometry' (valid GeoJSON geometry or None) and a 'properties' dict
            (may be absent or None).
        id_col (str, optional): Property name used as the identifier column.
            Defaults to 'id'. A sequential index is substituted when missing.

    Returns:
        gpd.GeoDataFrame: Frame with a 'geometry' column, one column per
        property, an id_col column, and CRS set to WGS84 (EPSG:4326).
    """
    geometries = []
    all_props = []

    for i, feature in enumerate(geojson_data):
        # Convert GeoJSON geometry to a Shapely object; tolerate missing geometry.
        geom = feature.get('geometry')
        shapely_geom = shape(geom) if geom else None

        # Bug fix: copy the properties instead of aliasing them. The previous
        # code wrote the fallback ID straight into the caller's feature dicts,
        # mutating the input. `or {}` also tolerates an explicit None.
        props = dict(feature.get('properties') or {})

        # Sequential fallback identifier when the property is absent.
        if id_col not in props:
            props[id_col] = i

        geometries.append(shapely_geom)
        all_props.append(props)

    # Assemble the frame and pin it to WGS84.
    gdf = gpd.GeoDataFrame(all_props, geometry=geometries, crs="EPSG:4326")
    return gdf
423
-
424
-
425
def complement_building_heights_from_gdf(gdf_0, gdf_1,
                                         primary_id='id', ref_id='id'):
    """
    Complement a primary building dataset with heights and footprints from a reference.

    Vectorized GeoPandas pipeline that:
      1) overlays the two datasets to find intersections and computes an
         overlap-area-weighted average reference height per primary building;
      2) assigns that weighted height where the primary height is 0 or NaN;
      3) appends reference buildings that intersect no primary building.

    Args:
        gdf_0 (gpd.GeoDataFrame): Primary dataset; 'height' column created
            (0.0) if absent.
        gdf_1 (gpd.GeoDataFrame): Reference dataset; 'height' column created
            (0.0) if absent.
        primary_id (str): Unique identifier column in the primary dataset.
        ref_id (str): Unique identifier column in the reference dataset.

    Returns:
        gpd.GeoDataFrame: Updated primary frame concatenated with the
        non-intersecting reference buildings.

    Note:
        - NOTE(review): both frames are assumed to share a CRS; areas are
          computed in that CRS, so in EPSG:4326 the overlap weights are in
          square degrees — the weighted *ratio* remains area-proportional.
        - NOTE(review): relies on gpd.overlay suffixing duplicate columns with
          '_1'/'_2' and gpd.sjoin with '_left'/'_right' — verify against the
          installed GeoPandas version.
    """
    # Work on copies so the caller's frames are never mutated.
    gdf_primary = gdf_0.copy()
    gdf_ref = gdf_1.copy()

    # Ensure both are in the same CRS, e.g. EPSG:4326 or some projected CRS
    # (reproject before calling if necessary).

    # Guarantee a 'height' column on both sides.
    if 'height' not in gdf_primary.columns:
        gdf_primary['height'] = 0.0
    if 'height' not in gdf_ref.columns:
        gdf_ref['height'] = 0.0

    # ----------------------------------------------------------------
    # 1) Intersection overlay: one row per overlapping polygon pair
    # ----------------------------------------------------------------
    # Rename the height columns first so they survive the overlay without
    # colliding.
    gdf_primary = gdf_primary.rename(columns={'height': 'height_primary'})
    gdf_ref = gdf_ref.rename(columns={'height': 'height_ref'})

    intersect_gdf = gpd.overlay(gdf_primary, gdf_ref, how='intersection')

    # Per-piece overlap area and its height-weighted contribution.
    intersect_gdf['intersect_area'] = intersect_gdf.area
    intersect_gdf['height_area'] = intersect_gdf['height_ref'] * intersect_gdf['intersect_area']

    # ----------------------------------------------------------------
    # 2) Aggregate to a weighted average height per primary building
    # ----------------------------------------------------------------
    # Overlay suffixes the duplicated id column from the first input with '_1'.
    group_cols = {
        'height_area': 'sum',
        'intersect_area': 'sum'
    }
    grouped = intersect_gdf.groupby(f'{primary_id}_1').agg(group_cols)

    # Weighted average = sum(height * area) / sum(area).
    grouped['weighted_height'] = grouped['height_area'] / grouped['intersect_area']

    # ----------------------------------------------------------------
    # 3) Merge the aggregated heights back onto the primary frame
    # ----------------------------------------------------------------
    gdf_primary = gdf_primary.merge(grouped['weighted_height'],
                                    left_on=primary_id,
                                    right_index=True,
                                    how='left')

    # Only rows whose original height was 0 or NaN are candidates for update.
    zero_or_nan_mask = (gdf_primary['height_primary'] == 0) | (gdf_primary['height_primary'].isna())

    # ... and only where a weighted height was actually computed.
    valid_weighted_height_mask = zero_or_nan_mask & gdf_primary['weighted_height'].notna()
    gdf_primary.loc[valid_weighted_height_mask, 'height_primary'] = gdf_primary.loc[valid_weighted_height_mask, 'weighted_height']
    gdf_primary['height_primary'] = gdf_primary['height_primary'].fillna(np.nan)

    # ----------------------------------------------------------------
    # 4) Reference buildings that intersect no primary building
    # ----------------------------------------------------------------
    # Building-level (not piece-level) test via a left spatial join: reference
    # rows whose joined primary id is NaN matched nothing.
    sjoin_gdf = gpd.sjoin(gdf_ref, gdf_primary, how='left', predicate='intersects')

    non_intersect_mask = sjoin_gdf[f'{primary_id}_right'].isna()
    non_intersect_ids = sjoin_gdf[non_intersect_mask][f'{ref_id}_left'].unique()

    # Pull the untouched originals (not the join result) for those ids.
    gdf_ref_non_intersect = gdf_ref[gdf_ref[ref_id].isin(non_intersect_ids)]

    # Restore the conventional column name before concatenation.
    gdf_ref_non_intersect = gdf_ref_non_intersect.rename(columns={'height_ref': 'height'})

    # ----------------------------------------------------------------
    # 5) Combine updated primary buildings with the new reference ones
    # ----------------------------------------------------------------
    gdf_primary = gdf_primary.rename(columns={'height_primary': 'height'})
    # Drop the helper column; it was only needed for the update step.
    if 'weighted_height' in gdf_primary.columns:
        gdf_primary.drop(columns='weighted_height', inplace=True)

    final_gdf = pd.concat([gdf_primary, gdf_ref_non_intersect], ignore_index=True)

    # Bookkeeping for the summary printout below.
    count_total = len(gdf_primary)                     # primary footprints
    count_0 = len(gdf_primary[zero_or_nan_mask])       # lacking height
    count_1 = len(gdf_primary[valid_weighted_height_mask])  # filled from ref
    count_2 = count_0 - count_1                        # still unknown
    count_3 = len(gdf_ref_non_intersect)               # footprints added
    count_4 = count_3
    height_mask = gdf_ref_non_intersect['height'].notna() & (gdf_ref_non_intersect['height'] > 0)
    count_5 = len(gdf_ref_non_intersect[height_mask])  # added, with height
    count_6 = count_4 - count_5                        # added, without height
    final_height_mask = final_gdf['height'].notna() & (final_gdf['height'] > 0)
    count_7 = len(final_gdf[final_height_mask])        # total with height
    count_8 = len(final_gdf)                           # total footprints

    # Report only when some primary buildings actually lacked height data.
    if count_0 > 0:
        print(f"{count_0} of the total {count_total} building footprints from base data source did not have height data.")
        print(f"For {count_1} of these building footprints without height, values from complementary data were assigned.")
        print(f"For the rest {count_2}, no data exists in complementary data.")
        print(f"Footprints of {count_3} buildings were added from the complementary source.")
        print(f"Of these {count_4} additional building footprints, {count_5} had height data while {count_6} had no height data.")
        print(f"In total, {count_7} buildings had height data out of {count_8} total building footprints.")

    return final_gdf
560
-
561
-
562
def gdf_to_geojson_dicts(gdf, id_col='id'):
    """
    Convert a GeoDataFrame to a list of GeoJSON-like feature dicts.

    Each row becomes a Feature whose geometry is taken from the 'geometry'
    column (converted via __geo_interface__) and whose remaining columns become
    properties. The identifier column, when present, is attached as the
    feature's top-level "id" member (RFC 7946) and excluded from properties.

    Args:
        gdf (gpd.GeoDataFrame): Frame to convert. Must have a 'geometry'
            column; all other columns become properties.
        id_col (str, optional): Column used as the feature identifier.
            Defaults to 'id'. Ignored gracefully when absent.

    Returns:
        list: Feature dicts with 'type', 'geometry', 'properties' and, when an
        identifier exists, a top-level 'id'.
    """
    # Row dicts are easier to pop/filter than DataFrame rows.
    records = gdf.to_dict(orient='records')
    features = []

    for rec in records:
        # Convert the Shapely geometry to plain GeoJSON; tolerate missing geometry.
        geom = rec.pop('geometry', None)
        if geom is not None:
            geom = geom.__geo_interface__

        # Identifier is kept out of properties and exposed separately.
        feature_id = rec.get(id_col, None)
        props = {k: v for k, v in rec.items() if k != id_col}

        feature = {
            'type': 'Feature',
            'properties': props,
            'geometry': geom
        }
        # Bug fix: the identifier was previously extracted but never attached,
        # so the advertised per-feature ID was silently dropped. Attach it as
        # the standard GeoJSON top-level "id" member when present.
        if feature_id is not None:
            feature['id'] = feature_id
        features.append(feature)

    return features
617
-
618
def load_gdf_from_multiple_gz(file_paths):
    """
    Load newline-delimited GeoJSON features from multiple gzipped files into one GeoDataFrame.

    Every line of every file is parsed as a single GeoJSON feature. Missing or
    null 'height' properties are normalized to 0, and the combined frame is
    assigned the WGS84 CRS.

    Args:
        file_paths (list): Paths to gzipped files, one GeoJSON feature per line,
            UTF-8 encoded, coordinates assumed to be in WGS84.

    Returns:
        gpd.GeoDataFrame: All features combined, with a 'height' column
        (0 where missing) and CRS set to EPSG:4326.

    Note:
        Unparseable lines are reported and skipped; processing continues.
    """
    features = []

    for path in file_paths:
        # Stream each archive as UTF-8 text, one feature per line.
        with gzip.open(path, 'rt', encoding='utf-8') as handle:
            for raw_line in handle:
                try:
                    feature = json.loads(raw_line)
                except json.JSONDecodeError as e:
                    # Report and move on; a single bad line must not abort the load.
                    print(f"Skipping line in {path} due to JSONDecodeError: {e}")
                    continue

                # Normalize heights: create the properties dict if absent and
                # coerce a missing or null height to 0.
                props = feature.setdefault('properties', {})
                if props.get('height') is None:
                    props['height'] = 0

                features.append(feature)

    # Build the combined frame and pin it to WGS84.
    gdf = gpd.GeoDataFrame.from_features(features)
    gdf.set_crs(epsg=4326, inplace=True)

    return gdf
680
-
681
def filter_buildings(geojson_data, plotting_box):
    """
    Filter building features down to those intersecting a bounding box.

    Runs each feature through coordinate validation, converts it to a Shapely
    geometry, and keeps it only when the geometry is valid and intersects the
    supplied bounding box. Invalid features are reported and dropped.

    Args:
        geojson_data (list): GeoJSON features, each with a 'geometry' property,
            in the same CRS as plotting_box.
        plotting_box (Polygon): Shapely polygon used for the intersection test.

    Returns:
        list: Features (original dicts, original order) that intersect the box.

    Note:
        - No coordinate transformation is performed.
        - Geometry errors are logged and the offending feature is skipped.
    """
    kept = []

    for feature in geojson_data:
        geometry_dict = feature['geometry']

        # Reject features whose raw coordinate structure fails validation.
        if not validate_polygon_coordinates(geometry_dict):
            print("Skipping feature with invalid geometry")
            print(geometry_dict)
            continue

        try:
            geom = shape(geometry_dict)

            if not geom.is_valid:
                # Unfixable invalid geometry: report and drop.
                print("Skipping invalid geometry")
                print(geom)
            elif plotting_box.intersects(geom):
                kept.append(feature)
        except ShapelyError as e:
            # Log geometry errors but continue with the remaining features.
            print(f"Skipping feature due to geometry error: {e}")

    return kept
-
743
def extract_building_heights_from_geotiff(geotiff_path, gdf):
    """
    Extract building heights from a GeoTIFF raster for building footprints in a GeoDataFrame.

    Transforms each footprint from WGS84 (EPSG:4326) into the raster's CRS,
    masks the raster with the footprint, and assigns the mean of the valid
    raster samples as the building height. Only buildings with missing or
    non-positive heights are processed; existing valid heights are preserved.

    Args:
        geotiff_path (str): Path to the GeoTIFF height raster.
        gdf (gpd.GeoDataFrame): Building footprints in WGS84 (EPSG:4326).
            A 'height' column is optional; it is created when absent.

    Returns:
        gpd.GeoDataFrame: Copy of the input with the 'height' column updated.
            Buildings with no valid raster data get NaN.

    Note:
        - Only simple Polygon geometries are processed (not MultiPolygons).
        - Prints statistics about how many footprints were updated.
    """
    # Make a copy to avoid modifying the input
    gdf = gdf.copy()

    # Ensure a 'height' column exists so the selection mask below cannot fail
    # when the input GeoDataFrame carries no height information at all
    # (the previous `gdf.get('height').isna()` raised AttributeError then).
    if 'height' not in gdf.columns:
        gdf['height'] = np.nan

    # Counters for the summary printed at the end.
    count_no_height = 0   # Buildings without usable height
    count_updated = 0     # Buildings updated from the raster
    count_missing = 0     # Buildings with no raster data available

    with rasterio.open(geotiff_path) as src:
        # Transformer from WGS84 footprint coordinates to the raster CRS.
        transformer = Transformer.from_crs(CRS.from_epsg(4326), src.crs, always_xy=True)

        # Buildings needing processing: simple Polygons with missing or
        # non-positive height (NaN <= 0 is False, hence the explicit isna()).
        height = gdf['height']
        mask_condition = (gdf.geometry.geom_type == 'Polygon') & ((height <= 0) | height.isna())
        buildings_to_process = gdf[mask_condition]
        count_no_height = len(buildings_to_process)

        for idx, row in buildings_to_process.iterrows():
            # Reproject the footprint's exterior ring into the raster CRS.
            coords = list(row.geometry.exterior.coords)
            transformed_coords = [transformer.transform(lon, lat) for lon, lat in coords]
            polygon = shape({"type": "Polygon", "coordinates": [transformed_coords]})

            try:
                # all_touched=True includes every pixel the polygon touches.
                masked_data, _ = rasterio.mask.mask(src, [polygon], crop=True, all_touched=True)

                # Discard nodata samples before averaging.
                heights = masked_data[0][masked_data[0] != src.nodata]

                if len(heights) > 0:
                    count_updated += 1
                    gdf.at[idx, 'height'] = float(np.mean(heights))
                else:
                    count_missing += 1
                    gdf.at[idx, 'height'] = np.nan
            except ValueError as e:
                # Typically raised when the polygon falls outside the raster.
                print(f"Error processing building at index {idx}. Error: {str(e)}")
                gdf.at[idx, 'height'] = None

    # Print statistics about height updates
    if count_no_height > 0:
        print(f"{count_no_height} of the total {len(gdf)} building footprint from OSM did not have height data.")
        print(f"For {count_updated} of these building footprints without height, values from complementary data were assigned.")
        print(f"For {count_missing} of these building footprints without height, no data exist in complementary data.")

    return gdf
-
825
def get_gdf_from_gpkg(gpkg_path, rectangle_vertices):
    """
    Read a GeoPackage file into a GeoDataFrame normalized to WGS84 with sequential IDs.

    Loads the file, assigns WGS84 when no CRS is recorded, reprojects to WGS84
    when a different CRS is set, and overwrites the 'id' column with the row
    index.

    Args:
        gpkg_path (str): Path to a readable GeoPackage with footprint geometries.
        rectangle_vertices (list): (lon, lat) tuples in WGS84 describing the
            bounding rectangle. Currently unused by this function.

    Returns:
        gpd.GeoDataFrame: Footprints in EPSG:4326 with a sequential 'id' column.
    """
    print(f"Opening GPKG file: {gpkg_path}")
    gdf = gpd.read_file(gpkg_path)

    crs = gdf.crs
    if crs is None:
        # No CRS recorded in the file: assume the data is already WGS84.
        gdf.set_crs(epsg=4326, inplace=True)
    elif crs != "EPSG:4326":
        # Reproject to WGS84 for downstream consistency.
        gdf = gdf.to_crs(epsg=4326)

    # Overwrite any pre-existing id column with sequential index numbers.
    gdf['id'] = gdf.index

    return gdf
-
871
def swap_coordinates(features):
    """
    Swap coordinate ordering in GeoJSON features from (lat, lon) to (lon, lat), in place.

    Handles 'Polygon' and 'MultiPolygon' geometries; features of any other
    geometry type are left untouched.

    Args:
        features (list): GeoJSON features with a 'geometry' property whose
            coordinates are initially in (lat, lon) order.

    Returns:
        None: The feature dicts are modified directly.
    """
    def _flip_ring(ring):
        # Reverse the pair order of every coordinate in one linear ring.
        return [[second, first] for first, second in ring]

    for feature in features:
        geometry = feature['geometry']
        geom_type = geometry['type']

        if geom_type == 'Polygon':
            # A Polygon is a list of rings (exterior plus optional holes).
            geometry['coordinates'] = [_flip_ring(ring) for ring in geometry['coordinates']]
        elif geom_type == 'MultiPolygon':
            # A MultiPolygon is a list of polygons, each a list of rings.
            geometry['coordinates'] = [
                [_flip_ring(ring) for ring in polygon]
                for polygon in geometry['coordinates']
            ]
-
909
def save_geojson(features, save_path):
    """
    Write GeoJSON features to a file as a pretty-printed FeatureCollection.

    A deep copy of the features is taken so the originals are untouched, the
    copy's coordinates are swapped from (lat, lon) to (lon, lat), and the
    result is wrapped in a FeatureCollection and dumped with 2-space indents.

    Args:
        features (list): GeoJSON features (Polygon or MultiPolygon) with
            coordinates in (lat, lon) order.
        save_path (str): Destination file path; overwritten if it exists and
            the containing directory must be writable.

    Returns:
        None
    """
    # Deep-copy so the swap below cannot mutate the caller's features.
    features_copy = copy.deepcopy(features)
    swap_coordinates(features_copy)

    # Wrap in a FeatureCollection for standard GeoJSON output.
    collection = {
        "type": "FeatureCollection",
        "features": features_copy
    }

    # Pretty-print with 2-space indentation.
    with open(save_path, 'w') as out:
        json.dump(collection, out, indent=2)
-
953
def find_building_containing_point(building_gdf, target_point):
    """
    Return the IDs of buildings whose footprint contains a given point.

    Only simple Polygon geometries are considered, and the point must lie
    strictly inside the footprint (Shapely contains semantics). The search is
    a linear scan; no spatial index is used.

    Args:
        building_gdf (GeoDataFrame): Buildings with a 'geometry' column and,
            ideally, an 'id' column; rows lacking 'id' contribute None.
        target_point (tuple): (lon, lat) in the same CRS as the geometries.

    Returns:
        list: IDs of all containing buildings (may be empty, or have several
            entries when footprints overlap), in iteration order.
    """
    probe = Point(target_point[0], target_point[1])
    matches = []

    for _, row in building_gdf.iterrows():
        geom = row.geometry
        # Only simple Polygons participate; containment must be strict.
        if isinstance(geom, Polygon) and geom.contains(probe):
            matches.append(row.get('id', None))

    return matches
-
1000
def get_buildings_in_drawn_polygon(building_gdf, drawn_polygons,
                                   operation='within'):
    """
    Find buildings that intersect with or are contained within user-drawn polygons.

    Checks each simple-Polygon building against every drawn polygon using the
    requested spatial predicate and collects the matching building IDs, with
    duplicates removed (a building matching several polygons appears once).

    Args:
        building_gdf (GeoDataFrame): Buildings with a 'geometry' column of
            Polygon geometries and, ideally, an 'id' column; rows lacking
            'id' contribute None. Same CRS as the drawn vertices.
        drawn_polygons (list): Dicts with at least a 'vertices' key holding
            (lon, lat) tuples forming a valid polygon ('id'/'color' keys are
            ignored here).
        operation (str, optional): 'within' (default) requires full
            containment; 'intersect' requires any overlap.

    Returns:
        list: Unique building IDs satisfying the relationship with any drawn
            polygon; empty when drawn_polygons is empty or nothing matches.

    Raises:
        ValueError: If operation is not 'intersect' or 'within'. This is now
            validated up front, so an invalid value fails even with empty
            inputs (previously it was only detected per examined building).

    Note:
        - Only simple Polygon geometries are checked (MultiPolygons skipped).
        - Linear scan; no spatial index is used.
    """
    # Validate before any spatial work so bad arguments always fail loudly.
    if operation not in ('intersect', 'within'):
        raise ValueError("operation must be 'intersect' or 'within'")

    if not drawn_polygons:
        return []

    # Use a set so a building matching multiple polygons is reported once.
    included_building_ids = set()

    for polygon_data in drawn_polygons:
        drawn_polygon_shapely = Polygon(polygon_data['vertices'])

        for idx, row in building_gdf.iterrows():
            # Skip any geometry that is not a simple Polygon.
            if not isinstance(row.geometry, Polygon):
                continue

            # Apply the requested spatial predicate.
            if operation == 'intersect':
                matched = row.geometry.intersects(drawn_polygon_shapely)
            else:
                matched = row.geometry.within(drawn_polygon_shapely)

            if matched:
                included_building_ids.add(row.get('id', None))

    return list(included_building_ids)
-
1073
def process_building_footprints_by_overlap(filtered_gdf, overlap_threshold=0.5):
    """
    Merge overlapping building footprints by unifying their IDs based on area overlap.

    Buildings are processed in order of decreasing area; a smaller building
    whose overlap with a larger one exceeds the threshold (relative to the
    smaller building's own area) is assigned the larger building's ID.
    Geometries are never changed — only the 'id' column is rewritten.

    Args:
        filtered_gdf (geopandas.GeoDataFrame): Building footprints with a
            'geometry' column. An 'id' column is created from the index when
            absent. If a CRS is set, areas are computed in Web Mercator.
        overlap_threshold (float, optional): Overlap ratio (0.0-1.0) above
            which buildings are merged. Default 0.5. Lower values merge more
            aggressively.

    Returns:
        geopandas.GeoDataFrame: A copy of the input with merged IDs. The
            caller's GeoDataFrame is left unmodified (the previous version
            mutated and returned the input despite taking a copy).

    Note:
        - Uses an R-tree index to prune candidate overlaps.
        - Invalid geometries are repaired with buffer(0) where possible.
    """
    # Work on a copy so the caller's frame is never mutated.
    gdf = filtered_gdf.copy()

    # Ensure an 'id' column exists, falling back to the index.
    if 'id' not in gdf.columns:
        gdf['id'] = gdf.index

    # Project to Web Mercator for meaningful area computation when a CRS is set.
    if gdf.crs is None:
        gdf_projected = gdf.copy()
    else:
        gdf_projected = gdf.to_crs("EPSG:3857")

    # Sort by decreasing area so larger buildings act as merge targets.
    gdf_projected['area'] = gdf_projected.geometry.area
    gdf_projected = gdf_projected.sort_values(by='area', ascending=False)
    gdf_projected = gdf_projected.reset_index(drop=True)

    # R-tree over geometry bounds for cheap candidate-overlap queries.
    spatial_idx = index.Index()
    for i, geom in enumerate(gdf_projected.geometry):
        if geom.is_valid:
            spatial_idx.insert(i, geom.bounds)
        else:
            # Attempt to repair invalid geometries with the buffer(0) trick.
            fixed_geom = geom.buffer(0)
            if fixed_geom.is_valid:
                spatial_idx.insert(i, fixed_geom.bounds)

    # Maps a smaller building's ID to the larger building's ID it merged into.
    id_mapping = {}

    # Skip index 0: the largest building is always a reference, never merged away.
    for i in range(1, len(gdf_projected)):
        current_poly = gdf_projected.iloc[i].geometry
        current_area = gdf_projected.iloc[i]['area']
        current_id = gdf_projected.iloc[i]['id']

        # Already merged into another building.
        if current_id in id_mapping:
            continue

        if not current_poly.is_valid:
            current_poly = current_poly.buffer(0)
            if not current_poly.is_valid:
                continue

        # Only larger (earlier-sorted) buildings are candidate merge targets.
        potential_overlaps = [j for j in spatial_idx.intersection(current_poly.bounds) if j < i]

        for j in potential_overlaps:
            larger_poly = gdf_projected.iloc[j].geometry
            larger_id = gdf_projected.iloc[j]['id']

            # Follow the mapping chain so merges converge on a final ID.
            if larger_id in id_mapping:
                larger_id = id_mapping[larger_id]

            if not larger_poly.is_valid:
                larger_poly = larger_poly.buffer(0)
                if not larger_poly.is_valid:
                    continue

            try:
                # Overlap ratio is relative to the smaller (current) building.
                if current_poly.intersects(larger_poly):
                    overlap = current_poly.intersection(larger_poly)
                    overlap_ratio = overlap.area / current_area

                    if overlap_ratio > overlap_threshold:
                        id_mapping[current_id] = larger_id
                        gdf_projected.at[i, 'id'] = larger_id
                        break  # Stop at first significant overlap
            except (GEOSException, ValueError):
                # Skip problematic geometries.
                continue

    # Apply the ID mapping to the copy (not the caller's input) and return it.
    for i, row in gdf.iterrows():
        orig_id = row.get('id')
        if orig_id in id_mapping:
            gdf.at[i, 'id'] = id_mapping[orig_id]

    return gdf
-
1194
def merge_gdfs_with_id_conflict_resolution(gdf_1, gdf_2, id_columns=None):
    """
    Merge two GeoDataFrames while resolving ID conflicts by modifying IDs in the second GeoDataFrame.

    Rows in gdf_2 whose value in any ID column already appears in gdf_1 are
    reassigned fresh IDs (continuing past gdf_1's maximum) in every ID column,
    then the two frames are concatenated with missing columns filled by None.

    Args:
        gdf_1 (gpd.GeoDataFrame): Primary frame; never modified.
        gdf_2 (gpd.GeoDataFrame): Secondary frame; conflicting IDs are
            rewritten on an internal copy.
        id_columns (list, optional): Column names checked for conflicts.
            Defaults to ['id', 'building_id']. Columns missing from either
            frame are skipped with a warning.

    Returns:
        gpd.GeoDataFrame: Concatenation of both inputs with unique IDs across
            the specified columns; missing columns filled with None.

    Note:
        - Non-numeric ID columns use len(gdf_1) as the base for new IDs.
        - Prints statistics about the merge and any reassigned rows.
    """
    # Avoid a mutable default argument; the effective default is unchanged.
    if id_columns is None:
        id_columns = ['id', 'building_id']

    # Make copies to avoid modifying original GeoDataFrames
    gdf_primary = gdf_1.copy()
    gdf_secondary = gdf_2.copy()

    # Validate that required ID columns exist in both GeoDataFrames
    missing_columns = []
    for col in id_columns:
        if col not in gdf_primary.columns:
            missing_columns.append(f"'{col}' missing from gdf_1")
        if col not in gdf_secondary.columns:
            missing_columns.append(f"'{col}' missing from gdf_2")

    if missing_columns:
        print(f"Warning: Missing ID columns: {', '.join(missing_columns)}")
        # Keep only the columns present in both frames.
        id_columns = [col for col in id_columns
                      if col in gdf_primary.columns and col in gdf_secondary.columns]

    if not id_columns:
        print("Warning: No valid ID columns found. Merging without ID conflict resolution.")
        return _merge_gdfs_with_missing_columns(gdf_primary, gdf_secondary)

    # Highest existing IDs in the primary frame, per ID column; new IDs for
    # conflicting secondary rows continue from here.
    max_ids = {}
    for col in id_columns:
        if gdf_primary[col].dtype in ['int64', 'int32', 'float64', 'float32']:
            max_ids[col] = gdf_primary[col].max()
        else:
            # For non-numeric IDs, fall back to the primary row count.
            max_ids[col] = len(gdf_primary)

    next_ids = {col: max_ids[col] + 1 for col in id_columns}

    # Row labels in gdf_secondary whose IDs were reassigned. Tracking labels
    # directly fixes the old statistics pass, which indexed gdf_2 positionally
    # (.iloc) with labels from iterrows().
    modified_indices = set()

    for idx, row in gdf_secondary.iterrows():
        # A row needs fresh IDs when any ID column collides with the primary frame.
        needs_new_ids = any(row[col] in gdf_primary[col].values for col in id_columns)

        if needs_new_ids:
            modified_indices.add(idx)
            for col in id_columns:
                gdf_secondary.at[idx, col] = next_ids[col]
                next_ids[col] += 1

    # Handle missing columns before merging
    merged_gdf = _merge_gdfs_with_missing_columns(gdf_primary, gdf_secondary)

    # Print statistics about the merge
    total_buildings = len(merged_gdf)
    primary_buildings = len(gdf_primary)
    secondary_buildings = len(gdf_secondary)
    modified_buildings = len(modified_indices)

    print(f"Merged {primary_buildings} buildings from primary dataset with {secondary_buildings} buildings from secondary dataset.")
    print(f"Total buildings in merged dataset: {total_buildings}")
    if modified_buildings > 0:
        print(f"Modified IDs for {modified_buildings} buildings in secondary dataset to resolve conflicts.")

    return merged_gdf
-
1304
-
1305
- def _merge_gdfs_with_missing_columns(gdf_1, gdf_2):
1306
- """
1307
- Helper function to merge two GeoDataFrames while handling missing columns.
1308
-
1309
- This function ensures that when one GeoDataFrame has columns that the other doesn't,
1310
- those missing values are filled with None instead of NaN.
1311
-
1312
- Args:
1313
- gdf_1 (gpd.GeoDataFrame): First GeoDataFrame
1314
- gdf_2 (gpd.GeoDataFrame): Second GeoDataFrame
1315
-
1316
- Returns:
1317
- gpd.GeoDataFrame: Merged GeoDataFrame with all columns from both inputs
1318
- """
1319
- # Find columns that exist in one GeoDataFrame but not the other
1320
- columns_1 = set(gdf_1.columns)
1321
- columns_2 = set(gdf_2.columns)
1322
-
1323
- # Columns only in gdf_1
1324
- only_in_1 = columns_1 - columns_2
1325
- # Columns only in gdf_2
1326
- only_in_2 = columns_2 - columns_1
1327
-
1328
- # Add missing columns to gdf_1 with None values
1329
- for col in only_in_2:
1330
- gdf_1[col] = None
1331
-
1332
- # Add missing columns to gdf_2 with None values
1333
- for col in only_in_1:
1334
- gdf_2[col] = None
1335
-
1336
- # Ensure both GeoDataFrames have the same column order
1337
- all_columns = sorted(list(columns_1.union(columns_2)))
1338
- gdf_1 = gdf_1[all_columns]
1339
- gdf_2 = gdf_2[all_columns]
1340
-
1341
- # Merge the GeoDataFrames
1342
- merged_gdf = pd.concat([gdf_1, gdf_2], ignore_index=True)
1343
-
1344
- return merged_gdf