voxcity 0.5.14__py3-none-any.whl → 0.5.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of voxcity might be problematic. Click here for more details.
- voxcity/downloader/citygml.py +202 -28
- voxcity/downloader/eubucco.py +91 -14
- voxcity/downloader/gee.py +164 -22
- voxcity/downloader/mbfp.py +55 -9
- voxcity/downloader/oemj.py +110 -24
- voxcity/downloader/omt.py +74 -7
- voxcity/downloader/osm.py +109 -23
- voxcity/downloader/overture.py +108 -23
- voxcity/downloader/utils.py +37 -7
- voxcity/exporter/envimet.py +180 -61
- voxcity/exporter/magicavoxel.py +138 -28
- voxcity/exporter/obj.py +159 -36
- voxcity/generator.py +159 -76
- voxcity/geoprocessor/draw.py +180 -27
- voxcity/geoprocessor/grid.py +178 -38
- voxcity/geoprocessor/mesh.py +347 -43
- voxcity/geoprocessor/network.py +196 -63
- voxcity/geoprocessor/polygon.py +365 -88
- voxcity/geoprocessor/utils.py +283 -72
- voxcity/simulator/solar.py +596 -201
- voxcity/simulator/view.py +278 -723
- voxcity/utils/lc.py +183 -0
- voxcity/utils/material.py +99 -32
- voxcity/utils/visualization.py +2578 -1988
- voxcity/utils/weather.py +816 -615
- {voxcity-0.5.14.dist-info → voxcity-0.5.15.dist-info}/METADATA +10 -12
- voxcity-0.5.15.dist-info/RECORD +38 -0
- {voxcity-0.5.14.dist-info → voxcity-0.5.15.dist-info}/WHEEL +1 -1
- voxcity-0.5.14.dist-info/RECORD +0 -38
- {voxcity-0.5.14.dist-info → voxcity-0.5.15.dist-info}/licenses/AUTHORS.rst +0 -0
- {voxcity-0.5.14.dist-info → voxcity-0.5.15.dist-info}/licenses/LICENSE +0 -0
- {voxcity-0.5.14.dist-info → voxcity-0.5.15.dist-info}/top_level.txt +0 -0
voxcity/geoprocessor/polygon.py
CHANGED
|
@@ -27,24 +27,42 @@ def filter_and_convert_gdf_to_geojson(gdf, rectangle_vertices):
|
|
|
27
27
|
"""
|
|
28
28
|
Filter a GeoDataFrame by a bounding rectangle and convert to GeoJSON format.
|
|
29
29
|
|
|
30
|
+
This function performs spatial filtering on a GeoDataFrame using a bounding rectangle,
|
|
31
|
+
and converts the filtered data to GeoJSON format. It handles both Polygon and MultiPolygon
|
|
32
|
+
geometries, splitting MultiPolygons into separate Polygon features.
|
|
33
|
+
|
|
30
34
|
Args:
|
|
31
35
|
gdf (GeoDataFrame): Input GeoDataFrame containing building data
|
|
36
|
+
Must have 'geometry' and 'height' columns
|
|
37
|
+
Any CRS is accepted, will be converted to WGS84 if needed
|
|
32
38
|
rectangle_vertices (list): List of (lon, lat) tuples defining the bounding rectangle
|
|
39
|
+
Must be in WGS84 (EPSG:4326) coordinate system
|
|
40
|
+
Must form a valid rectangle (4 vertices, clockwise or counterclockwise)
|
|
33
41
|
|
|
34
42
|
Returns:
|
|
35
43
|
list: List of GeoJSON features within the bounding rectangle
|
|
44
|
+
Each feature contains:
|
|
45
|
+
- geometry: Polygon coordinates in WGS84
|
|
46
|
+
- properties: Dictionary with 'height', 'confidence', and 'id'
|
|
47
|
+
- type: Always "Feature"
|
|
48
|
+
|
|
49
|
+
Memory Optimization:
|
|
50
|
+
- Uses spatial indexing for efficient filtering
|
|
51
|
+
- Downcasts numeric columns to save memory
|
|
52
|
+
- Cleans up intermediate data structures
|
|
53
|
+
- Splits MultiPolygons into separate features
|
|
36
54
|
"""
|
|
37
|
-
# Reproject to WGS84 if necessary
|
|
55
|
+
# Reproject to WGS84 if necessary for consistent coordinate system
|
|
38
56
|
if gdf.crs != 'EPSG:4326':
|
|
39
57
|
gdf = gdf.to_crs(epsg=4326)
|
|
40
58
|
|
|
41
|
-
# Downcast 'height' to save memory
|
|
59
|
+
# Downcast 'height' to float32 to save memory
|
|
42
60
|
gdf['height'] = pd.to_numeric(gdf['height'], downcast='float')
|
|
43
61
|
|
|
44
|
-
# Add 'confidence' column with default value
|
|
62
|
+
# Add 'confidence' column with default value for height reliability
|
|
45
63
|
gdf['confidence'] = -1.0
|
|
46
64
|
|
|
47
|
-
#
|
|
65
|
+
# Create shapely polygon from rectangle vertices for spatial filtering
|
|
48
66
|
rectangle_polygon = Polygon(rectangle_vertices)
|
|
49
67
|
|
|
50
68
|
# Use spatial index to efficiently filter geometries that intersect with rectangle
|
|
@@ -125,18 +143,39 @@ def extract_building_heights_from_gdf(gdf_0: gpd.GeoDataFrame, gdf_1: gpd.GeoDat
|
|
|
125
143
|
"""
|
|
126
144
|
Extract building heights from one GeoDataFrame and apply them to another based on spatial overlap.
|
|
127
145
|
|
|
146
|
+
This function transfers height information from a reference GeoDataFrame to a primary GeoDataFrame
|
|
147
|
+
based on the spatial overlap between building footprints. For each building in the primary dataset
|
|
148
|
+
that needs height data, it calculates a weighted average height from overlapping buildings in the
|
|
149
|
+
reference dataset.
|
|
150
|
+
|
|
128
151
|
Args:
|
|
129
152
|
gdf_0 (gpd.GeoDataFrame): Primary GeoDataFrame to update with heights
|
|
153
|
+
Must have 'geometry' column with building footprints
|
|
154
|
+
Will be updated with height values where missing or zero
|
|
130
155
|
gdf_1 (gpd.GeoDataFrame): Reference GeoDataFrame containing height data
|
|
156
|
+
Must have 'geometry' column with building footprints
|
|
157
|
+
Must have 'height' column with valid height values
|
|
131
158
|
|
|
132
159
|
Returns:
|
|
133
160
|
gpd.GeoDataFrame: Updated primary GeoDataFrame with extracted heights
|
|
161
|
+
Buildings with overlapping reference data get weighted average heights
|
|
162
|
+
Buildings without overlapping data retain original height or get NaN
|
|
163
|
+
|
|
164
|
+
Statistics Tracked:
|
|
165
|
+
- count_0: Number of buildings without height in primary dataset
|
|
166
|
+
- count_1: Number of buildings successfully updated with height
|
|
167
|
+
- count_2: Number of buildings where no reference height data found
|
|
168
|
+
|
|
169
|
+
Note:
|
|
170
|
+
- Uses R-tree spatial indexing for efficient overlap detection
|
|
171
|
+
- Handles invalid geometries by attempting to fix them with buffer(0)
|
|
172
|
+
- Weighted average is based on the area of overlap between buildings
|
|
134
173
|
"""
|
|
135
174
|
# Make a copy of input GeoDataFrame to avoid modifying original
|
|
136
175
|
gdf_primary = gdf_0.copy()
|
|
137
176
|
gdf_ref = gdf_1.copy()
|
|
138
177
|
|
|
139
|
-
# Make sure height columns exist
|
|
178
|
+
# Make sure height columns exist with default values
|
|
140
179
|
if 'height' not in gdf_primary.columns:
|
|
141
180
|
gdf_primary['height'] = 0.0
|
|
142
181
|
if 'height' not in gdf_ref.columns:
|
|
@@ -147,8 +186,7 @@ def extract_building_heights_from_gdf(gdf_0: gpd.GeoDataFrame, gdf_1: gpd.GeoDat
|
|
|
147
186
|
count_1 = 0 # Buildings updated with height
|
|
148
187
|
count_2 = 0 # Buildings with no height data found
|
|
149
188
|
|
|
150
|
-
# Create spatial index for reference buildings
|
|
151
|
-
from rtree import index
|
|
189
|
+
# Create spatial index for reference buildings to speed up intersection tests
|
|
152
190
|
spatial_index = index.Index()
|
|
153
191
|
for i, geom in enumerate(gdf_ref.geometry):
|
|
154
192
|
if geom.is_valid:
|
|
@@ -160,9 +198,9 @@ def extract_building_heights_from_gdf(gdf_0: gpd.GeoDataFrame, gdf_1: gpd.GeoDat
|
|
|
160
198
|
count_0 += 1
|
|
161
199
|
geom = row.geometry
|
|
162
200
|
|
|
163
|
-
#
|
|
164
|
-
overlapping_height_area = 0
|
|
165
|
-
overlapping_area = 0
|
|
201
|
+
# Variables for weighted average height calculation
|
|
202
|
+
overlapping_height_area = 0 # Sum of (height * overlap_area)
|
|
203
|
+
overlapping_area = 0 # Total overlap area
|
|
166
204
|
|
|
167
205
|
# Get potential intersecting buildings using spatial index
|
|
168
206
|
potential_matches = list(spatial_index.intersection(geom.bounds))
|
|
@@ -174,6 +212,7 @@ def extract_building_heights_from_gdf(gdf_0: gpd.GeoDataFrame, gdf_1: gpd.GeoDat
|
|
|
174
212
|
|
|
175
213
|
ref_row = gdf_ref.iloc[ref_idx]
|
|
176
214
|
try:
|
|
215
|
+
# Calculate intersection if geometries overlap
|
|
177
216
|
if geom.intersects(ref_row.geometry):
|
|
178
217
|
overlap_area = geom.intersection(ref_row.geometry).area
|
|
179
218
|
overlapping_height_area += ref_row['height'] * overlap_area
|
|
@@ -193,7 +232,7 @@ def extract_building_heights_from_gdf(gdf_0: gpd.GeoDataFrame, gdf_1: gpd.GeoDat
|
|
|
193
232
|
# Update height if overlapping buildings found
|
|
194
233
|
if overlapping_height_area > 0:
|
|
195
234
|
count_1 += 1
|
|
196
|
-
# Calculate weighted average height
|
|
235
|
+
# Calculate weighted average height based on overlap areas
|
|
197
236
|
new_height = overlapping_height_area / overlapping_area
|
|
198
237
|
gdf_primary.at[idx_primary, 'height'] = new_height
|
|
199
238
|
else:
|
|
@@ -202,7 +241,6 @@ def extract_building_heights_from_gdf(gdf_0: gpd.GeoDataFrame, gdf_1: gpd.GeoDat
|
|
|
202
241
|
|
|
203
242
|
# Print statistics about height updates
|
|
204
243
|
if count_0 > 0:
|
|
205
|
-
# print(f"{count_0} of the total {len(gdf_primary)} building footprint from OSM did not have height data.")
|
|
206
244
|
print(f"For {count_1} of these building footprints without height, values from the complementary source were assigned.")
|
|
207
245
|
print(f"For {count_2} of these building footprints without height, no data exist in complementary data.")
|
|
208
246
|
|
|
@@ -331,34 +369,55 @@ def geojson_to_gdf(geojson_data, id_col='id'):
|
|
|
331
369
|
"""
|
|
332
370
|
Convert a list of GeoJSON-like dict features into a GeoDataFrame.
|
|
333
371
|
|
|
372
|
+
This function takes a list of GeoJSON feature dictionaries (Fiona-like format)
|
|
373
|
+
and converts them into a GeoDataFrame, handling geometry conversion and property
|
|
374
|
+
extraction. It ensures each feature has a unique identifier.
|
|
375
|
+
|
|
334
376
|
Args:
|
|
335
|
-
geojson_data (List[Dict]): A list of feature dicts (Fiona-like)
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
377
|
+
geojson_data (List[Dict]): A list of feature dicts (Fiona-like)
|
|
378
|
+
Each dict is expected to have 'geometry' and 'properties' keys (a missing one falls back to None / {})
|
|
379
|
+
'geometry' must be a valid GeoJSON geometry
|
|
380
|
+
'properties' can be empty but must be a dict if present
|
|
381
|
+
id_col (str, optional): Name of property to use as an identifier
|
|
382
|
+
Default is 'id'
|
|
383
|
+
If not found in properties, a sequential ID will be created
|
|
384
|
+
Must be a string that can be used as a column name
|
|
385
|
+
|
|
339
386
|
Returns:
|
|
340
|
-
gpd.GeoDataFrame: GeoDataFrame with geometry and property columns
|
|
387
|
+
gpd.GeoDataFrame: GeoDataFrame with geometry and property columns
|
|
388
|
+
Will have 'geometry' column with Shapely geometries
|
|
389
|
+
Will have columns for all properties found in features
|
|
390
|
+
Will have id_col with unique identifiers
|
|
391
|
+
Will be set to WGS84 (EPSG:4326) coordinate system
|
|
392
|
+
|
|
393
|
+
Note:
|
|
394
|
+
- Handles missing properties gracefully
|
|
395
|
+
- Creates sequential IDs if id_col not found
|
|
396
|
+
- Converts GeoJSON geometries to Shapely objects
|
|
397
|
+
- Sets WGS84 as coordinate system
|
|
398
|
+
- Preserves all properties as columns
|
|
341
399
|
"""
|
|
342
400
|
# Build lists for geometry and properties
|
|
343
401
|
geometries = []
|
|
344
402
|
all_props = []
|
|
345
403
|
|
|
346
404
|
for i, feature in enumerate(geojson_data):
|
|
347
|
-
# Extract geometry
|
|
405
|
+
# Extract geometry and convert to Shapely object
|
|
348
406
|
geom = feature.get('geometry')
|
|
349
407
|
shapely_geom = shape(geom) if geom else None
|
|
350
408
|
|
|
351
|
-
# Extract properties
|
|
409
|
+
# Extract properties, ensuring they exist
|
|
352
410
|
props = feature.get('properties', {})
|
|
353
411
|
|
|
354
|
-
# If
|
|
412
|
+
# If specified ID column is missing, create sequential ID
|
|
355
413
|
if id_col not in props:
|
|
356
414
|
props[id_col] = i # fallback ID
|
|
357
415
|
|
|
358
|
-
# Capture geometry and all
|
|
416
|
+
# Capture geometry and all properties
|
|
359
417
|
geometries.append(shapely_geom)
|
|
360
418
|
all_props.append(props)
|
|
361
419
|
|
|
420
|
+
# Create GeoDataFrame with geometries and properties
|
|
362
421
|
gdf = gpd.GeoDataFrame(all_props, geometry=geometries, crs="EPSG:4326")
|
|
363
422
|
return gdf
|
|
364
423
|
|
|
@@ -503,18 +562,50 @@ def complement_building_heights_from_gdf(gdf_0, gdf_1,
|
|
|
503
562
|
def gdf_to_geojson_dicts(gdf, id_col='id'):
|
|
504
563
|
"""
|
|
505
564
|
Convert a GeoDataFrame to a list of dicts similar to GeoJSON features.
|
|
565
|
+
|
|
566
|
+
This function converts a GeoDataFrame into a list of dictionary objects that
|
|
567
|
+
follow the GeoJSON Feature format. Each feature will have geometry and properties,
|
|
568
|
+
with an optional ID field handled separately from other properties.
|
|
569
|
+
|
|
570
|
+
Args:
|
|
571
|
+
gdf (gpd.GeoDataFrame): GeoDataFrame to convert
|
|
572
|
+
Must have 'geometry' column with Shapely geometries
|
|
573
|
+
All non-geometry columns will become properties
|
|
574
|
+
Can optionally have id_col for unique identifiers
|
|
575
|
+
id_col (str, optional): Name of column to use as feature ID
|
|
576
|
+
Default is 'id'
|
|
577
|
+
If present, will be excluded from properties
|
|
578
|
+
If not present, features will not have explicit IDs
|
|
579
|
+
|
|
580
|
+
Returns:
|
|
581
|
+
list: List of GeoJSON-like feature dictionaries
|
|
582
|
+
Each dict will have:
|
|
583
|
+
- type: Always "Feature"
|
|
584
|
+
- geometry: GeoJSON geometry from Shapely object
|
|
585
|
+
- properties: All columns except geometry and ID
|
|
586
|
+
|
|
587
|
+
Note:
|
|
588
|
+
- Converts Shapely geometries to GeoJSON format
|
|
589
|
+
- Preserves all non-geometry columns as properties
|
|
590
|
+
- Handles missing ID column gracefully
|
|
591
|
+
- Maintains original property types
|
|
592
|
+
- Excludes ID from properties if specified
|
|
506
593
|
"""
|
|
594
|
+
# Convert GeoDataFrame to dictionary records for easier processing
|
|
507
595
|
records = gdf.to_dict(orient='records')
|
|
508
596
|
features = []
|
|
597
|
+
|
|
509
598
|
for rec in records:
|
|
510
|
-
# geometry
|
|
599
|
+
# Extract and convert geometry to GeoJSON format using __geo_interface__
|
|
511
600
|
geom = rec.pop('geometry', None)
|
|
512
601
|
if geom is not None:
|
|
513
602
|
geom = geom.__geo_interface__
|
|
514
|
-
|
|
603
|
+
|
|
604
|
+
# Extract ID if present and create properties dict excluding ID
|
|
515
605
|
feature_id = rec.get(id_col, None)
|
|
516
606
|
props = {k: v for k, v in rec.items() if k != id_col}
|
|
517
|
-
|
|
607
|
+
|
|
608
|
+
# Create GeoJSON Feature object with type, properties, and geometry
|
|
518
609
|
feature = {
|
|
519
610
|
'type': 'Feature',
|
|
520
611
|
'properties': props,
|
|
@@ -526,38 +617,63 @@ def gdf_to_geojson_dicts(gdf, id_col='id'):
|
|
|
526
617
|
|
|
527
618
|
def load_gdf_from_multiple_gz(file_paths):
|
|
528
619
|
"""
|
|
529
|
-
Load GeoJSON features from multiple gzipped files into a GeoDataFrame.
|
|
620
|
+
Load GeoJSON features from multiple gzipped files into a single GeoDataFrame.
|
|
621
|
+
|
|
622
|
+
This function reads multiple gzipped GeoJSON files, where each line in each file
|
|
623
|
+
represents a single GeoJSON feature. It combines all features into a single
|
|
624
|
+
GeoDataFrame, ensuring height properties are properly handled and coordinates
|
|
625
|
+
are in WGS84.
|
|
530
626
|
|
|
531
627
|
Args:
|
|
532
628
|
file_paths (list): List of paths to gzipped GeoJSON files
|
|
629
|
+
Each file should contain one GeoJSON feature per line
|
|
630
|
+
Files should be readable as UTF-8 text
|
|
631
|
+
Features should be in WGS84 coordinate system
|
|
533
632
|
|
|
534
633
|
Returns:
|
|
535
|
-
gpd.GeoDataFrame: GeoDataFrame containing
|
|
634
|
+
gpd.GeoDataFrame: Combined GeoDataFrame containing all features
|
|
635
|
+
Will have 'geometry' column with building footprints
|
|
636
|
+
Will have 'height' column (0 for missing values)
|
|
637
|
+
Will be set to WGS84 (EPSG:4326) coordinate system
|
|
638
|
+
|
|
639
|
+
Note:
|
|
640
|
+
- Skips lines that cannot be parsed as valid JSON
|
|
641
|
+
- Sets missing height values to 0
|
|
642
|
+
- Assumes input coordinates are in WGS84
|
|
643
|
+
- Memory usage scales with total number of features
|
|
644
|
+
- Reports JSON parsing errors but continues processing
|
|
536
645
|
"""
|
|
646
|
+
# Initialize list to store all GeoJSON features
|
|
537
647
|
geojson_objects = []
|
|
648
|
+
|
|
649
|
+
# Process each gzipped file
|
|
538
650
|
for gz_file_path in file_paths:
|
|
539
|
-
# Read each gzipped file line by line
|
|
651
|
+
# Read each gzipped file line by line as UTF-8 text
|
|
540
652
|
with gzip.open(gz_file_path, 'rt', encoding='utf-8') as file:
|
|
541
653
|
for line in file:
|
|
542
654
|
try:
|
|
655
|
+
# Parse each line as a GeoJSON feature
|
|
543
656
|
data = json.loads(line)
|
|
657
|
+
|
|
544
658
|
# Ensure height property exists and has valid value
|
|
545
659
|
if 'properties' in data and 'height' in data['properties']:
|
|
546
660
|
if data['properties']['height'] is None:
|
|
547
661
|
data['properties']['height'] = 0
|
|
548
662
|
else:
|
|
663
|
+
# Create properties dict if missing
|
|
549
664
|
if 'properties' not in data:
|
|
550
665
|
data['properties'] = {}
|
|
666
|
+
# Set default height value
|
|
551
667
|
data['properties']['height'] = 0
|
|
668
|
+
|
|
552
669
|
geojson_objects.append(data)
|
|
553
670
|
except json.JSONDecodeError as e:
|
|
554
671
|
print(f"Skipping line in {gz_file_path} due to JSONDecodeError: {e}")
|
|
555
672
|
|
|
556
673
|
# Convert list of GeoJSON features to GeoDataFrame
|
|
557
|
-
# swap_coordinates(geojson_objects)
|
|
558
674
|
gdf = gpd.GeoDataFrame.from_features(geojson_objects)
|
|
559
675
|
|
|
560
|
-
# Set
|
|
676
|
+
# Set coordinate reference system to WGS84
|
|
561
677
|
gdf.set_crs(epsg=4326, inplace=True)
|
|
562
678
|
|
|
563
679
|
return gdf
|
|
@@ -566,44 +682,92 @@ def filter_buildings(geojson_data, plotting_box):
|
|
|
566
682
|
"""
|
|
567
683
|
Filter building features that intersect with a given bounding box.
|
|
568
684
|
|
|
685
|
+
This function filters a list of GeoJSON building features to keep only those
|
|
686
|
+
that intersect with a specified bounding box. It performs geometry validation
|
|
687
|
+
and handles invalid geometries gracefully.
|
|
688
|
+
|
|
569
689
|
Args:
|
|
570
|
-
geojson_data (list): List of GeoJSON features
|
|
690
|
+
geojson_data (list): List of GeoJSON features representing buildings
|
|
691
|
+
Each feature must have valid 'geometry' property
|
|
692
|
+
Coordinates must be in same CRS as plotting_box
|
|
693
|
+
Invalid geometries will be skipped with warning
|
|
571
694
|
plotting_box (Polygon): Shapely polygon defining the bounding box
|
|
695
|
+
Must be a valid Shapely Polygon object
|
|
696
|
+
Must be in same coordinate system as geojson_data
|
|
697
|
+
Used for spatial intersection testing
|
|
572
698
|
|
|
573
699
|
Returns:
|
|
574
700
|
list: Filtered list of GeoJSON features that intersect with the bounding box
|
|
701
|
+
Features maintain their original structure
|
|
702
|
+
Invalid features are excluded
|
|
703
|
+
Order of features is preserved
|
|
704
|
+
|
|
705
|
+
Note:
|
|
706
|
+
- Validates polygon coordinates before processing
|
|
707
|
+
- Skips features with invalid geometries
|
|
708
|
+
- Reports validation and geometry errors
|
|
709
|
+
- No coordinate system transformation is performed
|
|
710
|
+
- Memory efficient as it creates new list only for valid features
|
|
575
711
|
"""
|
|
712
|
+
# Initialize list for valid intersecting features
|
|
576
713
|
filtered_features = []
|
|
714
|
+
|
|
715
|
+
# Process each feature in the input data
|
|
577
716
|
for feature in geojson_data:
|
|
578
717
|
# Validate polygon coordinates before processing
|
|
579
718
|
if not validate_polygon_coordinates(feature['geometry']):
|
|
580
719
|
print("Skipping feature with invalid geometry")
|
|
581
720
|
print(feature['geometry'])
|
|
582
721
|
continue
|
|
722
|
+
|
|
583
723
|
try:
|
|
584
|
-
# Convert GeoJSON geometry to Shapely geometry
|
|
724
|
+
# Convert GeoJSON geometry to Shapely geometry for spatial operations
|
|
585
725
|
geom = shape(feature['geometry'])
|
|
726
|
+
|
|
727
|
+
# Skip invalid geometries (no repair is attempted here)
|
|
586
728
|
if not geom.is_valid:
|
|
587
729
|
print("Skipping invalid geometry")
|
|
588
730
|
print(geom)
|
|
589
731
|
continue
|
|
732
|
+
|
|
590
733
|
# Keep features that intersect with bounding box
|
|
591
734
|
if plotting_box.intersects(geom):
|
|
592
735
|
filtered_features.append(feature)
|
|
736
|
+
|
|
593
737
|
except ShapelyError as e:
|
|
738
|
+
# Log geometry errors but continue processing
|
|
594
739
|
print(f"Skipping feature due to geometry error: {e}")
|
|
740
|
+
|
|
595
741
|
return filtered_features
|
|
596
742
|
|
|
597
743
|
def extract_building_heights_from_geotiff(geotiff_path, gdf):
|
|
598
744
|
"""
|
|
599
745
|
Extract building heights from a GeoTIFF raster for building footprints in a GeoDataFrame.
|
|
600
746
|
|
|
747
|
+
This function processes building footprints to extract height information from a GeoTIFF
|
|
748
|
+
raster file. It handles coordinate transformation between WGS84 (EPSG:4326) and the raster's
|
|
749
|
+
CRS, and calculates average heights for each building footprint.
|
|
750
|
+
|
|
601
751
|
Args:
|
|
602
|
-
geotiff_path (str): Path to the GeoTIFF height raster
|
|
603
|
-
gdf (gpd.GeoDataFrame): GeoDataFrame containing building footprints
|
|
752
|
+
geotiff_path (str): Path to the GeoTIFF height raster file containing elevation data
|
|
753
|
+
gdf (gpd.GeoDataFrame): GeoDataFrame containing building footprints with geometry column
|
|
754
|
+
The GeoDataFrame should be in WGS84 (EPSG:4326) coordinate system
|
|
604
755
|
|
|
605
756
|
Returns:
|
|
606
|
-
gpd.GeoDataFrame: Updated GeoDataFrame with extracted heights
|
|
757
|
+
gpd.GeoDataFrame: Updated GeoDataFrame with extracted heights in the 'height' column
|
|
758
|
+
- Buildings with valid height data will have their height values updated
|
|
759
|
+
- Buildings with no valid height data will have NaN values
|
|
760
|
+
- Original buildings with existing valid heights are preserved
|
|
761
|
+
|
|
762
|
+
Statistics Reported:
|
|
763
|
+
- Total number of buildings without height data
|
|
764
|
+
- Number of buildings successfully updated with height data
|
|
765
|
+
- Number of buildings where no height data could be found
|
|
766
|
+
|
|
767
|
+
Note:
|
|
768
|
+
- The function only processes Polygon geometries (not MultiPolygons or other types)
|
|
769
|
+
- Buildings are considered to need height processing if they have no height or height <= 0
|
|
770
|
+
- Heights are calculated as the mean of all valid raster values within the building footprint
|
|
607
771
|
"""
|
|
608
772
|
# Make a copy to avoid modifying the input
|
|
609
773
|
gdf = gdf.copy()
|
|
@@ -615,23 +779,28 @@ def extract_building_heights_from_geotiff(geotiff_path, gdf):
|
|
|
615
779
|
|
|
616
780
|
# Open GeoTIFF and process buildings
|
|
617
781
|
with rasterio.open(geotiff_path) as src:
|
|
618
|
-
# Create coordinate transformer from WGS84 to raster CRS
|
|
782
|
+
# Create coordinate transformer from WGS84 to raster CRS for geometry transformation
|
|
619
783
|
transformer = Transformer.from_crs(CRS.from_epsg(4326), src.crs, always_xy=True)
|
|
620
784
|
|
|
621
|
-
# Filter buildings that need height processing
|
|
785
|
+
# Filter buildings that need height processing:
|
|
786
|
+
# - Must be Polygon type (not MultiPolygon)
|
|
787
|
+
# - Either has no height or height <= 0
|
|
622
788
|
mask_condition = (gdf.geometry.geom_type == 'Polygon') & ((gdf.get('height', 0) <= 0) | gdf.get('height').isna())
|
|
623
789
|
buildings_to_process = gdf[mask_condition]
|
|
624
790
|
count_0 = len(buildings_to_process)
|
|
625
791
|
|
|
626
792
|
for idx, row in buildings_to_process.iterrows():
|
|
627
|
-
# Transform
|
|
793
|
+
# Transform building polygon coordinates from WGS84 to raster CRS
|
|
628
794
|
coords = list(row.geometry.exterior.coords)
|
|
629
795
|
transformed_coords = [transformer.transform(lon, lat) for lon, lat in coords]
|
|
630
796
|
polygon = shape({"type": "Polygon", "coordinates": [transformed_coords]})
|
|
631
797
|
|
|
632
798
|
try:
|
|
633
|
-
# Extract height values from raster within polygon
|
|
799
|
+
# Extract height values from raster within the building polygon
|
|
800
|
+
# all_touched=True ensures we get all pixels that the polygon touches
|
|
634
801
|
masked_data, _ = rasterio.mask.mask(src, [polygon], crop=True, all_touched=True)
|
|
802
|
+
|
|
803
|
+
# Filter out nodata values from the raster
|
|
635
804
|
heights = masked_data[0][masked_data[0] != src.nodata]
|
|
636
805
|
|
|
637
806
|
# Calculate average height if valid samples exist
|
|
@@ -641,7 +810,6 @@ def extract_building_heights_from_geotiff(geotiff_path, gdf):
|
|
|
641
810
|
else:
|
|
642
811
|
count_2 += 1
|
|
643
812
|
gdf.at[idx, 'height'] = np.nan
|
|
644
|
-
# print(f"No valid height data for building at index {idx}")
|
|
645
813
|
except ValueError as e:
|
|
646
814
|
print(f"Error processing building at index {idx}. Error: {str(e)}")
|
|
647
815
|
gdf.at[idx, 'height'] = None
|
|
@@ -656,14 +824,33 @@ def extract_building_heights_from_geotiff(geotiff_path, gdf):
|
|
|
656
824
|
|
|
657
825
|
def get_gdf_from_gpkg(gpkg_path, rectangle_vertices):
|
|
658
826
|
"""
|
|
659
|
-
Read a GeoPackage file and convert it to
|
|
827
|
+
Read a GeoPackage file and convert it to a GeoDataFrame with consistent CRS.
|
|
828
|
+
|
|
829
|
+
This function reads a GeoPackage file containing building footprints and ensures
|
|
830
|
+
the data is properly formatted with WGS84 coordinate system and unique identifiers.
|
|
831
|
+
It handles CRS conversion if needed and adds sequential IDs.
|
|
660
832
|
|
|
661
833
|
Args:
|
|
662
834
|
gpkg_path (str): Path to the GeoPackage file
|
|
835
|
+
File must exist and be readable
|
|
836
|
+
Must contain valid building footprint geometries
|
|
837
|
+
Any coordinate system is accepted
|
|
663
838
|
rectangle_vertices (list): List of (lon, lat) tuples defining the bounding rectangle
|
|
839
|
+
Must be in WGS84 (EPSG:4326) coordinate system
|
|
840
|
+
Used for spatial filtering (not implemented in this function)
|
|
664
841
|
|
|
665
842
|
Returns:
|
|
666
|
-
|
|
843
|
+
gpd.GeoDataFrame: GeoDataFrame containing building footprints
|
|
844
|
+
Will have 'geometry' column with building geometries
|
|
845
|
+
Will have 'id' column with sequential integers
|
|
846
|
+
Will be in WGS84 (EPSG:4326) coordinate system
|
|
847
|
+
|
|
848
|
+
Note:
|
|
849
|
+
- Prints informative message when opening file
|
|
850
|
+
- Sets CRS to WGS84 if not specified
|
|
851
|
+
- Transforms to WGS84 if different CRS
|
|
852
|
+
- Adds sequential IDs starting from 0
|
|
853
|
+
- rectangle_vertices parameter is currently unused
|
|
667
854
|
"""
|
|
668
855
|
# Open and read the GPKG file
|
|
669
856
|
print(f"Opening GPKG file: {gpkg_path}")
|
|
@@ -676,7 +863,7 @@ def get_gdf_from_gpkg(gpkg_path, rectangle_vertices):
|
|
|
676
863
|
elif gdf.crs != "EPSG:4326":
|
|
677
864
|
gdf = gdf.to_crs(epsg=4326)
|
|
678
865
|
|
|
679
|
-
# Replace id column with index numbers
|
|
866
|
+
# Replace id column with sequential index numbers
|
|
680
867
|
gdf['id'] = gdf.index
|
|
681
868
|
|
|
682
869
|
return gdf
|
|
@@ -685,66 +872,127 @@ def swap_coordinates(features):
|
|
|
685
872
|
"""
|
|
686
873
|
Swap coordinate ordering in GeoJSON features from (lat, lon) to (lon, lat).
|
|
687
874
|
|
|
875
|
+
This function modifies GeoJSON features in-place to swap the order of coordinates
|
|
876
|
+
from (latitude, longitude) to (longitude, latitude). It handles both Polygon and
|
|
877
|
+
MultiPolygon geometries, maintaining their structure while swapping coordinates.
|
|
878
|
+
|
|
688
879
|
Args:
|
|
689
880
|
features (list): List of GeoJSON features to process
|
|
881
|
+
Features must have 'geometry' property
|
|
882
|
+
Supported geometry types: 'Polygon', 'MultiPolygon'
|
|
883
|
+
Coordinates must be in (lat, lon) order initially
|
|
884
|
+
|
|
885
|
+
Returns:
|
|
886
|
+
None: Features are modified in-place
|
|
887
|
+
|
|
888
|
+
Note:
|
|
889
|
+
- Modifies features directly (no copy created)
|
|
890
|
+
- Handles both Polygon and MultiPolygon geometries
|
|
891
|
+
- For Polygons: processes every coordinate ring (exterior and any holes)
|
|
892
|
+
- For MultiPolygons: processes every ring of every member polygon
|
|
893
|
+
- Assumes input coordinates are in (lat, lon) order
|
|
894
|
+
- Resulting coordinates will be in (lon, lat) order
|
|
690
895
|
"""
|
|
691
896
|
# Process each feature based on geometry type
|
|
692
897
|
for feature in features:
|
|
693
898
|
if feature['geometry']['type'] == 'Polygon':
|
|
694
899
|
# Swap coordinates for simple polygons
|
|
900
|
+
# Each polygon is a list of rings (exterior and optional holes)
|
|
695
901
|
new_coords = [[[lon, lat] for lat, lon in polygon] for polygon in feature['geometry']['coordinates']]
|
|
696
902
|
feature['geometry']['coordinates'] = new_coords
|
|
697
903
|
elif feature['geometry']['type'] == 'MultiPolygon':
|
|
698
904
|
# Swap coordinates for multi-polygons (collections of polygons)
|
|
905
|
+
# Each multipolygon is a list of polygons, each with its own rings
|
|
699
906
|
new_coords = [[[[lon, lat] for lat, lon in polygon] for polygon in multipolygon] for multipolygon in feature['geometry']['coordinates']]
|
|
700
907
|
feature['geometry']['coordinates'] = new_coords
|
|
701
908
|
|
|
702
909
|
def save_geojson(features, save_path):
|
|
703
910
|
"""
|
|
704
|
-
Save GeoJSON features to a file with
|
|
911
|
+
Save GeoJSON features to a file with coordinate swapping and pretty printing.
|
|
912
|
+
|
|
913
|
+
This function takes a list of GeoJSON features, swaps their coordinate ordering
|
|
914
|
+
unconditionally, wraps them in a FeatureCollection, and saves to a file with proper
|
|
915
|
+
JSON formatting. It creates a deep copy to avoid modifying the original data.
|
|
705
916
|
|
|
706
917
|
Args:
|
|
707
918
|
features (list): List of GeoJSON features to save
|
|
919
|
+
Each feature should have valid GeoJSON structure
|
|
920
|
+
Features can be Polygon or MultiPolygon type
|
|
921
|
+
Coordinates are always swapped, so they must be in (lat, lon) order on input
|
|
708
922
|
save_path (str): Path where the GeoJSON file should be saved
|
|
923
|
+
Will overwrite existing file if present
|
|
924
|
+
Directory must exist and be writable
|
|
925
|
+
File is opened with the platform default text encoding (no encoding is passed to open)
|
|
926
|
+
|
|
927
|
+
Returns:
|
|
928
|
+
None
|
|
929
|
+
|
|
930
|
+
Note:
|
|
931
|
+
- Creates deep copy to preserve original feature data
|
|
932
|
+
- Swaps coordinates from (lat, lon) to (lon, lat) order
|
|
933
|
+
- Wraps features in a FeatureCollection object
|
|
934
|
+
- Uses pretty printing with 2-space indentation
|
|
935
|
+
- Handles both Polygon and MultiPolygon geometries
|
|
709
936
|
"""
|
|
710
937
|
# Create deep copy to avoid modifying original data
|
|
711
938
|
geojson_features = copy.deepcopy(features)
|
|
712
939
|
|
|
713
|
-
# Swap coordinate ordering
|
|
940
|
+
# Swap coordinate ordering from (lat, lon) to (lon, lat)
|
|
714
941
|
swap_coordinates(geojson_features)
|
|
715
942
|
|
|
716
|
-
# Create FeatureCollection
|
|
943
|
+
# Create FeatureCollection structure
|
|
717
944
|
geojson = {
|
|
718
945
|
"type": "FeatureCollection",
|
|
719
946
|
"features": geojson_features
|
|
720
947
|
}
|
|
721
948
|
|
|
722
|
-
# Write to file with pretty printing
|
|
949
|
+
# Write to file with pretty printing (2-space indentation)
|
|
723
950
|
with open(save_path, 'w') as f:
|
|
724
951
|
json.dump(geojson, f, indent=2)
|
|
725
952
|
|
|
726
953
|
def find_building_containing_point(building_gdf, target_point):
    """
    Return the IDs of all buildings whose footprint contains a given point.

    Every row of ``building_gdf`` is visited in turn. Rows whose geometry is not
    a Shapely ``Polygon`` are ignored; for the remaining rows the point is tested
    with ``Polygon.contains``.

    Args:
        building_gdf (GeoDataFrame): Building footprints to search
            Rows are read through ``iterrows()`` and need a 'geometry' entry
            Geometries other than Polygon (e.g. MultiPolygon) are skipped
            An 'id' entry is read per row; if absent, None is recorded instead
        target_point (tuple): Coordinates of the query point
            Only the first two elements are used, as x and y of a Shapely Point
            (i.e. (lon, lat) when the footprints are stored in lon/lat order)
            Must be in the same coordinate system as the footprint geometries

    Returns:
        list: 'id' values of the matching buildings, in row iteration order
            Empty list if no footprint contains the point
            Several IDs are possible when footprints overlap

    Note:
        - ``contains`` requires the point to lie in the polygon interior, so a
          point exactly on a footprint boundary does not match
        - Every row is tested one by one; no spatial index is used
    """
    # Build the Shapely point once so it can be reused for every footprint test
    query_point = Point(target_point[0], target_point[1])

    # Keep the 'id' of each plain Polygon footprint that contains the point
    return [
        row.get('id', None)
        for _, row in building_gdf.iterrows()
        if isinstance(row.geometry, Polygon) and row.geometry.contains(query_point)
    ]
|
|
@@ -752,40 +1000,51 @@ def find_building_containing_point(building_gdf, target_point):
|
|
|
752
1000
|
def get_buildings_in_drawn_polygon(building_gdf, drawn_polygon_vertices,
|
|
753
1001
|
operation='within'):
|
|
754
1002
|
"""
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
1003
|
+
Find buildings that intersect with or are contained within a user-drawn polygon.
|
|
1004
|
+
|
|
1005
|
+
This function identifies buildings from a GeoDataFrame that have a specified spatial
|
|
1006
|
+
relationship with a polygon defined by user-drawn vertices. The relationship can be
|
|
1007
|
+
either intersection (building overlaps polygon) or containment (building fully within
|
|
1008
|
+
polygon).
|
|
1009
|
+
|
|
759
1010
|
Args:
|
|
760
|
-
building_gdf (GeoDataFrame):
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
operation (str):
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
1011
|
+
building_gdf (GeoDataFrame): GeoDataFrame containing building footprints
|
|
1012
|
+
Must have 'geometry' column with Polygon geometries
|
|
1013
|
+
Should have an 'id' column; if it is missing, None is recorded for each matching building
|
|
1014
|
+
Geometries must be in same CRS as drawn_polygon_vertices
|
|
1015
|
+
drawn_polygon_vertices (list): List of (lon, lat) tuples defining polygon vertices
|
|
1016
|
+
Must be in same coordinate system as building_gdf geometries
|
|
1017
|
+
Must form a valid polygon (3+ distinct vertices; repeating the first vertex at the end is optional)
|
|
1018
|
+
Order must be (longitude, latitude) if using WGS84
|
|
1019
|
+
operation (str, optional): Type of spatial relationship to check
|
|
1020
|
+
'within': buildings must be fully contained in drawn polygon (default)
|
|
1021
|
+
'intersect': buildings must overlap with drawn polygon
|
|
1022
|
+
|
|
773
1023
|
Returns:
|
|
774
|
-
list:
|
|
775
|
-
|
|
1024
|
+
list: List of building IDs that satisfy the spatial relationship
|
|
1025
|
+
Empty list if no buildings meet the criteria
|
|
1026
|
+
IDs are returned in order of processing
|
|
1027
|
+
May contain None values if buildings lack IDs
|
|
1028
|
+
|
|
1029
|
+
Note:
|
|
1030
|
+
- Only processes Polygon geometries (skips MultiPolygons and others)
|
|
1031
|
+
- No spatial indexing is used, performs linear search through all buildings
|
|
1032
|
+
- Invalid operation parameter will raise ValueError
|
|
1033
|
+
- Does not validate polygon closure (first vertex = last vertex)
|
|
776
1034
|
"""
|
|
777
1035
|
# Create Shapely Polygon from drawn vertices
|
|
778
1036
|
drawn_polygon_shapely = Polygon(drawn_polygon_vertices)
|
|
779
1037
|
|
|
1038
|
+
# Initialize list to store matching building IDs
|
|
780
1039
|
included_building_ids = []
|
|
781
1040
|
|
|
782
1041
|
# Check each building in the GeoDataFrame
|
|
783
1042
|
for idx, row in building_gdf.iterrows():
|
|
784
|
-
# Skip any geometry that is not Polygon
|
|
1043
|
+
# Skip any geometry that is not a simple Polygon
|
|
785
1044
|
if not isinstance(row.geometry, Polygon):
|
|
786
1045
|
continue
|
|
787
1046
|
|
|
788
|
-
#
|
|
1047
|
+
# Check spatial relationship based on specified operation
|
|
789
1048
|
if operation == 'intersect':
|
|
790
1049
|
if row.geometry.intersects(drawn_polygon_shapely):
|
|
791
1050
|
included_building_ids.append(row.get('id', None))
|
|
@@ -799,23 +1058,41 @@ def get_buildings_in_drawn_polygon(building_gdf, drawn_polygon_vertices,
|
|
|
799
1058
|
|
|
800
1059
|
def process_building_footprints_by_overlap(filtered_gdf, overlap_threshold=0.5):
|
|
801
1060
|
"""
|
|
802
|
-
Process building footprints to merge overlapping buildings.
|
|
1061
|
+
Process building footprints to merge overlapping buildings based on area overlap ratio.
|
|
1062
|
+
|
|
1063
|
+
This function identifies and merges building footprints that significantly overlap with each other.
|
|
1064
|
+
Buildings are processed in order of decreasing area, and smaller buildings that overlap significantly
|
|
1065
|
+
with larger ones are assigned the ID of the larger building, effectively merging them.
|
|
803
1066
|
|
|
804
1067
|
Args:
|
|
805
1068
|
filtered_gdf (geopandas.GeoDataFrame): GeoDataFrame containing building footprints
|
|
806
|
-
|
|
1069
|
+
Must have 'geometry' column with building polygons
|
|
1070
|
+
If CRS is set, areas will be calculated in Web Mercator projection
|
|
1071
|
+
overlap_threshold (float, optional): Threshold for overlap ratio (0.0-1.0) to merge buildings
|
|
1072
|
+
Default is 0.5 (50% overlap)
|
|
1073
|
+
Higher values require more overlap for merging
|
|
1074
|
+
Lower values will result in more aggressive merging
|
|
807
1075
|
|
|
808
1076
|
Returns:
|
|
809
1077
|
geopandas.GeoDataFrame: Processed GeoDataFrame with updated IDs
|
|
1078
|
+
Overlapping buildings will share the same ID
|
|
1079
|
+
Original geometries are preserved, only IDs are updated
|
|
1080
|
+
All other columns remain unchanged
|
|
1081
|
+
|
|
1082
|
+
Note:
|
|
1083
|
+
- Uses R-tree spatial indexing for efficient overlap detection
|
|
1084
|
+
- Projects to Web Mercator (EPSG:3857) for planar area calculation if CRS is set; absolute areas are latitude-distorted, but only overlap ratios are used
|
|
1085
|
+
- Handles invalid geometries by attempting to fix them with buffer(0)
|
|
1086
|
+
- Processes buildings in order of decreasing area (largest first)
|
|
810
1087
|
"""
|
|
811
1088
|
# Make a copy to avoid modifying the original
|
|
812
1089
|
gdf = filtered_gdf.copy()
|
|
813
1090
|
|
|
814
|
-
# Ensure 'id' column exists
|
|
1091
|
+
# Ensure 'id' column exists, use index if not present
|
|
815
1092
|
if 'id' not in gdf.columns:
|
|
816
1093
|
gdf['id'] = gdf.index
|
|
817
1094
|
|
|
818
|
-
#
|
|
1095
|
+
# Project to Web Mercator for accurate area calculation if CRS is set
|
|
819
1096
|
if gdf.crs is None:
|
|
820
1097
|
# Work with original geometries if no CRS is set
|
|
821
1098
|
gdf_projected = gdf.copy()
|
|
@@ -825,18 +1102,18 @@ def process_building_footprints_by_overlap(filtered_gdf, overlap_threshold=0.5):
|
|
|
825
1102
|
# Project to Web Mercator for accurate area calculation
|
|
826
1103
|
gdf_projected = gdf.to_crs("EPSG:3857")
|
|
827
1104
|
|
|
828
|
-
# Calculate areas
|
|
1105
|
+
# Calculate areas and sort by decreasing area for processing largest buildings first
|
|
829
1106
|
gdf_projected['area'] = gdf_projected.geometry.area
|
|
830
1107
|
gdf_projected = gdf_projected.sort_values(by='area', ascending=False)
|
|
831
1108
|
gdf_projected = gdf_projected.reset_index(drop=True)
|
|
832
1109
|
|
|
833
|
-
# Create spatial index for efficient querying
|
|
1110
|
+
# Create spatial index for efficient querying of potential overlaps
|
|
834
1111
|
spatial_idx = index.Index()
|
|
835
1112
|
for i, geom in enumerate(gdf_projected.geometry):
|
|
836
1113
|
if geom.is_valid:
|
|
837
1114
|
spatial_idx.insert(i, geom.bounds)
|
|
838
1115
|
else:
|
|
839
|
-
# Fix invalid geometries
|
|
1116
|
+
# Fix invalid geometries using buffer(0) technique
|
|
840
1117
|
fixed_geom = geom.buffer(0)
|
|
841
1118
|
if fixed_geom.is_valid:
|
|
842
1119
|
spatial_idx.insert(i, fixed_geom.bounds)
|
|
@@ -844,52 +1121,52 @@ def process_building_footprints_by_overlap(filtered_gdf, overlap_threshold=0.5):
|
|
|
844
1121
|
# Track ID replacements to avoid repeated processing
|
|
845
1122
|
id_mapping = {}
|
|
846
1123
|
|
|
847
|
-
# Process each building (skip the largest one)
|
|
1124
|
+
# Process each building (skip the largest one as it's our reference)
|
|
848
1125
|
for i in range(1, len(gdf_projected)):
|
|
849
1126
|
current_poly = gdf_projected.iloc[i].geometry
|
|
850
1127
|
current_area = gdf_projected.iloc[i].area
|
|
851
1128
|
current_id = gdf_projected.iloc[i]['id']
|
|
852
1129
|
|
|
853
|
-
# Skip if already mapped
|
|
1130
|
+
# Skip if already mapped to another ID
|
|
854
1131
|
if current_id in id_mapping:
|
|
855
1132
|
continue
|
|
856
1133
|
|
|
857
|
-
# Ensure geometry is valid
|
|
1134
|
+
# Ensure geometry is valid for processing
|
|
858
1135
|
if not current_poly.is_valid:
|
|
859
1136
|
current_poly = current_poly.buffer(0)
|
|
860
1137
|
if not current_poly.is_valid:
|
|
861
1138
|
continue
|
|
862
1139
|
|
|
863
|
-
# Find potential overlaps with larger polygons
|
|
1140
|
+
# Find potential overlaps with larger polygons using spatial index
|
|
864
1141
|
potential_overlaps = [j for j in spatial_idx.intersection(current_poly.bounds) if j < i]
|
|
865
1142
|
|
|
866
1143
|
for j in potential_overlaps:
|
|
867
1144
|
larger_poly = gdf_projected.iloc[j].geometry
|
|
868
1145
|
larger_id = gdf_projected.iloc[j]['id']
|
|
869
1146
|
|
|
870
|
-
#
|
|
1147
|
+
# If the larger building was already merged, use the ID it was mapped to (single lookup)
|
|
871
1148
|
if larger_id in id_mapping:
|
|
872
1149
|
larger_id = id_mapping[larger_id]
|
|
873
1150
|
|
|
874
|
-
# Ensure geometry is valid
|
|
1151
|
+
# Ensure geometry is valid for intersection test
|
|
875
1152
|
if not larger_poly.is_valid:
|
|
876
1153
|
larger_poly = larger_poly.buffer(0)
|
|
877
1154
|
if not larger_poly.is_valid:
|
|
878
1155
|
continue
|
|
879
1156
|
|
|
880
1157
|
try:
|
|
881
|
-
# Calculate overlap
|
|
1158
|
+
# Calculate overlap ratio relative to current building's area
|
|
882
1159
|
if current_poly.intersects(larger_poly):
|
|
883
1160
|
overlap = current_poly.intersection(larger_poly)
|
|
884
1161
|
overlap_ratio = overlap.area / current_area
|
|
885
1162
|
|
|
886
|
-
#
|
|
1163
|
+
# Merge buildings if overlap exceeds threshold
|
|
887
1164
|
if overlap_ratio > overlap_threshold:
|
|
888
1165
|
id_mapping[current_id] = larger_id
|
|
889
1166
|
gdf_projected.at[i, 'id'] = larger_id
|
|
890
1167
|
break # Stop at first significant overlap
|
|
891
1168
|
except (GEOSException, ValueError) as e:
|
|
892
|
-
#
|
|
1169
|
+
# Skip problematic geometries
|
|
893
1170
|
continue
|
|
894
1171
|
|
|
895
1172
|
# Propagate ID changes through the original DataFrame
|