voxcity 0.6.26__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- voxcity/__init__.py +10 -4
- voxcity/downloader/__init__.py +2 -1
- voxcity/downloader/gba.py +210 -0
- voxcity/downloader/gee.py +5 -1
- voxcity/downloader/mbfp.py +1 -1
- voxcity/downloader/oemj.py +80 -8
- voxcity/downloader/utils.py +73 -73
- voxcity/errors.py +30 -0
- voxcity/exporter/__init__.py +9 -1
- voxcity/exporter/cityles.py +129 -34
- voxcity/exporter/envimet.py +51 -26
- voxcity/exporter/magicavoxel.py +42 -5
- voxcity/exporter/netcdf.py +27 -0
- voxcity/exporter/obj.py +103 -28
- voxcity/generator/__init__.py +47 -0
- voxcity/generator/api.py +721 -0
- voxcity/generator/grids.py +381 -0
- voxcity/generator/io.py +94 -0
- voxcity/generator/pipeline.py +282 -0
- voxcity/generator/update.py +429 -0
- voxcity/generator/voxelizer.py +392 -0
- voxcity/geoprocessor/__init__.py +75 -6
- voxcity/geoprocessor/conversion.py +153 -0
- voxcity/geoprocessor/draw.py +1488 -1169
- voxcity/geoprocessor/heights.py +199 -0
- voxcity/geoprocessor/io.py +101 -0
- voxcity/geoprocessor/merge_utils.py +91 -0
- voxcity/geoprocessor/mesh.py +26 -10
- voxcity/geoprocessor/network.py +35 -6
- voxcity/geoprocessor/overlap.py +84 -0
- voxcity/geoprocessor/raster/__init__.py +82 -0
- voxcity/geoprocessor/raster/buildings.py +435 -0
- voxcity/geoprocessor/raster/canopy.py +258 -0
- voxcity/geoprocessor/raster/core.py +150 -0
- voxcity/geoprocessor/raster/export.py +93 -0
- voxcity/geoprocessor/raster/landcover.py +159 -0
- voxcity/geoprocessor/raster/raster.py +110 -0
- voxcity/geoprocessor/selection.py +85 -0
- voxcity/geoprocessor/utils.py +824 -820
- voxcity/models.py +113 -0
- voxcity/simulator/common/__init__.py +22 -0
- voxcity/simulator/common/geometry.py +98 -0
- voxcity/simulator/common/raytracing.py +450 -0
- voxcity/simulator/solar/__init__.py +66 -0
- voxcity/simulator/solar/integration.py +336 -0
- voxcity/simulator/solar/kernels.py +62 -0
- voxcity/simulator/solar/radiation.py +648 -0
- voxcity/simulator/solar/sky.py +668 -0
- voxcity/simulator/solar/temporal.py +792 -0
- voxcity/simulator/view.py +36 -2286
- voxcity/simulator/visibility/__init__.py +29 -0
- voxcity/simulator/visibility/landmark.py +392 -0
- voxcity/simulator/visibility/view.py +508 -0
- voxcity/utils/__init__.py +11 -0
- voxcity/utils/classes.py +194 -0
- voxcity/utils/lc.py +80 -39
- voxcity/utils/logging.py +61 -0
- voxcity/utils/orientation.py +51 -0
- voxcity/utils/shape.py +230 -0
- voxcity/utils/weather/__init__.py +26 -0
- voxcity/utils/weather/epw.py +146 -0
- voxcity/utils/weather/files.py +36 -0
- voxcity/utils/weather/onebuilding.py +486 -0
- voxcity/visualizer/__init__.py +24 -0
- voxcity/visualizer/builder.py +43 -0
- voxcity/visualizer/grids.py +141 -0
- voxcity/visualizer/maps.py +187 -0
- voxcity/visualizer/palette.py +228 -0
- voxcity/visualizer/renderer.py +1145 -0
- {voxcity-0.6.26.dist-info → voxcity-1.0.2.dist-info}/METADATA +162 -48
- voxcity-1.0.2.dist-info/RECORD +81 -0
- voxcity/generator.py +0 -1302
- voxcity/geoprocessor/grid.py +0 -1739
- voxcity/geoprocessor/polygon.py +0 -1344
- voxcity/simulator/solar.py +0 -2339
- voxcity/utils/visualization.py +0 -2849
- voxcity/utils/weather.py +0 -1038
- voxcity-0.6.26.dist-info/RECORD +0 -38
- {voxcity-0.6.26.dist-info → voxcity-1.0.2.dist-info}/WHEEL +0 -0
- {voxcity-0.6.26.dist-info → voxcity-1.0.2.dist-info}/licenses/AUTHORS.rst +0 -0
- {voxcity-0.6.26.dist-info → voxcity-1.0.2.dist-info}/licenses/LICENSE +0 -0
voxcity/geoprocessor/polygon.py
DELETED
|
@@ -1,1344 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Module for handling GeoJSON data related to building footprints and heights.
|
|
3
|
-
|
|
4
|
-
This module provides functionality for loading, filtering, transforming and saving GeoJSON data,
|
|
5
|
-
with a focus on building footprints and their height information. It includes functions for
|
|
6
|
-
coordinate transformations, spatial filtering, and height data extraction from various sources.
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
|
-
# Required imports for GIS operations, data manipulation and file handling
|
|
10
|
-
import geopandas as gpd
|
|
11
|
-
import json
|
|
12
|
-
from shapely.geometry import Polygon, Point, shape
|
|
13
|
-
from shapely.errors import GEOSException, ShapelyError
|
|
14
|
-
import pandas as pd
|
|
15
|
-
import numpy as np
|
|
16
|
-
import gzip
|
|
17
|
-
from typing import List, Dict
|
|
18
|
-
from pyproj import Transformer, CRS
|
|
19
|
-
import rasterio
|
|
20
|
-
from rasterio.mask import mask
|
|
21
|
-
import copy
|
|
22
|
-
from rtree import index
|
|
23
|
-
|
|
24
|
-
from .utils import validate_polygon_coordinates
|
|
25
|
-
|
|
26
|
-
def filter_and_convert_gdf_to_geojson(gdf, rectangle_vertices):
    """
    Filter a GeoDataFrame by a bounding rectangle and convert to GeoJSON features.

    Performs spatial filtering on a GeoDataFrame using a bounding rectangle and
    converts the filtered rows to GeoJSON Feature dicts. MultiPolygon geometries
    are split into one Feature per polygon part; non-polygon geometries are skipped.

    Args:
        gdf (GeoDataFrame): Input GeoDataFrame containing building data.
            Must have 'geometry' and 'height' columns.
            Any CRS is accepted; reprojected to WGS84 if needed.
        rectangle_vertices (list): List of (lon, lat) tuples defining the bounding
            rectangle, in WGS84 (EPSG:4326).

    Returns:
        list: GeoJSON Feature dicts within the bounding rectangle. Each has:
            - type: always "Feature"
            - geometry: Polygon coordinates in WGS84
            - properties: dict with 'height', 'confidence', and a unique
              sequential 'id' (starting at 1)

    Note:
        BUG FIX vs. previous version: the feature id is now assigned per emitted
        feature. Previously all parts split from one MultiPolygon shared a single
        id while the counter still advanced per part, yielding duplicate ids and
        gaps in the sequence.
    """
    # Reproject to WGS84 if necessary for a consistent coordinate system.
    if gdf.crs != 'EPSG:4326':
        gdf = gdf.to_crs(epsg=4326)

    # Downcast 'height' to float32 to save memory.
    gdf['height'] = pd.to_numeric(gdf['height'], downcast='float')

    # Default 'confidence' marker for height reliability (-1.0 = unknown).
    gdf['confidence'] = -1.0

    # Shapely polygon of the clip rectangle for spatial filtering.
    rectangle_polygon = Polygon(rectangle_vertices)

    # Coarse filter via the spatial index (bounding boxes), then exact test.
    gdf.sindex  # ensure the spatial index is built
    possible_matches_index = list(gdf.sindex.intersection(rectangle_polygon.bounds))
    possible_matches = gdf.iloc[possible_matches_index]
    precise_matches = possible_matches[possible_matches.intersects(rectangle_polygon)]
    filtered_gdf = precise_matches.copy()

    # Release intermediate frames to keep peak memory down.
    del gdf, possible_matches, precise_matches

    features = []
    feature_id = 1
    for _, row in filtered_gdf.iterrows():
        geom = row['geometry'].__geo_interface__

        # Normalize to a list of Polygon coordinate arrays so MultiPolygons
        # are split into separate single-Polygon features.
        if geom['type'] == 'MultiPolygon':
            polygon_coord_sets = geom['coordinates']
        elif geom['type'] == 'Polygon':
            polygon_coord_sets = [geom['coordinates']]
        else:
            continue  # skip points, lines, and other geometry types

        for polygon_coords in polygon_coord_sets:
            features.append({
                'type': 'Feature',
                'properties': {
                    'height': row['height'],
                    'confidence': row['confidence'],
                    # Assign the id here so every emitted feature — including
                    # each part of a split MultiPolygon — gets a unique value.
                    'id': feature_id,
                },
                'geometry': {
                    'type': 'Polygon',
                    'coordinates': polygon_coords,
                },
            })
            feature_id += 1

    del filtered_gdf

    return features
|
|
125
|
-
def get_geojson_from_gpkg(gpkg_path, rectangle_vertices):
    """
    Load a GeoPackage and return its features as GeoJSON clipped to a rectangle.

    Args:
        gpkg_path (str): Path to the GeoPackage file.
        rectangle_vertices (list): (lon, lat) tuples describing the bounding
            rectangle used to clip the data.

    Returns:
        list: GeoJSON Feature dicts lying within the bounding rectangle.
    """
    print(f"Opening GPKG file: {gpkg_path}")
    loaded = gpd.read_file(gpkg_path)
    return filter_and_convert_gdf_to_geojson(loaded, rectangle_vertices)
|
|
142
|
-
def extract_building_heights_from_gdf(gdf_0: gpd.GeoDataFrame, gdf_1: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """
    Extract building heights from one GeoDataFrame and apply them to another based on spatial overlap.

    Transfers height information from a reference GeoDataFrame to a primary
    GeoDataFrame. For each primary building whose height is missing, NaN, or
    <= 0, the new height is the overlap-area-weighted average of the heights
    of all intersecting reference buildings. Buildings with no usable overlap
    are set to NaN.

    Args:
        gdf_0 (gpd.GeoDataFrame): Primary GeoDataFrame to update with heights.
            Must have a 'geometry' column; a 'height' column is created with
            0.0 defaults if absent.
        gdf_1 (gpd.GeoDataFrame): Reference GeoDataFrame containing height data.
            Must have a 'geometry' column; a 'height' column is created with
            0.0 defaults if absent.
            NOTE(review): both frames are assumed to share a CRS — nothing
            here reprojects; confirm at the call site.

    Returns:
        gpd.GeoDataFrame: Copy of gdf_0 with heights filled in where overlap
        data existed, NaN where it did not. Rows that already had a positive
        height are untouched.

    Note:
        - Uses an R-tree index over the reference geometries for fast
          candidate lookup; invalid reference geometries are excluded from
          the index (and therefore never contribute).
        - On a GEOS error during intersection, a buffer(0) repair of the
          reference geometry is attempted before giving up on that pair.
    """
    # Work on copies so the caller's frames are never mutated.
    gdf_primary = gdf_0.copy()
    gdf_ref = gdf_1.copy()

    # Guarantee a 'height' column exists on both sides (0.0 = "no data").
    if 'height' not in gdf_primary.columns:
        gdf_primary['height'] = 0.0
    if 'height' not in gdf_ref.columns:
        gdf_ref['height'] = 0.0

    # Counters reported at the end.
    count_0 = 0  # primary buildings lacking a usable height
    count_1 = 0  # of those, successfully filled from the reference data
    count_2 = 0  # of those, with no overlapping reference data at all

    # R-tree over reference bounding boxes. The insertion key is the
    # POSITIONAL index i, matched below via gdf_ref.iloc — do not switch
    # either side to label-based indexing independently.
    spatial_index = index.Index()
    for i, geom in enumerate(gdf_ref.geometry):
        if geom.is_valid:
            spatial_index.insert(i, geom.bounds)

    for idx_primary, row in gdf_primary.iterrows():
        # NaN <= 0 evaluates False, so the pd.isna() check is what catches
        # missing values — keep both sides of this condition.
        if row['height'] <= 0 or pd.isna(row['height']):
            count_0 += 1
            geom = row.geometry

            # Accumulators for the area-weighted average:
            # sum(height * overlap_area) / sum(overlap_area)
            overlapping_height_area = 0
            overlapping_area = 0

            # Bounding-box candidates; exact intersection tested below.
            potential_matches = list(spatial_index.intersection(geom.bounds))

            for ref_idx in potential_matches:
                # Defensive guard: skip any stale index entry out of range.
                if ref_idx >= len(gdf_ref):
                    continue

                ref_row = gdf_ref.iloc[ref_idx]
                try:
                    if geom.intersects(ref_row.geometry):
                        overlap_area = geom.intersection(ref_row.geometry).area
                        overlapping_height_area += ref_row['height'] * overlap_area
                        overlapping_area += overlap_area
                except GEOSException:
                    # Invalid geometry: attempt the standard buffer(0) repair
                    # and retry once; otherwise skip this candidate.
                    try:
                        fixed_ref_geom = ref_row.geometry.buffer(0)
                        if geom.intersects(fixed_ref_geom):
                            overlap_area = geom.intersection(fixed_ref_geom).area
                            overlapping_height_area += ref_row['height'] * overlap_area
                            overlapping_area += overlap_area
                    except Exception:
                        print(f"Failed to fix polygon")
                        continue

            # NOTE(review): the condition tests the weighted numerator, so
            # overlaps consisting solely of zero-height reference buildings
            # fall through to the NaN branch — confirm that is intended.
            if overlapping_height_area > 0:
                count_1 += 1
                # Area-weighted average height over all overlapping buildings.
                new_height = overlapping_height_area / overlapping_area
                gdf_primary.at[idx_primary, 'height'] = new_height
            else:
                count_2 += 1
                gdf_primary.at[idx_primary, 'height'] = np.nan

    # Summary of how many gaps were filled vs. left empty.
    if count_0 > 0:
        print(f"For {count_1} of these building footprints without height, values from the complementary source were assigned.")
        print(f"For {count_2} of these building footprints without height, no data exist in complementary data.")

    return gdf_primary
|
|
249
|
-
# from typing import List, Dict
|
|
250
|
-
# from shapely.geometry import shape
|
|
251
|
-
# from shapely.errors import GEOSException
|
|
252
|
-
# import numpy as np
|
|
253
|
-
|
|
254
|
-
# def complement_building_heights_from_geojson(geojson_data_0: List[Dict], geojson_data_1: List[Dict]) -> List[Dict]:
|
|
255
|
-
# """
|
|
256
|
-
# Complement building heights in one GeoJSON dataset with data from another and add non-intersecting buildings.
|
|
257
|
-
|
|
258
|
-
# Args:
|
|
259
|
-
# geojson_data_0 (List[Dict]): Primary GeoJSON features to update with heights
|
|
260
|
-
# geojson_data_1 (List[Dict]): Reference GeoJSON features containing height data
|
|
261
|
-
|
|
262
|
-
# Returns:
|
|
263
|
-
# List[Dict]: Updated GeoJSON features with complemented heights and additional buildings
|
|
264
|
-
# """
|
|
265
|
-
# # Convert primary dataset to Shapely polygons for intersection checking
|
|
266
|
-
# existing_buildings = []
|
|
267
|
-
# for feature in geojson_data_0:
|
|
268
|
-
# geom = shape(feature['geometry'])
|
|
269
|
-
# existing_buildings.append(geom)
|
|
270
|
-
|
|
271
|
-
# # Convert reference dataset to Shapely polygons with height info
|
|
272
|
-
# reference_buildings = []
|
|
273
|
-
# for feature in geojson_data_1:
|
|
274
|
-
# geom = shape(feature['geometry'])
|
|
275
|
-
# height = feature['properties']['height']
|
|
276
|
-
# reference_buildings.append((geom, height, feature))
|
|
277
|
-
|
|
278
|
-
# # Initialize counters for statistics
|
|
279
|
-
# count_0 = 0 # Buildings without height
|
|
280
|
-
# count_1 = 0 # Buildings updated with height
|
|
281
|
-
# count_2 = 0 # Buildings with no height data found
|
|
282
|
-
# count_3 = 0 # New non-intersecting buildings added
|
|
283
|
-
|
|
284
|
-
# # Process primary dataset and update heights where needed
|
|
285
|
-
# updated_geojson_data_0 = []
|
|
286
|
-
# for feature in geojson_data_0:
|
|
287
|
-
# geom = shape(feature['geometry'])
|
|
288
|
-
# height = feature['properties']['height']
|
|
289
|
-
# if height == 0:
|
|
290
|
-
# count_0 += 1
|
|
291
|
-
# # Calculate weighted average height based on overlapping areas
|
|
292
|
-
# overlapping_height_area = 0
|
|
293
|
-
# overlapping_area = 0
|
|
294
|
-
# for ref_geom, ref_height, _ in reference_buildings:
|
|
295
|
-
# try:
|
|
296
|
-
# if geom.intersects(ref_geom):
|
|
297
|
-
# overlap_area = geom.intersection(ref_geom).area
|
|
298
|
-
# overlapping_height_area += ref_height * overlap_area
|
|
299
|
-
# overlapping_area += overlap_area
|
|
300
|
-
# except GEOSException as e:
|
|
301
|
-
# # Try to fix invalid geometries
|
|
302
|
-
# try:
|
|
303
|
-
# fixed_ref_geom = ref_geom.buffer(0)
|
|
304
|
-
# if geom.intersects(fixed_ref_geom):
|
|
305
|
-
# overlap_area = geom.intersection(ref_geom).area
|
|
306
|
-
# overlapping_height_area += ref_height * overlap_area
|
|
307
|
-
# overlapping_area += overlap_area
|
|
308
|
-
# except Exception as fix_error:
|
|
309
|
-
# print(f"Failed to fix polygon")
|
|
310
|
-
# continue
|
|
311
|
-
|
|
312
|
-
# # Update height if overlapping buildings found
|
|
313
|
-
# if overlapping_height_area > 0:
|
|
314
|
-
# count_1 += 1
|
|
315
|
-
# new_height = overlapping_height_area / overlapping_area
|
|
316
|
-
# feature['properties']['height'] = new_height
|
|
317
|
-
# else:
|
|
318
|
-
# count_2 += 1
|
|
319
|
-
# feature['properties']['height'] = np.nan
|
|
320
|
-
|
|
321
|
-
# updated_geojson_data_0.append(feature)
|
|
322
|
-
|
|
323
|
-
# # Add non-intersecting buildings from reference dataset
|
|
324
|
-
# for ref_geom, ref_height, ref_feature in reference_buildings:
|
|
325
|
-
# has_intersection = False
|
|
326
|
-
# try:
|
|
327
|
-
# # Check if reference building intersects with any existing building
|
|
328
|
-
# for existing_geom in existing_buildings:
|
|
329
|
-
# if ref_geom.intersects(existing_geom):
|
|
330
|
-
# has_intersection = True
|
|
331
|
-
# break
|
|
332
|
-
|
|
333
|
-
# # Add building if it doesn't intersect with any existing ones
|
|
334
|
-
# if not has_intersection:
|
|
335
|
-
# updated_geojson_data_0.append(ref_feature)
|
|
336
|
-
# count_3 += 1
|
|
337
|
-
|
|
338
|
-
# except GEOSException as e:
|
|
339
|
-
# # Try to fix invalid geometries
|
|
340
|
-
# try:
|
|
341
|
-
# fixed_ref_geom = ref_geom.buffer(0)
|
|
342
|
-
# for existing_geom in existing_buildings:
|
|
343
|
-
# if fixed_ref_geom.intersects(existing_geom):
|
|
344
|
-
# has_intersection = True
|
|
345
|
-
# break
|
|
346
|
-
|
|
347
|
-
# if not has_intersection:
|
|
348
|
-
# updated_geojson_data_0.append(ref_feature)
|
|
349
|
-
# count_3 += 1
|
|
350
|
-
# except Exception as fix_error:
|
|
351
|
-
# print(f"Failed to process non-intersecting building")
|
|
352
|
-
# continue
|
|
353
|
-
|
|
354
|
-
# # Print statistics about updates
|
|
355
|
-
# if count_0 > 0:
|
|
356
|
-
# print(f"{count_0} of the total {len(geojson_data_0)} building footprint from base source did not have height data.")
|
|
357
|
-
# print(f"For {count_1} of these building footprints without height, values from complement source were assigned.")
|
|
358
|
-
# print(f"{count_3} non-intersecting buildings from Microsoft Building Footprints were added to the output.")
|
|
359
|
-
|
|
360
|
-
# return updated_geojson_data_0
|
|
361
|
-
|
|
362
|
-
import numpy as np
|
|
363
|
-
import geopandas as gpd
|
|
364
|
-
import pandas as pd
|
|
365
|
-
from shapely.geometry import shape
|
|
366
|
-
from shapely.errors import GEOSException
|
|
367
|
-
|
|
368
|
-
def geojson_to_gdf(geojson_data, id_col='id'):
    """
    Convert a list of GeoJSON-like feature dicts into a GeoDataFrame.

    Each feature's 'geometry' is turned into a Shapely object and its
    'properties' become the attribute columns. Features missing the
    identifier property receive their list position as a fallback id.

    Args:
        geojson_data (List[Dict]): Feature dicts (Fiona-like). Each should
            carry 'geometry' (valid GeoJSON geometry or absent) and
            'properties' (dict, may be absent).
        id_col (str, optional): Property name used as the identifier.
            Defaults to 'id'. Added with a sequential value when missing.

    Returns:
        gpd.GeoDataFrame: Frame in WGS84 (EPSG:4326) with one column per
        property plus the geometry column.

    Note:
        The fallback id is written into the feature's own properties dict,
        so input features lacking the id are updated in place.
    """
    geom_column = []
    prop_rows = []

    for position, feat in enumerate(geojson_data):
        # Missing/empty geometry maps to None rather than raising.
        raw_geom = feat.get('geometry')
        geom_column.append(shape(raw_geom) if raw_geom else None)

        attrs = feat.get('properties', {})
        if id_col not in attrs:
            attrs[id_col] = position  # sequential fallback identifier

        prop_rows.append(attrs)

    return gpd.GeoDataFrame(prop_rows, geometry=geom_column, crs="EPSG:4326")
|
|
424
|
-
|
|
425
|
-
def complement_building_heights_from_gdf(gdf_0, gdf_1,
                                         primary_id='id', ref_id='id'):
    """
    Vectorized (GeoPandas overlay/sjoin) approach to:
      1) Find intersections and compute area-weighted average heights
      2) Update heights in the primary dataset where missing or zero
      3) Append reference buildings that intersect no primary building

    Args:
        gdf_0 (gpd.GeoDataFrame): Primary GeoDataFrame.
        gdf_1 (gpd.GeoDataFrame): Reference GeoDataFrame.
        primary_id (str): Unique identifier column in the primary dataset.
        ref_id (str): Unique identifier column in the reference dataset.

    Returns:
        gpd.GeoDataFrame: Updated primary frame concatenated with the
        non-intersecting reference buildings.

    Note:
        NOTE(review): areas are computed in whatever CRS the inputs carry;
        for geographic CRS (degrees) the weights are not metric — confirm
        inputs are projected, or reproject before calling.
    """
    # Work on copies so the caller's frames are never mutated.
    gdf_primary = gdf_0.copy()
    gdf_ref = gdf_1.copy()

    # Ensure both are in the same CRS, e.g. EPSG:4326 or some projected CRS
    # If needed, do something like:
    # gdf_primary = gdf_primary.to_crs("EPSG:xxxx")
    # gdf_ref = gdf_ref.to_crs("EPSG:xxxx")

    # Guarantee a 'height' column exists on both sides (0.0 = "no data").
    if 'height' not in gdf_primary.columns:
        gdf_primary['height'] = 0.0
    if 'height' not in gdf_ref.columns:
        gdf_ref['height'] = 0.0

    # ----------------------------------------------------------------
    # 1) Intersection to compute areas for overlapping buildings
    # ----------------------------------------------------------------
    # Rename the height columns up front so they survive the overlay
    # without suffix collisions.
    gdf_primary = gdf_primary.rename(columns={'height': 'height_primary'})
    gdf_ref = gdf_ref.rename(columns={'height': 'height_ref'})

    # Overlapping polygon pieces between primary and reference footprints.
    intersect_gdf = gpd.overlay(gdf_primary, gdf_ref, how='intersection')

    # Per-piece overlap area and its height-weighted contribution.
    intersect_gdf['intersect_area'] = intersect_gdf.area
    intersect_gdf['height_area'] = intersect_gdf['height_ref'] * intersect_gdf['intersect_area']

    # ----------------------------------------------------------------
    # 2) Aggregate to get weighted average height for each primary building
    # ----------------------------------------------------------------
    # Sum numerator and denominator per primary building id.
    group_cols = {
        'height_area': 'sum',
        'intersect_area': 'sum'
    }
    # NOTE(review): relies on gpd.overlay suffixing the colliding id columns
    # as '<id>_1' (primary) / '<id>_2' (reference) — confirm both frames
    # actually share the id column name so the suffix is produced.
    grouped = intersect_gdf.groupby(f'{primary_id}_1').agg(group_cols)

    # Weighted average: sum(height * area) / sum(area).
    grouped['weighted_height'] = grouped['height_area'] / grouped['intersect_area']

    # ----------------------------------------------------------------
    # 3) Merge aggregated results back to the primary GDF
    # ----------------------------------------------------------------
    # Adds a 'weighted_height' column (NaN where no overlap existed).
    gdf_primary = gdf_primary.merge(grouped['weighted_height'],
                                    left_on=primary_id,
                                    right_index=True,
                                    how='left')

    # Primary rows whose height is missing or zero are the fill targets.
    zero_or_nan_mask = (gdf_primary['height_primary'] == 0) | (gdf_primary['height_primary'].isna())

    # Fill only where a weighted height was actually computed.
    valid_weighted_height_mask = zero_or_nan_mask & gdf_primary['weighted_height'].notna()
    gdf_primary.loc[valid_weighted_height_mask, 'height_primary'] = gdf_primary.loc[valid_weighted_height_mask, 'weighted_height']
    # NOTE(review): fillna(np.nan) is a no-op — kept for byte-compatibility.
    gdf_primary['height_primary'] = gdf_primary['height_primary'].fillna(np.nan)

    # ----------------------------------------------------------------
    # 4) Identify reference buildings that do not intersect any primary building
    # ----------------------------------------------------------------
    # Another overlay or spatial join can do this:
    # Option A: use 'difference' on reference to get non-overlapping parts, but that can chop polygons.
    # Option B: check building-level intersection. We'll do a bounding test with sjoin.

    # Left-join reference onto primary; rows with no match get NaN on the
    # right-hand columns.
    sjoin_gdf = gpd.sjoin(gdf_ref, gdf_primary, how='left', predicate='intersects')

    # NOTE(review): assumes sjoin's default '_left'/'_right' suffixes apply
    # to the id columns (i.e. the names collide) — confirm for these inputs.
    non_intersect_mask = sjoin_gdf[f'{primary_id}_right'].isna()
    non_intersect_ids = sjoin_gdf[non_intersect_mask][f'{ref_id}_left'].unique()

    # Pull the full original rows for those ids from the reference frame.
    gdf_ref_non_intersect = gdf_ref[gdf_ref[ref_id].isin(non_intersect_ids)]

    # Rename back to 'height' so the concatenated output is consistent.
    gdf_ref_non_intersect = gdf_ref_non_intersect.rename(columns={'height_ref': 'height'})

    # Also rename any other properties you prefer. For clarity, keep an ID so you know they came from reference.

    # ----------------------------------------------------------------
    # 5) Combine the updated primary GDF with the new reference buildings
    # ----------------------------------------------------------------
    # Restore the primary height column name and drop the helper column.
    gdf_primary = gdf_primary.rename(columns={'height_primary': 'height'})
    if 'weighted_height' in gdf_primary.columns:
        gdf_primary.drop(columns='weighted_height', inplace=True)

    # Stack updated primary rows with the added reference-only buildings.
    final_gdf = pd.concat([gdf_primary, gdf_ref_non_intersect], ignore_index=True)

    # Statistics for the summary printout below.
    count_total = len(gdf_primary)                       # primary buildings
    count_0 = len(gdf_primary[zero_or_nan_mask])         # lacked height
    count_1 = len(gdf_primary[valid_weighted_height_mask])  # filled from ref
    count_2 = count_0 - count_1                          # still unfilled
    count_3 = len(gdf_ref_non_intersect)                 # buildings added
    count_4 = count_3
    height_mask = gdf_ref_non_intersect['height'].notna() & (gdf_ref_non_intersect['height'] > 0)
    count_5 = len(gdf_ref_non_intersect[height_mask])    # added, with height
    count_6 = count_4 - count_5                          # added, no height
    final_height_mask = final_gdf['height'].notna() & (final_gdf['height'] > 0)
    count_7 = len(final_gdf[final_height_mask])          # total with height
    count_8 = len(final_gdf)                             # total buildings

    # Print statistics only when there were buildings to complement.
    if count_0 > 0:
        print(f"{count_0} of the total {count_total} building footprints from base data source did not have height data.")
        print(f"For {count_1} of these building footprints without height, values from complementary data were assigned.")
        print(f"For the rest {count_2}, no data exists in complementary data.")
        print(f"Footprints of {count_3} buildings were added from the complementary source.")
        print(f"Of these {count_4} additional building footprints, {count_5} had height data while {count_6} had no height data.")
        print(f"In total, {count_7} buildings had height data out of {count_8} total building footprints.")

    return final_gdf
|
|
561
|
-
|
|
562
|
-
def gdf_to_geojson_dicts(gdf, id_col='id'):
    """
    Convert a GeoDataFrame to a list of GeoJSON-like feature dictionaries.

    Each output dict follows the GeoJSON Feature structure: a 'type' of
    "Feature", a 'geometry' converted from the row's Shapely geometry via
    ``__geo_interface__``, and 'properties' holding all remaining columns.
    If ``id_col`` is present in a row, its value is attached as the
    feature's top-level 'id' member (per RFC 7946) and excluded from the
    properties.

    Args:
        gdf (gpd.GeoDataFrame): GeoDataFrame to convert.
            Must have a 'geometry' column with Shapely geometries.
            All non-geometry, non-ID columns become feature properties.
        id_col (str, optional): Name of the column to use as feature ID.
            Defaults to 'id'. If absent, features carry no explicit ID.

    Returns:
        list: GeoJSON-like feature dictionaries, one per input row.

    Note:
        - Preserves original property value types.
        - Rows with a missing geometry produce a feature with geometry None.
    """
    # Convert rows to plain dicts for easier manipulation.
    records = gdf.to_dict(orient='records')
    features = []

    for rec in records:
        # Convert the Shapely geometry (if any) to GeoJSON form.
        geom = rec.pop('geometry', None)
        if geom is not None:
            geom = geom.__geo_interface__

        # Separate the ID value from the remaining properties.
        feature_id = rec.get(id_col, None)
        props = {k: v for k, v in rec.items() if k != id_col}

        feature = {
            'type': 'Feature',
            'properties': props,
            'geometry': geom
        }
        # Bug fix: the extracted ID was previously computed and then
        # discarded; attach it as the GeoJSON top-level 'id' member.
        if feature_id is not None:
            feature['id'] = feature_id
        features.append(feature)

    return features
|
|
617
|
-
|
|
618
|
-
def load_gdf_from_multiple_gz(file_paths):
    """
    Load line-delimited GeoJSON features from several gzipped files into a
    single GeoDataFrame.

    Each input file must contain one GeoJSON feature per line, readable as
    UTF-8 text. Features missing a 'height' property (or carrying a null
    height) are assigned a height of 0, and the combined result is tagged
    as WGS84 (EPSG:4326).

    Args:
        file_paths (list): Paths to gzipped, line-delimited GeoJSON files.

    Returns:
        gpd.GeoDataFrame: All parsed features combined, with a 'height'
            column guaranteed, in EPSG:4326.

    Note:
        - Lines that fail to parse as JSON are reported and skipped.
        - Memory usage grows with the total number of features.
    """
    geojson_objects = []

    for gz_file_path in file_paths:
        # Stream each gzipped file line by line as UTF-8 text.
        with gzip.open(gz_file_path, 'rt', encoding='utf-8') as handle:
            for raw_line in handle:
                try:
                    data = json.loads(raw_line)
                except json.JSONDecodeError as e:
                    # Report bad lines but keep going.
                    print(f"Skipping line in {gz_file_path} due to JSONDecodeError: {e}")
                    continue

                # Guarantee a numeric 'height' property on every feature:
                # create the properties dict if missing, then default a
                # missing or null height to 0.
                props = data.setdefault('properties', {})
                if props.get('height') is None:
                    props['height'] = 0

                geojson_objects.append(data)

    # Assemble all features and stamp the expected WGS84 CRS.
    gdf = gpd.GeoDataFrame.from_features(geojson_objects)
    gdf.set_crs(epsg=4326, inplace=True)
    return gdf
|
|
680
|
-
|
|
681
|
-
def filter_buildings(geojson_data, plotting_box):
    """
    Keep only the building features that intersect a bounding box.

    Each feature's coordinates are validated, converted to a Shapely
    geometry, and tested against ``plotting_box``. Features with invalid
    coordinates or geometries are reported and dropped; the survivors are
    returned in their original order with their structure untouched.

    Args:
        geojson_data (list): GeoJSON building features, each with a valid
            'geometry' key, in the same CRS as ``plotting_box``.
        plotting_box (Polygon): Shapely polygon used for the intersection
            test; must share the features' coordinate system.

    Returns:
        list: Features whose geometry intersects the bounding box.

    Note:
        - No coordinate transformation is performed.
        - Geometry errors are logged and the offending feature skipped.
    """
    kept = []

    for feature in geojson_data:
        geometry_dict = feature['geometry']

        # Reject features whose raw coordinates fail validation.
        if not validate_polygon_coordinates(geometry_dict):
            print("Skipping feature with invalid geometry")
            print(geometry_dict)
            continue

        try:
            # Build a Shapely geometry for the spatial test.
            geom = shape(geometry_dict)

            # Drop geometries Shapely itself considers invalid.
            if not geom.is_valid:
                print("Skipping invalid geometry")
                print(geom)
                continue

            if plotting_box.intersects(geom):
                kept.append(feature)
        except ShapelyError as e:
            # Log geometry errors but keep processing the rest.
            print(f"Skipping feature due to geometry error: {e}")

    return kept
|
|
742
|
-
|
|
743
|
-
def extract_building_heights_from_geotiff(geotiff_path, gdf):
    """
    Fill missing building heights from a GeoTIFF raster.

    For every simple Polygon footprint whose 'height' is missing, NaN, or
    <= 0, the footprint is reprojected from WGS84 into the raster's CRS and
    the mean of the valid (non-nodata) raster values inside it is assigned
    as the building height.

    Args:
        geotiff_path (str): Path to the GeoTIFF height raster.
        gdf (gpd.GeoDataFrame): Building footprints in WGS84 (EPSG:4326).
            A 'height' column is created (filled with NaN) if absent.

    Returns:
        gpd.GeoDataFrame: Copy of ``gdf`` with updated 'height' values.
            Buildings with no valid raster coverage receive NaN.

    Note:
        - Only Polygon geometries are processed (MultiPolygons are skipped).
        - Prints summary statistics when any building lacked height data.
    """
    # Work on a copy so the caller's GeoDataFrame is left untouched.
    gdf = gdf.copy()

    # Bug fix: gdf.get('height').isna() raised AttributeError when the
    # column was absent (DataFrame.get returns None); guarantee the column
    # exists before building the mask.
    if 'height' not in gdf.columns:
        gdf['height'] = np.nan

    # Counters for the summary statistics.
    count_0 = 0  # buildings lacking height data
    count_1 = 0  # buildings updated from the raster
    count_2 = 0  # buildings with no raster coverage

    with rasterio.open(geotiff_path) as src:
        # Transformer from WGS84 footprint coordinates into the raster CRS.
        transformer = Transformer.from_crs(CRS.from_epsg(4326), src.crs, always_xy=True)

        # Polygons with missing, NaN, or non-positive heights need work.
        mask_condition = (gdf.geometry.geom_type == 'Polygon') & ((gdf['height'] <= 0) | gdf['height'].isna())
        buildings_to_process = gdf[mask_condition]
        count_0 = len(buildings_to_process)

        for idx, row in buildings_to_process.iterrows():
            # Reproject the footprint exterior into the raster CRS.
            coords = list(row.geometry.exterior.coords)
            transformed_coords = [transformer.transform(lon, lat) for lon, lat in coords]
            polygon = shape({"type": "Polygon", "coordinates": [transformed_coords]})

            try:
                # all_touched=True includes every pixel the polygon touches.
                masked_data, _ = rasterio.mask.mask(src, [polygon], crop=True, all_touched=True)

                # Discard the raster's nodata sentinel values.
                heights = masked_data[0][masked_data[0] != src.nodata]

                if len(heights) > 0:
                    count_1 += 1
                    gdf.at[idx, 'height'] = float(np.mean(heights))
                else:
                    count_2 += 1
                    gdf.at[idx, 'height'] = np.nan
            except ValueError as e:
                # Consistency fix: store NaN (as documented) instead of the
                # previous None on masking errors.
                print(f"Error processing building at index {idx}. Error: {str(e)}")
                gdf.at[idx, 'height'] = np.nan

    # Report how the heights were filled in.
    if count_0 > 0:
        print(f"{count_0} of the total {len(gdf)} building footprint from OSM did not have height data.")
        print(f"For {count_1} of these building footprints without height, values from complementary data were assigned.")
        print(f"For {count_2} of these building footprints without height, no data exist in complementary data.")

    return gdf
|
|
824
|
-
|
|
825
|
-
def get_gdf_from_gpkg(gpkg_path, rectangle_vertices):
    """
    Load building footprints from a GeoPackage as a WGS84 GeoDataFrame.

    The file is read with GeoPandas; if it carries no CRS it is assumed to
    already be WGS84, otherwise it is reprojected to EPSG:4326. A sequential
    'id' column taken from the row index is added.

    Args:
        gpkg_path (str): Path to a readable GeoPackage file containing
            building footprint geometries in any CRS.
        rectangle_vertices (list): (lon, lat) tuples describing the target
            area in WGS84. Currently unused; kept for interface
            compatibility.

    Returns:
        gpd.GeoDataFrame: Footprints in EPSG:4326 with an 'id' column.
    """
    print(f"Opening GPKG file: {gpkg_path}")
    gdf = gpd.read_file(gpkg_path)

    if gdf.crs is None:
        # No CRS recorded: assume the data is already WGS84.
        gdf.set_crs(epsg=4326, inplace=True)
    elif gdf.crs != "EPSG:4326":
        # Reproject anything else into WGS84.
        gdf = gdf.to_crs(epsg=4326)

    # Sequential identifiers derived from the index.
    gdf['id'] = gdf.index

    return gdf
|
|
870
|
-
|
|
871
|
-
def swap_coordinates(features):
    """
    Swap every coordinate pair in-place from (lat, lon) to (lon, lat).

    Handles 'Polygon' geometries (a list of coordinate rings) and
    'MultiPolygon' geometries (a list of polygons, each a list of rings).
    Geometries of any other type are left untouched.

    Args:
        features (list): GeoJSON features with a 'geometry' key whose
            coordinates are currently ordered (lat, lon).

    Returns:
        None: the features are modified in-place (no copy is made).
    """
    def _flip_ring(ring):
        # Reverse the axis order of each vertex in one coordinate ring.
        return [[second, first] for first, second in ring]

    for feature in features:
        geometry = feature['geometry']
        gtype = geometry['type']

        if gtype == 'Polygon':
            # One polygon: a list of rings (exterior plus optional holes).
            geometry['coordinates'] = [_flip_ring(ring) for ring in geometry['coordinates']]
        elif gtype == 'MultiPolygon':
            # Several polygons, each with its own list of rings.
            geometry['coordinates'] = [
                [_flip_ring(ring) for ring in poly]
                for poly in geometry['coordinates']
            ]
|
|
908
|
-
|
|
909
|
-
def save_geojson(features, save_path):
    """
    Save GeoJSON features to a file as a pretty-printed FeatureCollection.

    A deep copy of the features is made so the caller's data is not mutated,
    coordinates are swapped from (lat, lon) to (lon, lat) order, and the
    result is wrapped in a FeatureCollection and written with 2-space
    indentation.

    Args:
        features (list): GeoJSON features (Polygon or MultiPolygon) whose
            coordinates are in (lat, lon) order.
        save_path (str): Destination path; overwritten if it exists.
            The parent directory must exist and be writable.

    Returns:
        None
    """
    # Deep copy so the coordinate swap does not mutate the caller's data.
    geojson_features = copy.deepcopy(features)

    # Reorder coordinates to the GeoJSON-standard (lon, lat).
    swap_coordinates(geojson_features)

    # Wrap everything in a FeatureCollection.
    geojson = {
        "type": "FeatureCollection",
        "features": geojson_features
    }

    # Fix: write with an explicit UTF-8 encoding (as documented) instead of
    # relying on the platform default (PEP 597).
    with open(save_path, 'w', encoding='utf-8') as f:
        json.dump(geojson, f, indent=2)
|
|
952
|
-
|
|
953
|
-
def find_building_containing_point(building_gdf, target_point):
    """
    Return the IDs of all buildings whose footprint contains a point.

    Only simple Polygon geometries are examined; a building matches when the
    point lies fully inside its footprint (Shapely ``contains``, so a point
    merely touching the boundary does not match).

    Args:
        building_gdf (GeoDataFrame): Buildings with 'geometry' polygons and
            an 'id' column; rows without an 'id' contribute None entries.
            Geometries must be in the same CRS as ``target_point``.
        target_point (tuple): (lon, lat) coordinates to test, in the same
            coordinate system as the geometries.

    Returns:
        list: IDs of the containing buildings, in arbitrary order; empty if
            none. Overlapping footprints may produce multiple IDs.

    Note:
        Performs a linear scan — no spatial index is used.
    """
    # Build the query point from the (lon, lat) tuple.
    query_point = Point(target_point[0], target_point[1])
    matches = []

    for _, building in building_gdf.iterrows():
        geometry = building.geometry

        # Only simple Polygon footprints are supported.
        if not isinstance(geometry, Polygon):
            continue

        # Record the building when the point lies fully inside it.
        if geometry.contains(query_point):
            matches.append(building.get('id', None))

    return matches
|
|
999
|
-
|
|
1000
|
-
def get_buildings_in_drawn_polygon(building_gdf, drawn_polygons,
                                   operation='within'):
    """
    Find buildings that intersect with or lie within user-drawn polygons.

    Each drawn polygon is turned into a Shapely Polygon and every building
    footprint is tested against it using the requested spatial predicate.
    A building matching any of the drawn polygons is reported exactly once.

    Args:
        building_gdf (GeoDataFrame): Building footprints with 'geometry'
            polygons and an 'id' column; geometries must share the CRS of
            the drawn vertices.
        drawn_polygons (list): Dicts each holding at least a 'vertices' key
            with (lon, lat) tuples forming a valid polygon (3+ vertices).
            May also carry 'id' and 'color' keys (unused here).
        operation (str, optional): Spatial relationship to test.
            'within' (default): building must be fully contained.
            'intersect': building must overlap the drawn polygon.

    Returns:
        list: Unique IDs of matching buildings (may include None for rows
            without an 'id'); empty if nothing matches or no polygons given.

    Raises:
        ValueError: If ``operation`` is not 'intersect' or 'within'.

    Note:
        - Only simple Polygon geometries are tested; others are skipped.
        - Performs a linear scan — no spatial index is used.
    """
    # Fix: validate the operation up-front. Previously an invalid operation
    # only raised once the loops reached a Polygon row, so bad arguments
    # could be silently ignored on empty or non-Polygon inputs.
    if operation not in ('intersect', 'within'):
        raise ValueError("operation must be 'intersect' or 'within'")

    if not drawn_polygons:
        return []

    # Use a set so buildings matching several polygons appear only once.
    included_building_ids = set()

    for polygon_data in drawn_polygons:
        # Build the Shapely polygon from the drawn vertices.
        drawn_polygon_shapely = Polygon(polygon_data['vertices'])

        for idx, row in building_gdf.iterrows():
            # Only simple Polygon footprints are supported.
            if not isinstance(row.geometry, Polygon):
                continue

            # Apply the requested spatial predicate.
            if operation == 'intersect':
                if row.geometry.intersects(drawn_polygon_shapely):
                    included_building_ids.add(row.get('id', None))
            else:  # operation == 'within'
                if row.geometry.within(drawn_polygon_shapely):
                    included_building_ids.add(row.get('id', None))

    # Convert the de-duplicated set back to a list.
    return list(included_building_ids)
|
|
1072
|
-
|
|
1073
|
-
def process_building_footprints_by_overlap(filtered_gdf, overlap_threshold=0.5):
    """
    Process building footprints to merge overlapping buildings based on area overlap ratio.

    This function identifies and merges building footprints that significantly overlap with each other.
    Buildings are processed in order of decreasing area, and smaller buildings that overlap significantly
    with larger ones are assigned the ID of the larger building, effectively merging them.

    Args:
        filtered_gdf (geopandas.GeoDataFrame): GeoDataFrame containing building footprints
            Must have 'geometry' column with building polygons
            If CRS is set, areas will be calculated in Web Mercator projection
        overlap_threshold (float, optional): Threshold for overlap ratio (0.0-1.0) to merge buildings
            Default is 0.5 (50% overlap)
            Higher values require more overlap for merging
            Lower values will result in more aggressive merging

    Returns:
        geopandas.GeoDataFrame: Processed GeoDataFrame with updated IDs
            Overlapping buildings will share the same ID
            Original geometries are preserved, only IDs are updated
            All other columns remain unchanged

    Note:
        - Uses R-tree spatial indexing for efficient overlap detection
        - Projects to Web Mercator (EPSG:3857) for accurate area calculation if CRS is set
        - Handles invalid geometries by attempting to fix them with buffer(0)
        - Processes buildings in order of decreasing area (largest first)
        - NOTE(review): despite the initial copy, the final loop writes the
          merged IDs back into ``filtered_gdf`` itself and returns it, so the
          caller's input IS mutated in place — confirm this is intended.
    """
    # Make a copy to avoid modifying the original
    # (see NOTE(review) above: this copy shields only the intermediate
    # projection steps; the input is still mutated at the end).
    gdf = filtered_gdf.copy()

    # Ensure 'id' column exists, use index if not present
    if 'id' not in gdf.columns:
        gdf['id'] = gdf.index

    # Project to Web Mercator for accurate area calculation if CRS is set
    if gdf.crs is None:
        # Work with original geometries if no CRS is set
        gdf_projected = gdf.copy()
    else:
        # Store original CRS to convert back later
        # NOTE(review): original_crs is never read afterwards in this block —
        # the result is never reprojected back; confirm whether that is intended.
        original_crs = gdf.crs
        # Project to Web Mercator for accurate area calculation
        gdf_projected = gdf.to_crs("EPSG:3857")

    # Calculate areas and sort by decreasing area for processing largest buildings first
    gdf_projected['area'] = gdf_projected.geometry.area
    gdf_projected = gdf_projected.sort_values(by='area', ascending=False)
    gdf_projected = gdf_projected.reset_index(drop=True)

    # Create spatial index for efficient querying of potential overlaps
    spatial_idx = index.Index()
    for i, geom in enumerate(gdf_projected.geometry):
        if geom.is_valid:
            spatial_idx.insert(i, geom.bounds)
        else:
            # Fix invalid geometries using buffer(0) technique
            fixed_geom = geom.buffer(0)
            if fixed_geom.is_valid:
                spatial_idx.insert(i, fixed_geom.bounds)
            # Geometries that remain invalid are simply absent from the index.

    # Track ID replacements to avoid repeated processing
    id_mapping = {}

    # Process each building (skip the largest one as it's our reference)
    for i in range(1, len(gdf_projected)):
        current_poly = gdf_projected.iloc[i].geometry
        # Row attribute access: '.area' here resolves to the 'area' column
        # value computed above (pandas Series attribute-style lookup).
        current_area = gdf_projected.iloc[i].area
        current_id = gdf_projected.iloc[i]['id']

        # Skip if already mapped to another ID
        if current_id in id_mapping:
            continue

        # Ensure geometry is valid for processing
        if not current_poly.is_valid:
            current_poly = current_poly.buffer(0)
            if not current_poly.is_valid:
                continue

        # Find potential overlaps with larger polygons using spatial index
        # (j < i restricts candidates to buildings with larger-or-equal area,
        # since rows are sorted by decreasing area)
        potential_overlaps = [j for j in spatial_idx.intersection(current_poly.bounds) if j < i]

        for j in potential_overlaps:
            larger_poly = gdf_projected.iloc[j].geometry
            larger_id = gdf_projected.iloc[j]['id']

            # Follow ID mapping chain to get final ID
            if larger_id in id_mapping:
                larger_id = id_mapping[larger_id]

            # Ensure geometry is valid for intersection test
            if not larger_poly.is_valid:
                larger_poly = larger_poly.buffer(0)
                if not larger_poly.is_valid:
                    continue

            try:
                # Calculate overlap ratio relative to current building's area
                if current_poly.intersects(larger_poly):
                    overlap = current_poly.intersection(larger_poly)
                    overlap_ratio = overlap.area / current_area

                    # Merge buildings if overlap exceeds threshold
                    if overlap_ratio > overlap_threshold:
                        id_mapping[current_id] = larger_id
                        gdf_projected.at[i, 'id'] = larger_id
                        break  # Stop at first significant overlap
            except (GEOSException, ValueError) as e:
                # Skip problematic geometries
                continue

    # Propagate ID changes through the original DataFrame
    # (this writes into the caller's filtered_gdf — see NOTE(review) above)
    for i, row in filtered_gdf.iterrows():
        orig_id = row.get('id')
        if orig_id in id_mapping:
            filtered_gdf.at[i, 'id'] = id_mapping[orig_id]

    return filtered_gdf
|
|
1193
|
-
|
|
1194
|
-
def merge_gdfs_with_id_conflict_resolution(gdf_1, gdf_2, id_columns=None):
    """
    Merge two GeoDataFrames while resolving ID conflicts by modifying IDs in the second GeoDataFrame.

    Buildings from both datasets are combined; whenever a row in gdf_2 shares a
    value in any ID column with gdf_1, that row in the merged output receives
    fresh IDs so every ID column stays unique across the result.

    Args:
        gdf_1 (gpd.GeoDataFrame): Primary GeoDataFrame containing building footprints.
            Must have a 'geometry' column and the ID columns. Never modified.
        gdf_2 (gpd.GeoDataFrame): Secondary GeoDataFrame containing building footprints.
            Must have a 'geometry' column and the ID columns. IDs are rewritten
            (on an internal copy) when they conflict with gdf_1.
        id_columns (list, optional): Column names to check for ID conflicts.
            Defaults to ['id', 'building_id']. Columns missing from either
            input are skipped with a warning.

    Returns:
        gpd.GeoDataFrame: Merged GeoDataFrame containing all buildings from both
        inputs, with unique values in every processed ID column and missing
        columns filled with None.

    Note:
        - New IDs start above the maximum ID found in gdf_1 (or above
          len(gdf_1) for non-numeric ID columns).
        - Assumes both GeoDataFrames share the same coordinate reference system.
    """
    # Avoid the mutable-default-argument pitfall; None selects the conventional columns.
    if id_columns is None:
        id_columns = ['id', 'building_id']

    # Work on copies so the caller's GeoDataFrames are never mutated.
    gdf_primary = gdf_1.copy()
    gdf_secondary = gdf_2.copy()

    # Validate that required ID columns exist in both GeoDataFrames.
    missing_columns = []
    for col in id_columns:
        if col not in gdf_primary.columns:
            missing_columns.append(f"'{col}' missing from gdf_1")
        if col not in gdf_secondary.columns:
            missing_columns.append(f"'{col}' missing from gdf_2")

    if missing_columns:
        print(f"Warning: Missing ID columns: {', '.join(missing_columns)}")
        # Keep only the columns present in both inputs.
        id_columns = [col for col in id_columns
                      if col in gdf_primary.columns and col in gdf_secondary.columns]

    if not id_columns:
        print("Warning: No valid ID columns found. Merging without ID conflict resolution.")
        # Handle missing columns before concatenation.
        merged_gdf = _merge_gdfs_with_missing_columns(gdf_primary, gdf_secondary)
        return merged_gdf

    # Maximum existing ID per column in the primary dataset; new IDs start above these.
    max_ids = {}
    for col in id_columns:
        if gdf_primary[col].dtype in ['int64', 'int32', 'float64', 'float32']:
            max_ids[col] = gdf_primary[col].max()
        else:
            # For non-numeric IDs, use the primary row count as a monotonic base.
            max_ids[col] = len(gdf_primary)

    # Precompute the primary ID sets once for O(1) membership tests, instead of
    # rescanning gdf_primary[col].values for every secondary row (was O(n*m)).
    primary_id_sets = {col: set(gdf_primary[col].values) for col in id_columns}

    next_ids = {col: max_ids[col] + 1 for col in id_columns}

    # Rewrite IDs for every secondary row that collides with the primary dataset.
    for idx, row in gdf_secondary.iterrows():
        needs_new_ids = any(row[col] in primary_id_sets[col] for col in id_columns)
        if needs_new_ids:
            # Assign fresh IDs in all ID columns to keep them consistent per row.
            for col in id_columns:
                gdf_secondary.at[idx, col] = next_ids[col]
                next_ids[col] += 1

    # Handle missing columns before merging.
    merged_gdf = _merge_gdfs_with_missing_columns(gdf_primary, gdf_secondary)

    # Print statistics about the merge. Use label-based .loc (not .iloc) when
    # comparing against the original gdf_2: iterrows() yields index labels,
    # which are positional only when the index happens to be a RangeIndex.
    total_buildings = len(merged_gdf)
    primary_buildings = len(gdf_primary)
    secondary_buildings = len(gdf_secondary)
    modified_buildings = sum(
        1 for idx, row in gdf_secondary.iterrows()
        if any(row[col] != gdf_2.loc[idx, col] for col in id_columns)
    )

    print(f"Merged {primary_buildings} buildings from primary dataset with {secondary_buildings} buildings from secondary dataset.")
    print(f"Total buildings in merged dataset: {total_buildings}")
    if modified_buildings > 0:
        print(f"Modified IDs for {modified_buildings} buildings in secondary dataset to resolve conflicts.")

    return merged_gdf
|
|
1303
|
-
|
|
1304
|
-
|
|
1305
|
-
def _merge_gdfs_with_missing_columns(gdf_1, gdf_2):
|
|
1306
|
-
"""
|
|
1307
|
-
Helper function to merge two GeoDataFrames while handling missing columns.
|
|
1308
|
-
|
|
1309
|
-
This function ensures that when one GeoDataFrame has columns that the other doesn't,
|
|
1310
|
-
those missing values are filled with None instead of NaN.
|
|
1311
|
-
|
|
1312
|
-
Args:
|
|
1313
|
-
gdf_1 (gpd.GeoDataFrame): First GeoDataFrame
|
|
1314
|
-
gdf_2 (gpd.GeoDataFrame): Second GeoDataFrame
|
|
1315
|
-
|
|
1316
|
-
Returns:
|
|
1317
|
-
gpd.GeoDataFrame: Merged GeoDataFrame with all columns from both inputs
|
|
1318
|
-
"""
|
|
1319
|
-
# Find columns that exist in one GeoDataFrame but not the other
|
|
1320
|
-
columns_1 = set(gdf_1.columns)
|
|
1321
|
-
columns_2 = set(gdf_2.columns)
|
|
1322
|
-
|
|
1323
|
-
# Columns only in gdf_1
|
|
1324
|
-
only_in_1 = columns_1 - columns_2
|
|
1325
|
-
# Columns only in gdf_2
|
|
1326
|
-
only_in_2 = columns_2 - columns_1
|
|
1327
|
-
|
|
1328
|
-
# Add missing columns to gdf_1 with None values
|
|
1329
|
-
for col in only_in_2:
|
|
1330
|
-
gdf_1[col] = None
|
|
1331
|
-
|
|
1332
|
-
# Add missing columns to gdf_2 with None values
|
|
1333
|
-
for col in only_in_1:
|
|
1334
|
-
gdf_2[col] = None
|
|
1335
|
-
|
|
1336
|
-
# Ensure both GeoDataFrames have the same column order
|
|
1337
|
-
all_columns = sorted(list(columns_1.union(columns_2)))
|
|
1338
|
-
gdf_1 = gdf_1[all_columns]
|
|
1339
|
-
gdf_2 = gdf_2[all_columns]
|
|
1340
|
-
|
|
1341
|
-
# Merge the GeoDataFrames
|
|
1342
|
-
merged_gdf = pd.concat([gdf_1, gdf_2], ignore_index=True)
|
|
1343
|
-
|
|
1344
|
-
return merged_gdf
|