voxcity 0.5.14__py3-none-any.whl → 0.5.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of voxcity might be problematic. Click here for more details.

@@ -27,24 +27,42 @@ def filter_and_convert_gdf_to_geojson(gdf, rectangle_vertices):
27
27
  """
28
28
  Filter a GeoDataFrame by a bounding rectangle and convert to GeoJSON format.
29
29
 
30
+ This function performs spatial filtering on a GeoDataFrame using a bounding rectangle,
31
+ and converts the filtered data to GeoJSON format. It handles both Polygon and MultiPolygon
32
+ geometries, splitting MultiPolygons into separate Polygon features.
33
+
30
34
  Args:
31
35
  gdf (GeoDataFrame): Input GeoDataFrame containing building data
36
+ Must have 'geometry' and 'height' columns
37
+ Any CRS is accepted, will be converted to WGS84 if needed
32
38
  rectangle_vertices (list): List of (lon, lat) tuples defining the bounding rectangle
39
+ Must be in WGS84 (EPSG:4326) coordinate system
40
+ Must form a valid rectangle (4 vertices, clockwise or counterclockwise)
33
41
 
34
42
  Returns:
35
43
  list: List of GeoJSON features within the bounding rectangle
44
+ Each feature contains:
45
+ - geometry: Polygon coordinates in WGS84
46
+ - properties: Dictionary with 'height', 'confidence', and 'id'
47
+ - type: Always "Feature"
48
+
49
+ Memory Optimization:
50
+ - Uses spatial indexing for efficient filtering
51
+ - Downcasts numeric columns to save memory
52
+ - Cleans up intermediate data structures
53
+ - Splits MultiPolygons into separate features
36
54
  """
37
- # Reproject to WGS84 if necessary
55
+ # Reproject to WGS84 if necessary for consistent coordinate system
38
56
  if gdf.crs != 'EPSG:4326':
39
57
  gdf = gdf.to_crs(epsg=4326)
40
58
 
41
- # Downcast 'height' to save memory
59
+ # Downcast 'height' to float32 to save memory
42
60
  gdf['height'] = pd.to_numeric(gdf['height'], downcast='float')
43
61
 
44
- # Add 'confidence' column with default value
62
+ # Add 'confidence' column with default value for height reliability
45
63
  gdf['confidence'] = -1.0
46
64
 
47
- # Rectangle vertices already in (lon,lat) format for shapely
65
+ # Create shapely polygon from rectangle vertices for spatial filtering
48
66
  rectangle_polygon = Polygon(rectangle_vertices)
49
67
 
50
68
  # Use spatial index to efficiently filter geometries that intersect with rectangle
@@ -125,18 +143,39 @@ def extract_building_heights_from_gdf(gdf_0: gpd.GeoDataFrame, gdf_1: gpd.GeoDat
125
143
  """
126
144
  Extract building heights from one GeoDataFrame and apply them to another based on spatial overlap.
127
145
 
146
+ This function transfers height information from a reference GeoDataFrame to a primary GeoDataFrame
147
+ based on the spatial overlap between building footprints. For each building in the primary dataset
148
+ that needs height data, it calculates a weighted average height from overlapping buildings in the
149
+ reference dataset.
150
+
128
151
  Args:
129
152
  gdf_0 (gpd.GeoDataFrame): Primary GeoDataFrame to update with heights
153
+ Must have 'geometry' column with building footprints
154
+ Will be updated with height values where missing or zero
130
155
  gdf_1 (gpd.GeoDataFrame): Reference GeoDataFrame containing height data
156
+ Must have 'geometry' column with building footprints
157
+ Must have 'height' column with valid height values
131
158
 
132
159
  Returns:
133
160
  gpd.GeoDataFrame: Updated primary GeoDataFrame with extracted heights
161
+ Buildings with overlapping reference data get weighted average heights
162
+ Buildings without overlapping data retain original height or get NaN
163
+
164
+ Statistics Tracked:
165
+ - count_0: Number of buildings without height in primary dataset
166
+ - count_1: Number of buildings successfully updated with height
167
+ - count_2: Number of buildings where no reference height data found
168
+
169
+ Note:
170
+ - Uses R-tree spatial indexing for efficient overlap detection
171
+ - Handles invalid geometries by attempting to fix them with buffer(0)
172
+ - Weighted average is based on the area of overlap between buildings
134
173
  """
135
174
  # Make a copy of input GeoDataFrame to avoid modifying original
136
175
  gdf_primary = gdf_0.copy()
137
176
  gdf_ref = gdf_1.copy()
138
177
 
139
- # Make sure height columns exist
178
+ # Make sure height columns exist with default values
140
179
  if 'height' not in gdf_primary.columns:
141
180
  gdf_primary['height'] = 0.0
142
181
  if 'height' not in gdf_ref.columns:
@@ -147,8 +186,7 @@ def extract_building_heights_from_gdf(gdf_0: gpd.GeoDataFrame, gdf_1: gpd.GeoDat
147
186
  count_1 = 0 # Buildings updated with height
148
187
  count_2 = 0 # Buildings with no height data found
149
188
 
150
- # Create spatial index for reference buildings
151
- from rtree import index
189
+ # Create spatial index for reference buildings to speed up intersection tests
152
190
  spatial_index = index.Index()
153
191
  for i, geom in enumerate(gdf_ref.geometry):
154
192
  if geom.is_valid:
@@ -160,9 +198,9 @@ def extract_building_heights_from_gdf(gdf_0: gpd.GeoDataFrame, gdf_1: gpd.GeoDat
160
198
  count_0 += 1
161
199
  geom = row.geometry
162
200
 
163
- # Calculate weighted average height based on overlapping areas
164
- overlapping_height_area = 0
165
- overlapping_area = 0
201
+ # Variables for weighted average height calculation
202
+ overlapping_height_area = 0 # Sum of (height * overlap_area)
203
+ overlapping_area = 0 # Total overlap area
166
204
 
167
205
  # Get potential intersecting buildings using spatial index
168
206
  potential_matches = list(spatial_index.intersection(geom.bounds))
@@ -174,6 +212,7 @@ def extract_building_heights_from_gdf(gdf_0: gpd.GeoDataFrame, gdf_1: gpd.GeoDat
174
212
 
175
213
  ref_row = gdf_ref.iloc[ref_idx]
176
214
  try:
215
+ # Calculate intersection if geometries overlap
177
216
  if geom.intersects(ref_row.geometry):
178
217
  overlap_area = geom.intersection(ref_row.geometry).area
179
218
  overlapping_height_area += ref_row['height'] * overlap_area
@@ -193,7 +232,7 @@ def extract_building_heights_from_gdf(gdf_0: gpd.GeoDataFrame, gdf_1: gpd.GeoDat
193
232
  # Update height if overlapping buildings found
194
233
  if overlapping_height_area > 0:
195
234
  count_1 += 1
196
- # Calculate weighted average height
235
+ # Calculate weighted average height based on overlap areas
197
236
  new_height = overlapping_height_area / overlapping_area
198
237
  gdf_primary.at[idx_primary, 'height'] = new_height
199
238
  else:
@@ -202,7 +241,6 @@ def extract_building_heights_from_gdf(gdf_0: gpd.GeoDataFrame, gdf_1: gpd.GeoDat
202
241
 
203
242
  # Print statistics about height updates
204
243
  if count_0 > 0:
205
- # print(f"{count_0} of the total {len(gdf_primary)} building footprint from OSM did not have height data.")
206
244
  print(f"For {count_1} of these building footprints without height, values from the complementary source were assigned.")
207
245
  print(f"For {count_2} of these building footprints without height, no data exist in complementary data.")
208
246
 
@@ -331,34 +369,55 @@ def geojson_to_gdf(geojson_data, id_col='id'):
331
369
  """
332
370
  Convert a list of GeoJSON-like dict features into a GeoDataFrame.
333
371
 
372
+ This function takes a list of GeoJSON feature dictionaries (Fiona-like format)
373
+ and converts them into a GeoDataFrame, handling geometry conversion and property
374
+ extraction. It ensures each feature has a unique identifier.
375
+
334
376
  Args:
335
- geojson_data (List[Dict]): A list of feature dicts (Fiona-like).
336
- id_col (str): Name of property to use as an identifier. If not found,
337
- we'll try to create a unique ID.
338
-
377
+ geojson_data (List[Dict]): A list of feature dicts (Fiona-like)
378
+ Each dict must have 'geometry' and 'properties' keys
379
+ 'geometry' must be a valid GeoJSON geometry
380
+ 'properties' can be empty but must be a dict if present
381
+ id_col (str, optional): Name of property to use as an identifier
382
+ Default is 'id'
383
+ If not found in properties, a sequential ID will be created
384
+ Must be a string that can be used as a column name
385
+
339
386
  Returns:
340
- gpd.GeoDataFrame: GeoDataFrame with geometry and property columns.
387
+ gpd.GeoDataFrame: GeoDataFrame with geometry and property columns
388
+ Will have 'geometry' column with Shapely geometries
389
+ Will have columns for all properties found in features
390
+ Will have id_col with unique identifiers
391
+ Will be set to WGS84 (EPSG:4326) coordinate system
392
+
393
+ Note:
394
+ - Handles missing properties gracefully
395
+ - Creates sequential IDs if id_col not found
396
+ - Converts GeoJSON geometries to Shapely objects
397
+ - Sets WGS84 as coordinate system
398
+ - Preserves all properties as columns
341
399
  """
342
400
  # Build lists for geometry and properties
343
401
  geometries = []
344
402
  all_props = []
345
403
 
346
404
  for i, feature in enumerate(geojson_data):
347
- # Extract geometry
405
+ # Extract geometry and convert to Shapely object
348
406
  geom = feature.get('geometry')
349
407
  shapely_geom = shape(geom) if geom else None
350
408
 
351
- # Extract properties
409
+ # Extract properties, ensuring they exist
352
410
  props = feature.get('properties', {})
353
411
 
354
- # If an ID column is missing, create one
412
+ # If specified ID column is missing, create sequential ID
355
413
  if id_col not in props:
356
414
  props[id_col] = i # fallback ID
357
415
 
358
- # Capture geometry and all props
416
+ # Capture geometry and all properties
359
417
  geometries.append(shapely_geom)
360
418
  all_props.append(props)
361
419
 
420
+ # Create GeoDataFrame with geometries and properties
362
421
  gdf = gpd.GeoDataFrame(all_props, geometry=geometries, crs="EPSG:4326")
363
422
  return gdf
364
423
 
@@ -503,18 +562,50 @@ def complement_building_heights_from_gdf(gdf_0, gdf_1,
503
562
  def gdf_to_geojson_dicts(gdf, id_col='id'):
504
563
  """
505
564
  Convert a GeoDataFrame to a list of dicts similar to GeoJSON features.
565
+
566
+ This function converts a GeoDataFrame into a list of dictionary objects that
567
+ follow the GeoJSON Feature format. Each feature will have geometry and properties,
568
+ with an optional ID field handled separately from other properties.
569
+
570
+ Args:
571
+ gdf (gpd.GeoDataFrame): GeoDataFrame to convert
572
+ Must have 'geometry' column with Shapely geometries
573
+ All non-geometry columns will become properties
574
+ Can optionally have id_col for unique identifiers
575
+ id_col (str, optional): Name of column to use as feature ID
576
+ Default is 'id'
577
+ If present, will be excluded from properties
578
+ If not present, features will not have explicit IDs
579
+
580
+ Returns:
581
+ list: List of GeoJSON-like feature dictionaries
582
+ Each dict will have:
583
+ - type: Always "Feature"
584
+ - geometry: GeoJSON geometry from Shapely object
585
+ - properties: All columns except geometry and ID
586
+
587
+ Note:
588
+ - Converts Shapely geometries to GeoJSON format
589
+ - Preserves all non-geometry columns as properties
590
+ - Handles missing ID column gracefully
591
+ - Maintains original property types
592
+ - Excludes ID from properties if specified
506
593
  """
594
+ # Convert GeoDataFrame to dictionary records for easier processing
507
595
  records = gdf.to_dict(orient='records')
508
596
  features = []
597
+
509
598
  for rec in records:
510
- # geometry is separate
599
+ # Extract and convert geometry to GeoJSON format using __geo_interface__
511
600
  geom = rec.pop('geometry', None)
512
601
  if geom is not None:
513
602
  geom = geom.__geo_interface__
514
- # use or set ID
603
+
604
+ # Extract ID if present and create properties dict excluding ID
515
605
  feature_id = rec.get(id_col, None)
516
606
  props = {k: v for k, v in rec.items() if k != id_col}
517
- # build GeoJSON-like feature dict
607
+
608
+ # Create GeoJSON Feature object with type, properties, and geometry
518
609
  feature = {
519
610
  'type': 'Feature',
520
611
  'properties': props,
@@ -526,38 +617,63 @@ def gdf_to_geojson_dicts(gdf, id_col='id'):
526
617
 
527
618
  def load_gdf_from_multiple_gz(file_paths):
528
619
  """
529
- Load GeoJSON features from multiple gzipped files into a GeoDataFrame.
620
+ Load GeoJSON features from multiple gzipped files into a single GeoDataFrame.
621
+
622
+ This function reads multiple gzipped GeoJSON files, where each line in each file
623
+ represents a single GeoJSON feature. It combines all features into a single
624
+ GeoDataFrame, ensuring height properties are properly handled and coordinates
625
+ are in WGS84.
530
626
 
531
627
  Args:
532
628
  file_paths (list): List of paths to gzipped GeoJSON files
629
+ Each file should contain one GeoJSON feature per line
630
+ Files should be readable as UTF-8 text
631
+ Features should be in WGS84 coordinate system
533
632
 
534
633
  Returns:
535
- gpd.GeoDataFrame: GeoDataFrame containing features from all files
634
+ gpd.GeoDataFrame: Combined GeoDataFrame containing all features
635
+ Will have 'geometry' column with building footprints
636
+ Will have 'height' column (0 for missing values)
637
+ Will be set to WGS84 (EPSG:4326) coordinate system
638
+
639
+ Note:
640
+ - Skips lines that cannot be parsed as valid JSON
641
+ - Sets missing height values to 0
642
+ - Assumes input coordinates are in WGS84
643
+ - Memory usage scales with total number of features
644
+ - Reports JSON parsing errors but continues processing
536
645
  """
646
+ # Initialize list to store all GeoJSON features
537
647
  geojson_objects = []
648
+
649
+ # Process each gzipped file
538
650
  for gz_file_path in file_paths:
539
- # Read each gzipped file line by line
651
+ # Read each gzipped file line by line as UTF-8 text
540
652
  with gzip.open(gz_file_path, 'rt', encoding='utf-8') as file:
541
653
  for line in file:
542
654
  try:
655
+ # Parse each line as a GeoJSON feature
543
656
  data = json.loads(line)
657
+
544
658
  # Ensure height property exists and has valid value
545
659
  if 'properties' in data and 'height' in data['properties']:
546
660
  if data['properties']['height'] is None:
547
661
  data['properties']['height'] = 0
548
662
  else:
663
+ # Create properties dict if missing
549
664
  if 'properties' not in data:
550
665
  data['properties'] = {}
666
+ # Set default height value
551
667
  data['properties']['height'] = 0
668
+
552
669
  geojson_objects.append(data)
553
670
  except json.JSONDecodeError as e:
554
671
  print(f"Skipping line in {gz_file_path} due to JSONDecodeError: {e}")
555
672
 
556
673
  # Convert list of GeoJSON features to GeoDataFrame
557
- # swap_coordinates(geojson_objects)
558
674
  gdf = gpd.GeoDataFrame.from_features(geojson_objects)
559
675
 
560
- # Set CRS to WGS84 which is typically used for these files
676
+ # Set coordinate reference system to WGS84
561
677
  gdf.set_crs(epsg=4326, inplace=True)
562
678
 
563
679
  return gdf
@@ -566,44 +682,92 @@ def filter_buildings(geojson_data, plotting_box):
566
682
  """
567
683
  Filter building features that intersect with a given bounding box.
568
684
 
685
+ This function filters a list of GeoJSON building features to keep only those
686
+ that intersect with a specified bounding box. It performs geometry validation
687
+ and handles invalid geometries gracefully.
688
+
569
689
  Args:
570
- geojson_data (list): List of GeoJSON features
690
+ geojson_data (list): List of GeoJSON features representing buildings
691
+ Each feature must have valid 'geometry' property
692
+ Coordinates must be in same CRS as plotting_box
693
+ Invalid geometries will be skipped with warning
571
694
  plotting_box (Polygon): Shapely polygon defining the bounding box
695
+ Must be a valid Shapely Polygon object
696
+ Must be in same coordinate system as geojson_data
697
+ Used for spatial intersection testing
572
698
 
573
699
  Returns:
574
700
  list: Filtered list of GeoJSON features that intersect with the bounding box
701
+ Features maintain their original structure
702
+ Invalid features are excluded
703
+ Order of features is preserved
704
+
705
+ Note:
706
+ - Validates polygon coordinates before processing
707
+ - Skips features with invalid geometries
708
+ - Reports validation and geometry errors
709
+ - No coordinate system transformation is performed
710
+ - Memory efficient as it creates new list only for valid features
575
711
  """
712
+ # Initialize list for valid intersecting features
576
713
  filtered_features = []
714
+
715
+ # Process each feature in the input data
577
716
  for feature in geojson_data:
578
717
  # Validate polygon coordinates before processing
579
718
  if not validate_polygon_coordinates(feature['geometry']):
580
719
  print("Skipping feature with invalid geometry")
581
720
  print(feature['geometry'])
582
721
  continue
722
+
583
723
  try:
584
- # Convert GeoJSON geometry to Shapely geometry
724
+ # Convert GeoJSON geometry to Shapely geometry for spatial operations
585
725
  geom = shape(feature['geometry'])
726
+
727
+ # Skip invalid geometries (no repair is attempted here)
586
728
  if not geom.is_valid:
587
729
  print("Skipping invalid geometry")
588
730
  print(geom)
589
731
  continue
732
+
590
733
  # Keep features that intersect with bounding box
591
734
  if plotting_box.intersects(geom):
592
735
  filtered_features.append(feature)
736
+
593
737
  except ShapelyError as e:
738
+ # Log geometry errors but continue processing
594
739
  print(f"Skipping feature due to geometry error: {e}")
740
+
595
741
  return filtered_features
596
742
 
597
743
  def extract_building_heights_from_geotiff(geotiff_path, gdf):
598
744
  """
599
745
  Extract building heights from a GeoTIFF raster for building footprints in a GeoDataFrame.
600
746
 
747
+ This function processes building footprints to extract height information from a GeoTIFF
748
+ raster file. It handles coordinate transformation between WGS84 (EPSG:4326) and the raster's
749
+ CRS, and calculates average heights for each building footprint.
750
+
601
751
  Args:
602
- geotiff_path (str): Path to the GeoTIFF height raster
603
- gdf (gpd.GeoDataFrame): GeoDataFrame containing building footprints
752
+ geotiff_path (str): Path to the GeoTIFF height raster file containing elevation data
753
+ gdf (gpd.GeoDataFrame): GeoDataFrame containing building footprints with geometry column
754
+ The GeoDataFrame should be in WGS84 (EPSG:4326) coordinate system
604
755
 
605
756
  Returns:
606
- gpd.GeoDataFrame: Updated GeoDataFrame with extracted heights
757
+ gpd.GeoDataFrame: Updated GeoDataFrame with extracted heights in the 'height' column
758
+ - Buildings with valid height data will have their height values updated
759
+ - Buildings with no valid height data will have NaN values
760
+ - Original buildings with existing valid heights are preserved
761
+
762
+ Statistics Reported:
763
+ - Total number of buildings without height data
764
+ - Number of buildings successfully updated with height data
765
+ - Number of buildings where no height data could be found
766
+
767
+ Note:
768
+ - The function only processes Polygon geometries (not MultiPolygons or other types)
769
+ - Buildings are considered to need height processing if they have no height or height <= 0
770
+ - Heights are calculated as the mean of all valid raster values within the building footprint
607
771
  """
608
772
  # Make a copy to avoid modifying the input
609
773
  gdf = gdf.copy()
@@ -615,23 +779,28 @@ def extract_building_heights_from_geotiff(geotiff_path, gdf):
615
779
 
616
780
  # Open GeoTIFF and process buildings
617
781
  with rasterio.open(geotiff_path) as src:
618
- # Create coordinate transformer from WGS84 to raster CRS
782
+ # Create coordinate transformer from WGS84 to raster CRS for geometry transformation
619
783
  transformer = Transformer.from_crs(CRS.from_epsg(4326), src.crs, always_xy=True)
620
784
 
621
- # Filter buildings that need height processing
785
+ # Filter buildings that need height processing:
786
+ # - Must be Polygon type (not MultiPolygon)
787
+ # - Either has no height or height <= 0
622
788
  mask_condition = (gdf.geometry.geom_type == 'Polygon') & ((gdf.get('height', 0) <= 0) | gdf.get('height').isna())
623
789
  buildings_to_process = gdf[mask_condition]
624
790
  count_0 = len(buildings_to_process)
625
791
 
626
792
  for idx, row in buildings_to_process.iterrows():
627
- # Transform geometry to raster CRS
793
+ # Transform building polygon coordinates from WGS84 to raster CRS
628
794
  coords = list(row.geometry.exterior.coords)
629
795
  transformed_coords = [transformer.transform(lon, lat) for lon, lat in coords]
630
796
  polygon = shape({"type": "Polygon", "coordinates": [transformed_coords]})
631
797
 
632
798
  try:
633
- # Extract height values from raster within polygon
799
+ # Extract height values from raster within the building polygon
800
+ # all_touched=True ensures we get all pixels that the polygon touches
634
801
  masked_data, _ = rasterio.mask.mask(src, [polygon], crop=True, all_touched=True)
802
+
803
+ # Filter out nodata values from the raster
635
804
  heights = masked_data[0][masked_data[0] != src.nodata]
636
805
 
637
806
  # Calculate average height if valid samples exist
@@ -641,7 +810,6 @@ def extract_building_heights_from_geotiff(geotiff_path, gdf):
641
810
  else:
642
811
  count_2 += 1
643
812
  gdf.at[idx, 'height'] = np.nan
644
- # print(f"No valid height data for building at index {idx}")
645
813
  except ValueError as e:
646
814
  print(f"Error processing building at index {idx}. Error: {str(e)}")
647
815
  gdf.at[idx, 'height'] = None
@@ -656,14 +824,33 @@ def extract_building_heights_from_geotiff(geotiff_path, gdf):
656
824
 
657
825
  def get_gdf_from_gpkg(gpkg_path, rectangle_vertices):
658
826
  """
659
- Read a GeoPackage file and convert it to GeoJSON format within a bounding rectangle.
827
+ Read a GeoPackage file and convert it to a GeoDataFrame with consistent CRS.
828
+
829
+ This function reads a GeoPackage file containing building footprints and ensures
830
+ the data is properly formatted with WGS84 coordinate system and unique identifiers.
831
+ It handles CRS conversion if needed and adds sequential IDs.
660
832
 
661
833
  Args:
662
834
  gpkg_path (str): Path to the GeoPackage file
835
+ File must exist and be readable
836
+ Must contain valid building footprint geometries
837
+ Any coordinate system is accepted
663
838
  rectangle_vertices (list): List of (lon, lat) tuples defining the bounding rectangle
839
+ Must be in WGS84 (EPSG:4326) coordinate system
840
+ Used for spatial filtering (not implemented in this function)
664
841
 
665
842
  Returns:
666
- list: List of GeoJSON features within the bounding rectangle
843
+ gpd.GeoDataFrame: GeoDataFrame containing building footprints
844
+ Will have 'geometry' column with building geometries
845
+ Will have 'id' column with sequential integers
846
+ Will be in WGS84 (EPSG:4326) coordinate system
847
+
848
+ Note:
849
+ - Prints informative message when opening file
850
+ - Sets CRS to WGS84 if not specified
851
+ - Transforms to WGS84 if different CRS
852
+ - Adds sequential IDs starting from 0
853
+ - rectangle_vertices parameter is currently unused
667
854
  """
668
855
  # Open and read the GPKG file
669
856
  print(f"Opening GPKG file: {gpkg_path}")
@@ -676,7 +863,7 @@ def get_gdf_from_gpkg(gpkg_path, rectangle_vertices):
676
863
  elif gdf.crs != "EPSG:4326":
677
864
  gdf = gdf.to_crs(epsg=4326)
678
865
 
679
- # Replace id column with index numbers
866
+ # Replace id column with sequential index numbers
680
867
  gdf['id'] = gdf.index
681
868
 
682
869
  return gdf
@@ -685,66 +872,127 @@ def swap_coordinates(features):
685
872
  """
686
873
  Swap coordinate ordering in GeoJSON features from (lat, lon) to (lon, lat).
687
874
 
875
+ This function modifies GeoJSON features in-place to swap the order of coordinates
876
+ from (latitude, longitude) to (longitude, latitude). It handles both Polygon and
877
+ MultiPolygon geometries, maintaining their structure while swapping coordinates.
878
+
688
879
  Args:
689
880
  features (list): List of GeoJSON features to process
881
+ Features must have 'geometry' property
882
+ Supported geometry types: 'Polygon', 'MultiPolygon'
883
+ Coordinates must be in (lat, lon) order initially
884
+
885
+ Returns:
886
+ None: Features are modified in-place
887
+
888
+ Note:
889
+ - Modifies features directly (no copy created)
890
+ - Handles both Polygon and MultiPolygon geometries
891
+ - For Polygons: processes every coordinate ring (exterior and any holes)
892
+ - For MultiPolygons: processes every ring of every member polygon
893
+ - Assumes input coordinates are in (lat, lon) order
894
+ - Resulting coordinates will be in (lon, lat) order
690
895
  """
691
896
  # Process each feature based on geometry type
692
897
  for feature in features:
693
898
  if feature['geometry']['type'] == 'Polygon':
694
899
  # Swap coordinates for simple polygons
900
+ # Each polygon is a list of rings (exterior and optional holes)
695
901
  new_coords = [[[lon, lat] for lat, lon in polygon] for polygon in feature['geometry']['coordinates']]
696
902
  feature['geometry']['coordinates'] = new_coords
697
903
  elif feature['geometry']['type'] == 'MultiPolygon':
698
904
  # Swap coordinates for multi-polygons (collections of polygons, each possibly with holes)
905
+ # Each multipolygon is a list of polygons, each with its own rings
699
906
  new_coords = [[[[lon, lat] for lat, lon in polygon] for polygon in multipolygon] for multipolygon in feature['geometry']['coordinates']]
700
907
  feature['geometry']['coordinates'] = new_coords
701
908
 
702
909
  def save_geojson(features, save_path):
703
910
  """
704
- Save GeoJSON features to a file with swapped coordinates.
911
+ Save GeoJSON features to a file with coordinate swapping and pretty printing.
912
+
913
+ This function takes a list of GeoJSON features, swaps their coordinate ordering
914
+ unconditionally, wraps them in a FeatureCollection, and saves to a file with proper
915
+ JSON formatting. It creates a deep copy to avoid modifying the original data.
705
916
 
706
917
  Args:
707
918
  features (list): List of GeoJSON features to save
919
+ Each feature should have valid GeoJSON structure
920
+ Features can be Polygon or MultiPolygon type
921
+ Coordinates are assumed to be in (lat, lon) order and are always swapped
708
922
  save_path (str): Path where the GeoJSON file should be saved
923
+ Will overwrite existing file if present
924
+ Directory must exist and be writable
925
+ File is opened in text mode with the platform default encoding (no encoding is passed to open())
926
+
927
+ Returns:
928
+ None
929
+
930
+ Note:
931
+ - Creates deep copy to preserve original feature data
932
+ - Swaps coordinates from (lat, lon) to (lon, lat) order
933
+ - Wraps features in a FeatureCollection object
934
+ - Uses pretty printing with 2-space indentation
935
+ - Handles both Polygon and MultiPolygon geometries
709
936
  """
710
937
  # Create deep copy to avoid modifying original data
711
938
  geojson_features = copy.deepcopy(features)
712
939
 
713
- # Swap coordinate ordering
940
+ # Swap coordinate ordering from (lat, lon) to (lon, lat)
714
941
  swap_coordinates(geojson_features)
715
942
 
716
- # Create FeatureCollection
943
+ # Create FeatureCollection structure
717
944
  geojson = {
718
945
  "type": "FeatureCollection",
719
946
  "features": geojson_features
720
947
  }
721
948
 
722
- # Write to file with pretty printing
949
+ # Write to file with pretty printing (2-space indentation)
723
950
  with open(save_path, 'w') as f:
724
951
  json.dump(geojson, f, indent=2)
725
952
 
726
953
  def find_building_containing_point(building_gdf, target_point):
727
954
  """
728
- Find building IDs that contain a given point.
955
+ Find building IDs that contain a given point in their footprint.
956
+
957
+ This function identifies all buildings in a GeoDataFrame whose footprint contains
958
+ a specified geographic point. Only Polygon geometries are considered, and the point
959
+ must be fully contained within the building footprint (not just touching).
729
960
 
730
961
  Args:
731
962
  building_gdf (GeoDataFrame): GeoDataFrame containing building geometries and IDs
732
- target_point (tuple): Tuple of (lon, lat)
963
+ Must have 'geometry' column with Polygon geometries
964
+ Should have an 'id' column; a match without one is recorded as None (the index is not used)
965
+ Geometries must be in same CRS as target_point coordinates
966
+ target_point (tuple): Tuple of (lon, lat) coordinates to check
967
+ Must be in same coordinate system as building_gdf geometries
968
+ Order must be (longitude, latitude) if using WGS84
733
969
 
734
970
  Returns:
735
971
  list: List of building IDs containing the target point
972
+ Empty list if no buildings contain the point
973
+ Multiple IDs possible if buildings overlap
974
+ IDs are in arbitrary order
975
+
976
+ Note:
977
+ - Only processes Polygon geometries (skips MultiPolygons and others)
978
+ - Uses Shapely's contains() method which requires point to be fully inside polygon
979
+ - No spatial indexing is used, performs linear search through all buildings
736
980
  """
737
- # Create Shapely point
981
+ # Create Shapely point from input coordinates
738
982
  point = Point(target_point[0], target_point[1])
739
983
 
984
+ # Initialize list to store matching building IDs
740
985
  id_list = []
986
+
987
+ # Check each building in the GeoDataFrame
741
988
  for idx, row in building_gdf.iterrows():
742
- # Skip any geometry that is not Polygon
989
+ # Skip any geometry that is not a simple Polygon
743
990
  if not isinstance(row.geometry, Polygon):
744
991
  continue
745
992
 
746
- # Check if point is within polygon
993
+ # Check if point is fully contained within building footprint
747
994
  if row.geometry.contains(point):
995
+ # Use specified ID column or None if not found
748
996
  id_list.append(row.get('id', None))
749
997
 
750
998
  return id_list
@@ -752,40 +1000,51 @@ def find_building_containing_point(building_gdf, target_point):
752
1000
  def get_buildings_in_drawn_polygon(building_gdf, drawn_polygon_vertices,
753
1001
  operation='within'):
754
1002
  """
755
- Given a GeoDataFrame of building footprints and a set of drawn polygon
756
- vertices (in lon, lat), return the building IDs that fall within or
757
- intersect the drawn polygon.
758
-
1003
+ Find buildings that intersect with or are contained within a user-drawn polygon.
1004
+
1005
+ This function identifies buildings from a GeoDataFrame that have a specified spatial
1006
+ relationship with a polygon defined by user-drawn vertices. The relationship can be
1007
+ either intersection (building overlaps polygon) or containment (building fully within
1008
+ polygon).
1009
+
759
1010
  Args:
760
- building_gdf (GeoDataFrame):
761
- A GeoDataFrame containing building footprints with:
762
- - geometry column containing Polygon geometries
763
- - id column containing building IDs
764
-
765
- drawn_polygon_vertices (list):
766
- A list of (lon, lat) tuples representing the polygon drawn by the user.
767
-
768
- operation (str):
769
- Determines how to include buildings.
770
- Use "intersect" to include buildings that intersect the drawn polygon.
771
- Use "within" to include buildings that lie entirely within the drawn polygon.
772
-
1011
+ building_gdf (GeoDataFrame): GeoDataFrame containing building footprints
1012
+ Must have 'geometry' column with Polygon geometries
1013
+ Should have 'id' column; if it is missing, None is returned for each match (the index is not used)
1014
+ Geometries must be in same CRS as drawn_polygon_vertices
1015
+ drawn_polygon_vertices (list): List of (lon, lat) tuples defining polygon vertices
1016
+ Must be in same coordinate system as building_gdf geometries
1017
+ Must form a valid polygon (3+ distinct vertices; the ring may be left open or explicitly closed)
1018
+ Order must be (longitude, latitude) if using WGS84
1019
+ operation (str, optional): Type of spatial relationship to check
1020
+ 'within': buildings must be fully contained in drawn polygon (default)
1021
+ 'intersect': buildings must overlap with drawn polygon
1022
+
773
1023
  Returns:
774
- list:
775
- A list of building IDs (strings or ints) that satisfy the condition.
1024
+ list: List of building IDs that satisfy the spatial relationship
1025
+ Empty list if no buildings meet the criteria
1026
+ IDs are returned in order of processing
1027
+ May contain None values if buildings lack IDs
1028
+
1029
+ Note:
1030
+ - Only processes Polygon geometries (skips MultiPolygons and others)
1031
+ - No spatial indexing is used, performs linear search through all buildings
1032
+ - Invalid operation parameter will raise ValueError
1033
+ - Does not validate polygon closure (first vertex = last vertex)
776
1034
  """
777
1035
  # Create Shapely Polygon from drawn vertices
778
1036
  drawn_polygon_shapely = Polygon(drawn_polygon_vertices)
779
1037
 
1038
+ # Initialize list to store matching building IDs
780
1039
  included_building_ids = []
781
1040
 
782
1041
  # Check each building in the GeoDataFrame
783
1042
  for idx, row in building_gdf.iterrows():
784
- # Skip any geometry that is not Polygon
1043
+ # Skip any geometry that is not a simple Polygon
785
1044
  if not isinstance(row.geometry, Polygon):
786
1045
  continue
787
1046
 
788
- # Depending on the operation, check the relationship
1047
+ # Check spatial relationship based on specified operation
789
1048
  if operation == 'intersect':
790
1049
  if row.geometry.intersects(drawn_polygon_shapely):
791
1050
  included_building_ids.append(row.get('id', None))
@@ -799,23 +1058,41 @@ def get_buildings_in_drawn_polygon(building_gdf, drawn_polygon_vertices,
799
1058
 
800
1059
  def process_building_footprints_by_overlap(filtered_gdf, overlap_threshold=0.5):
801
1060
  """
802
- Process building footprints to merge overlapping buildings.
1061
+ Process building footprints to merge overlapping buildings based on area overlap ratio.
1062
+
1063
+ This function identifies and merges building footprints that significantly overlap with each other.
1064
+ Buildings are processed in order of decreasing area, and smaller buildings that overlap significantly
1065
+ with larger ones are assigned the ID of the larger building, effectively merging them.
803
1066
 
804
1067
  Args:
805
1068
  filtered_gdf (geopandas.GeoDataFrame): GeoDataFrame containing building footprints
806
- overlap_threshold (float): Threshold for overlap ratio (0.0-1.0) to merge buildings
1069
+ Must have 'geometry' column with building polygons
1070
+ If CRS is set, areas will be calculated in Web Mercator projection
1071
+ overlap_threshold (float, optional): Threshold for overlap ratio (0.0-1.0) to merge buildings
1072
+ Default is 0.5 (50% overlap)
1073
+ Higher values require more overlap for merging
1074
+ Lower values will result in more aggressive merging
807
1075
 
808
1076
  Returns:
809
1077
  geopandas.GeoDataFrame: Processed GeoDataFrame with updated IDs
1078
+ Overlapping buildings will share the same ID
1079
+ Original geometries are preserved, only IDs are updated
1080
+ All other columns remain unchanged
1081
+
1082
+ Note:
1083
+ - Uses R-tree spatial indexing for efficient overlap detection
1084
+ - Projects to Web Mercator (EPSG:3857) for planar area calculation if CRS is set (areas are latitude-distorted, not true ground areas)
1085
+ - Handles invalid geometries by attempting to fix them with buffer(0)
1086
+ - Processes buildings in order of decreasing area (largest first)
810
1087
  """
811
1088
  # Make a copy to avoid modifying the original
812
1089
  gdf = filtered_gdf.copy()
813
1090
 
814
- # Ensure 'id' column exists
1091
+ # Ensure 'id' column exists, use index if not present
815
1092
  if 'id' not in gdf.columns:
816
1093
  gdf['id'] = gdf.index
817
1094
 
818
- # Check if CRS is set before transforming
1095
+ # Project to Web Mercator for accurate area calculation if CRS is set
819
1096
  if gdf.crs is None:
820
1097
  # Work with original geometries if no CRS is set
821
1098
  gdf_projected = gdf.copy()
@@ -825,18 +1102,18 @@ def process_building_footprints_by_overlap(filtered_gdf, overlap_threshold=0.5):
825
1102
  # Project to Web Mercator for accurate area calculation
826
1103
  gdf_projected = gdf.to_crs("EPSG:3857")
827
1104
 
828
- # Calculate areas on the geometries
1105
+ # Calculate areas and sort by decreasing area for processing largest buildings first
829
1106
  gdf_projected['area'] = gdf_projected.geometry.area
830
1107
  gdf_projected = gdf_projected.sort_values(by='area', ascending=False)
831
1108
  gdf_projected = gdf_projected.reset_index(drop=True)
832
1109
 
833
- # Create spatial index for efficient querying
1110
+ # Create spatial index for efficient querying of potential overlaps
834
1111
  spatial_idx = index.Index()
835
1112
  for i, geom in enumerate(gdf_projected.geometry):
836
1113
  if geom.is_valid:
837
1114
  spatial_idx.insert(i, geom.bounds)
838
1115
  else:
839
- # Fix invalid geometries
1116
+ # Fix invalid geometries using buffer(0) technique
840
1117
  fixed_geom = geom.buffer(0)
841
1118
  if fixed_geom.is_valid:
842
1119
  spatial_idx.insert(i, fixed_geom.bounds)
@@ -844,52 +1121,52 @@ def process_building_footprints_by_overlap(filtered_gdf, overlap_threshold=0.5):
844
1121
  # Track ID replacements to avoid repeated processing
845
1122
  id_mapping = {}
846
1123
 
847
- # Process each building (skip the largest one)
1124
+ # Process each building (skip index 0: the largest has no larger building to merge into)
848
1125
  for i in range(1, len(gdf_projected)):
849
1126
  current_poly = gdf_projected.iloc[i].geometry
850
1127
  current_area = gdf_projected.iloc[i].area
851
1128
  current_id = gdf_projected.iloc[i]['id']
852
1129
 
853
- # Skip if already mapped
1130
+ # Skip if already mapped to another ID
854
1131
  if current_id in id_mapping:
855
1132
  continue
856
1133
 
857
- # Ensure geometry is valid
1134
+ # Ensure geometry is valid for processing
858
1135
  if not current_poly.is_valid:
859
1136
  current_poly = current_poly.buffer(0)
860
1137
  if not current_poly.is_valid:
861
1138
  continue
862
1139
 
863
- # Find potential overlaps with larger polygons
1140
+ # Find potential overlaps with larger polygons using spatial index
864
1141
  potential_overlaps = [j for j in spatial_idx.intersection(current_poly.bounds) if j < i]
865
1142
 
866
1143
  for j in potential_overlaps:
867
1144
  larger_poly = gdf_projected.iloc[j].geometry
868
1145
  larger_id = gdf_projected.iloc[j]['id']
869
1146
 
870
- # Skip if already processed
1147
+ # If the larger building was itself merged, use the ID it was merged into
871
1148
  if larger_id in id_mapping:
872
1149
  larger_id = id_mapping[larger_id]
873
1150
 
874
- # Ensure geometry is valid
1151
+ # Ensure geometry is valid for intersection test
875
1152
  if not larger_poly.is_valid:
876
1153
  larger_poly = larger_poly.buffer(0)
877
1154
  if not larger_poly.is_valid:
878
1155
  continue
879
1156
 
880
1157
  try:
881
- # Calculate overlap
1158
+ # Calculate overlap ratio relative to current building's area
882
1159
  if current_poly.intersects(larger_poly):
883
1160
  overlap = current_poly.intersection(larger_poly)
884
1161
  overlap_ratio = overlap.area / current_area
885
1162
 
886
- # Replace ID if overlap exceeds threshold
1163
+ # Merge buildings if overlap exceeds threshold
887
1164
  if overlap_ratio > overlap_threshold:
888
1165
  id_mapping[current_id] = larger_id
889
1166
  gdf_projected.at[i, 'id'] = larger_id
890
1167
  break # Stop at first significant overlap
891
1168
  except (GEOSException, ValueError) as e:
892
- # Handle geometry errors gracefully
1169
+ # Skip problematic geometries
893
1170
  continue
894
1171
 
895
1172
  # Propagate ID changes through the original DataFrame