voxcity 0.5.14__py3-none-any.whl → 0.5.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of voxcity might be problematic. Click here for more details.

@@ -1,3 +1,17 @@
1
+ """
2
+ CityGML Parser Module for PLATEAU Data
3
+
4
+ This module provides functionality to parse CityGML files from Japan's PLATEAU dataset,
5
+ extracting building footprints, terrain information, and vegetation data.
6
+ The module handles various LOD (Level of Detail) representations and coordinate systems.
7
+
8
+ Main features:
9
+ - Download and extract PLATEAU data from URLs
10
+ - Parse CityGML files for buildings, terrain, and vegetation
11
+ - Handle coordinate transformations and validations
12
+ - Support for mesh code decoding
13
+ """
14
+
1
15
  import requests
2
16
  import zipfile
3
17
  import io
@@ -19,8 +33,18 @@ from shapely.geometry import Polygon
19
33
 
20
34
  def decode_2nd_level_mesh(mesh6):
21
35
  """
22
- Given exactly 6 digits (string) for a standard (2nd-level) mesh code,
23
- return (lat_sw, lon_sw, lat_ne, lon_ne) in degrees.
36
+ Decode a standard (2nd-level) mesh code to geographic coordinates.
37
+
38
+ Args:
39
+ mesh6 (str): A 6-digit mesh code string.
40
+
41
+ Returns:
42
+ tuple: (lat_sw, lon_sw, lat_ne, lon_ne) coordinates in degrees representing
43
+ the southwest and northeast corners of the mesh.
44
+
45
+ Notes:
46
+ - The mesh system divides Japan into a grid of cells
47
+ - Each 2nd-level mesh is 1/12° latitude × 0.125° longitude
24
48
  """
25
49
  code = int(mesh6)
26
50
  # Extract each piece
@@ -46,11 +70,20 @@ def decode_2nd_level_mesh(mesh6):
46
70
 
47
71
  def decode_mesh_code(mesh_str):
48
72
  """
49
- Handles:
50
- - 6-digit codes (standard 2nd-level mesh),
51
- - 8-digit codes (2nd-level subdivided 10×10).
73
+ Decode mesh codes into geographic boundary coordinates.
74
+
75
+ Args:
76
+ mesh_str (str): A mesh code string (6 or 8 digits).
77
+
78
+ Returns:
79
+ list: List of (lon, lat) tuples forming a closed polygon in WGS84.
80
+
81
+ Raises:
82
+ ValueError: If mesh code length is invalid or unsupported.
52
83
 
53
- Returns a list of (lon, lat) forming a *closed* bounding polygon in WGS84.
84
+ Notes:
85
+ - 6-digit codes represent standard 2nd-level mesh
86
+ - 8-digit codes represent 2nd-level mesh subdivided 10×10
54
87
  """
55
88
  if len(mesh_str) < 6:
56
89
  raise ValueError(f"Mesh code '{mesh_str}' is too short.")
@@ -99,9 +132,16 @@ def decode_mesh_code(mesh_str):
99
132
 
100
133
  def get_tile_polygon_from_filename(filename):
101
134
  """
102
- Extract the mesh code from a typical Project PLATEAU filename
103
- (e.g. '51357348_bldg_6697_op.gml') and decode it.
104
- Returns the bounding polygon in WGS84 as a list of (lon, lat).
135
+ Extract and decode mesh code from PLATEAU filename into boundary polygon.
136
+
137
+ Args:
138
+ filename (str): PLATEAU format filename (e.g. '51357348_bldg_6697_op.gml')
139
+
140
+ Returns:
141
+ list: List of (lon, lat) tuples forming the tile boundary polygon in WGS84.
142
+
143
+ Raises:
144
+ ValueError: If no mesh code found in filename.
105
145
  """
106
146
  # Look for leading digits until the first underscore
107
147
  m = re.match(r'^(\d+)_', filename)
@@ -118,7 +158,18 @@ def get_tile_polygon_from_filename(filename):
118
158
 
119
159
  def download_and_extract_zip(url, extract_to='.'):
120
160
  """
121
- Download and extract a zip file from a URL
161
+ Download and extract a zip file from a URL to specified directory.
162
+
163
+ Args:
164
+ url (str): URL of the zip file to download.
165
+ extract_to (str): Directory to extract files to (default: current directory).
166
+
167
+ Returns:
168
+ tuple: (extraction_path, folder_name) where files were extracted.
169
+
170
+ Notes:
171
+ - Creates a subdirectory named after the zip file (without .zip)
172
+ - Prints status messages for success/failure
122
173
  """
123
174
  response = requests.get(url)
124
175
  if response.status_code == 200:
@@ -141,14 +192,30 @@ def download_and_extract_zip(url, extract_to='.'):
141
192
 
142
193
  def validate_coords(coords):
143
194
  """
144
- Validate that coordinates are not infinite or NaN
195
+ Validate that coordinates are finite numbers.
196
+
197
+ Args:
198
+ coords (list): List of coordinate tuples.
199
+
200
+ Returns:
201
+ bool: True if all coordinates are valid (not inf/NaN), False otherwise.
145
202
  """
146
203
  return all(not np.isinf(x) and not np.isnan(x) for coord in coords for x in coord)
147
204
 
148
205
 
149
206
  def swap_coordinates(polygon):
150
207
  """
151
- Swap coordinates in a polygon (lat/lon to lon/lat or vice versa)
208
+ Swap coordinate order in a polygon (lat/lon to lon/lat or vice versa).
209
+
210
+ Args:
211
+ polygon (Polygon/MultiPolygon): Input polygon with coordinates to swap.
212
+
213
+ Returns:
214
+ Polygon/MultiPolygon: New polygon with swapped coordinates.
215
+
216
+ Notes:
217
+ - Handles both single Polygon and MultiPolygon geometries
218
+ - Creates new geometry objects rather than modifying in place
152
219
  """
153
220
  if isinstance(polygon, MultiPolygon):
154
221
  new_polygons = []
@@ -165,7 +232,25 @@ def swap_coordinates(polygon):
165
232
 
166
233
  def extract_terrain_info(file_path, namespaces):
167
234
  """
168
- Extract terrain elevation information from a CityGML file
235
+ Extract terrain elevation data from CityGML file.
236
+
237
+ Args:
238
+ file_path (str): Path to CityGML file.
239
+ namespaces (dict): XML namespace mappings.
240
+
241
+ Returns:
242
+ list: List of dictionaries containing terrain features:
243
+ - relief_id: Feature identifier
244
+ - tin_id: TIN surface identifier
245
+ - triangle_id/breakline_id/mass_point_id: Specific element ID
246
+ - elevation: Height value
247
+ - geometry: Shapely geometry object
248
+ - source_file: Original file name
249
+
250
+ Notes:
251
+ - Processes TIN Relief, breaklines, and mass points
252
+ - Validates all geometries before inclusion
253
+ - Handles coordinate conversion and validation
169
254
  """
170
255
  try:
171
256
  tree = ET.parse(file_path)
@@ -290,8 +375,24 @@ def extract_terrain_info(file_path, namespaces):
290
375
 
291
376
  def extract_vegetation_info(file_path, namespaces):
292
377
  """
293
- Extract vegetation features (PlantCover, SolitaryVegetationObject)
294
- from a CityGML file, handling LOD0..LOD3 geometry and MultiSurface/CompositeSurface.
378
+ Extract vegetation features from CityGML file.
379
+
380
+ Args:
381
+ file_path (str): Path to CityGML file.
382
+ namespaces (dict): XML namespace mappings.
383
+
384
+ Returns:
385
+ list: List of dictionaries containing vegetation features:
386
+ - object_type: 'PlantCover' or 'SolitaryVegetationObject'
387
+ - vegetation_id: Feature identifier
388
+ - height: Vegetation height (if available)
389
+ - geometry: Shapely geometry object
390
+ - source_file: Original file name
391
+
392
+ Notes:
393
+ - Handles both PlantCover and SolitaryVegetationObject features
394
+ - Processes multiple LOD representations
395
+ - Validates geometries before inclusion
295
396
  """
296
397
  vegetation_elements = []
297
398
  try:
@@ -399,7 +500,21 @@ def extract_vegetation_info(file_path, namespaces):
399
500
 
400
501
  def extract_building_footprint(building, namespaces):
401
502
  """
402
- Extract building footprint from possible LOD representations
503
+ Extract building footprint from CityGML building element.
504
+
505
+ Args:
506
+ building (Element): XML element representing a building.
507
+ namespaces (dict): XML namespace mappings.
508
+
509
+ Returns:
510
+ tuple: (pos_list, ground_elevation) where:
511
+ - pos_list: XML element containing footprint coordinates
512
+ - ground_elevation: Ground level elevation if available
513
+
514
+ Notes:
515
+ - Tries multiple LOD representations (LOD0-LOD2)
516
+ - For LOD1/LOD2 solids, finds the bottom face
517
+ - Returns None if no valid footprint found
403
518
  """
404
519
  lod_tags = [
405
520
  # LOD0
@@ -443,7 +558,20 @@ def extract_building_footprint(building, namespaces):
443
558
 
444
559
  def process_citygml_file(file_path):
445
560
  """
446
- Process a CityGML file to extract building, terrain, and vegetation information
561
+ Process a CityGML file to extract all relevant features.
562
+
563
+ Args:
564
+ file_path (str): Path to CityGML file.
565
+
566
+ Returns:
567
+ tuple: (buildings, terrain_elements, vegetation_elements) where each is a list
568
+ of dictionaries containing feature information.
569
+
570
+ Notes:
571
+ - Processes buildings, terrain, and vegetation features
572
+ - Validates all geometries
573
+ - Handles coordinate transformations
574
+ - Includes error handling and reporting
447
575
  """
448
576
  buildings = []
449
577
  terrain_elements = []
@@ -524,7 +652,20 @@ def process_citygml_file(file_path):
524
652
 
525
653
  def parse_file(file_path, file_type=None):
526
654
  """
527
- Parse a file based on its detected type
655
+ Parse a file based on its type (auto-detected or specified).
656
+
657
+ Args:
658
+ file_path (str): Path to file to parse.
659
+ file_type (str, optional): Force specific file type parsing.
660
+ Valid values: 'citygml', 'geojson', 'xml'
661
+
662
+ Returns:
663
+ tuple: (buildings, terrain_elements, vegetation_elements) lists.
664
+
665
+ Notes:
666
+ - Auto-detects file type from extension if not specified
667
+ - Currently fully implements CityGML parsing only
668
+ - Returns empty lists for unsupported types
528
669
  """
529
670
  if file_type is None:
530
671
  file_ext = os.path.splitext(file_path)[1].lower()
@@ -562,10 +703,19 @@ def parse_file(file_path, file_type=None):
562
703
 
563
704
  def swap_coordinates_if_needed(gdf, geometry_col='geometry'):
564
705
  """
565
- Swap lat/lon coordinates in a GeoDataFrame if its geometry is in lat-lon order.
566
- We assume the original data is EPSG:6697 (which is a projected coordinate system).
567
- But we frequently find that data is actually lat-lon. This function ensures
568
- final geometry is in the correct coordinate order (lon, lat).
706
+ Ensure correct coordinate order in GeoDataFrame geometries.
707
+
708
+ Args:
709
+ gdf (GeoDataFrame): Input GeoDataFrame.
710
+ geometry_col (str): Name of geometry column.
711
+
712
+ Returns:
713
+ list: List of geometries with corrected coordinate order.
714
+
715
+ Notes:
716
+ - Assumes input is EPSG:6697 but may be in lat-lon order
717
+ - Handles Polygon, MultiPolygon, and Point geometries
718
+ - Returns geometries in lon-lat order
569
719
  """
570
720
  swapped_geometries = []
571
721
  for geom in gdf[geometry_col]:
@@ -584,11 +734,24 @@ def load_buid_dem_veg_from_citygml(url=None,
584
734
  citygml_path=None,
585
735
  rectangle_vertices=None):
586
736
  """
587
- Load PLATEAU data, extracting Buildings, Terrain, and Vegetation data.
588
- Can process from URL (download & extract) or directly from local file.
589
-
590
- If rectangle_vertices is provided (as [(lon1, lat1), (lon2, lat2), ...]),
591
- only tiles intersecting that rectangle will be processed.
737
+ Load and process PLATEAU data from URL or local files.
738
+
739
+ Args:
740
+ url (str, optional): URL to download PLATEAU data from.
741
+ base_dir (str): Base directory for file operations.
742
+ citygml_path (str, optional): Path to local CityGML files.
743
+ rectangle_vertices (list, optional): List of (lon, lat) tuples defining
744
+ a bounding rectangle for filtering tiles.
745
+
746
+ Returns:
747
+ tuple: (gdf_buildings, gdf_terrain, gdf_vegetation) GeoDataFrames
748
+ containing processed features.
749
+
750
+ Notes:
751
+ - Can process from URL (download & extract) or local files
752
+ - Optionally filters tiles by geographic extent
753
+ - Handles coordinate transformations
754
+ - Creates GeoDataFrames with proper CRS
592
755
  """
593
756
  all_buildings = []
594
757
  all_terrain = []
@@ -685,7 +848,18 @@ def load_buid_dem_veg_from_citygml(url=None,
685
848
 
686
849
  def process_single_file(file_path):
687
850
  """
688
- Process a single file (for testing)
851
+ Process a single CityGML file for testing purposes.
852
+
853
+ Args:
854
+ file_path (str): Path to CityGML file.
855
+
856
+ Returns:
857
+ tuple: (buildings, terrain, vegetation) lists of extracted features.
858
+
859
+ Notes:
860
+ - Useful for testing and debugging
861
+ - Saves building data to GeoJSON if successful
862
+ - Prints processing statistics
689
863
  """
690
864
  file_ext = os.path.splitext(file_path)[1].lower()
691
865
  if file_ext in ['.gml', '.xml']:
@@ -4,6 +4,26 @@ Module for downloading and processing building data from the EUBUCCO dataset.
4
4
  This module provides functionality to download, extract, filter and convert building footprint data
5
5
  from the EUBUCCO (European Building Characteristics) dataset. It handles downloading zipped GeoPackage
6
6
  files, extracting building geometries and heights, and converting them to GeoJSON format.
7
+
8
+ The module supports:
9
+ - Downloading building data for specific European countries
10
+ - Extracting and processing GeoPackage files
11
+ - Converting coordinates between different coordinate reference systems (CRS)
12
+ - Filtering buildings by geographic area
13
+ - Handling building height data and confidence values
14
+ - Converting to standardized GeoJSON format
15
+
16
+ Key functions:
17
+ - filter_and_convert_gdf_to_geojson_eubucco(): Filters and converts GeoPackage data to GeoJSON
18
+ - download_extract_open_gpkg_from_eubucco(): Downloads and extracts EUBUCCO data
19
+ - get_gdf_from_eubucco(): Gets GeoDataFrame from EUBUCCO for a specific area
20
+ - load_gdf_from_eubucco(): Main interface for loading EUBUCCO building data
21
+
22
+ Dependencies:
23
+ - shapely: For geometric operations
24
+ - fiona: For reading GeoPackage files
25
+ - geopandas: For GeoDataFrame operations
26
+ - requests: For downloading data
7
27
  """
8
28
 
9
29
  import json
@@ -66,11 +86,27 @@ def filter_and_convert_gdf_to_geojson_eubucco(gpkg_file, layer_name, rectangle_v
66
86
  """
67
87
  Filters features in a GeoPackage that intersect with a given rectangle and writes them to a GeoJSON file.
68
88
 
89
+ This function:
90
+ 1. Creates a polygon from the input rectangle vertices
91
+ 2. Handles coordinate system transformations if needed
92
+ 3. Filters buildings that intersect with the target area
93
+ 4. Processes building geometries and properties
94
+ 5. Writes filtered data to GeoJSON format
95
+
69
96
  Parameters:
70
- - gpkg_file (str): Path to the GeoPackage file.
71
- - layer_name (str): Name of the layer within the GeoPackage to process.
72
- - rectangle_vertices (list of tuples): List of (longitude, latitude) tuples defining the rectangle.
73
- - output_geojson (str): Path to the output GeoJSON file.
97
+ - gpkg_file (str): Path to the GeoPackage file containing building data
98
+ - layer_name (str): Name of the layer within the GeoPackage to process
99
+ - rectangle_vertices (list of tuples): List of (longitude, latitude) tuples defining the rectangle vertices
100
+ - output_geojson (str): Path where the output GeoJSON file will be written
101
+
102
+ Returns:
103
+ None
104
+
105
+ Notes:
106
+ - The function assumes input coordinates are in WGS84 (EPSG:4326)
107
+ - Building heights are stored in meters
108
+ - Missing or invalid heights are assigned a default value of -1.0
109
+ - A confidence value of -1.0 indicates no confidence data available
74
110
  """
75
111
  # Create polygon from rectangle vertices (already in lon,lat format)
76
112
  rectangle_polygon = Polygon(rectangle_vertices)
@@ -211,12 +247,25 @@ def download_extract_open_gpkg_from_eubucco(url, output_dir):
211
247
  """
212
248
  Downloads a ZIP file from a URL, extracts the GeoPackage (.gpkg) file, and returns its path.
213
249
 
250
+ This function:
251
+ 1. Downloads a ZIP file from the EUBUCCO API
252
+ 2. Extracts the contents to a specified directory
253
+ 3. Locates and returns the path to the GeoPackage file
254
+
214
255
  Parameters:
215
- - url (str): URL to download the ZIP file containing the GeoPackage.
216
- - output_dir (str): Directory to store extracted files
256
+ - url (str): URL to download the ZIP file containing the GeoPackage
257
+ - output_dir (str): Directory where extracted files will be stored
217
258
 
218
259
  Returns:
219
- - str: Path to the extracted GeoPackage file.
260
+ - str: Absolute path to the extracted GeoPackage file
261
+
262
+ Raises:
263
+ - Exception: If download fails or no GeoPackage file is found
264
+ - requests.exceptions.RequestException: For network-related errors
265
+
266
+ Notes:
267
+ - Creates a subdirectory 'EUBUCCO_raw' in the output directory
268
+ - Logs progress and errors using the logging module
220
269
  """
221
270
  # Download ZIP file from URL
222
271
  logging.info("Downloading file...")
@@ -251,14 +300,27 @@ def get_gdf_from_eubucco(rectangle_vertices, country_links, output_dir, file_nam
251
300
  """
252
301
  Downloads, extracts, filters, and converts GeoPackage data to GeoJSON based on the rectangle vertices.
253
302
 
303
+ This function:
304
+ 1. Determines the target country based on input coordinates
305
+ 2. Downloads and extracts EUBUCCO data for that country
306
+ 3. Reads the GeoPackage into a GeoDataFrame
307
+ 4. Ensures correct coordinate reference system
308
+ 5. Assigns unique IDs to buildings
309
+
254
310
  Parameters:
255
- - rectangle_vertices (list of tuples): List of (longitude, latitude) tuples defining the rectangle.
256
- - country_links (dict): Dictionary mapping country names to their respective GeoPackage URLs.
257
- - output_dir (str): Directory to save output files
258
- - file_name (str): Name for output GeoJSON file
311
+ - rectangle_vertices (list of tuples): List of (longitude, latitude) tuples defining the area of interest
312
+ - country_links (dict): Dictionary mapping country names to their respective GeoPackage URLs
313
+ - output_dir (str): Directory to save downloaded and processed files
314
+ - file_name (str): Name for the output GeoJSON file
259
315
 
260
316
  Returns:
261
- - None: Writes the output to a GeoJSON file.
317
+ - geopandas.GeoDataFrame: DataFrame containing building geometries and properties
318
+ or None if the target area has no EUBUCCO data
319
+
320
+ Notes:
321
+ - Automatically transforms coordinates to WGS84 (EPSG:4326) if needed
322
+ - Assigns sequential IDs to buildings starting from 0
323
+ - Logs errors if target area is not covered by EUBUCCO
262
324
  """
263
325
  # Determine country based on first vertex
264
326
  country_name = get_country_name(rectangle_vertices[0][0], rectangle_vertices[0][1]) # Swap order for get_country_name
@@ -290,12 +352,27 @@ def load_gdf_from_eubucco(rectangle_vertices, output_dir):
290
352
  """
291
353
  Downloads EUBUCCO data and loads it as GeoJSON.
292
354
 
355
+ This function serves as the main interface for loading EUBUCCO building data.
356
+ It handles the complete workflow from downloading to processing the data.
357
+
293
358
  Parameters:
294
359
  - rectangle_vertices (list of tuples): List of (longitude, latitude) tuples defining the area
295
- - output_dir (str): Directory to save intermediate files
360
+ The first vertex is used to determine which country's data to download
361
+ - output_dir (str): Directory to save intermediate and output files
362
+ Creates a subdirectory 'EUBUCCO_raw' for raw downloaded data
296
363
 
297
364
  Returns:
298
- - list: List of GeoJSON features containing building footprints and heights
365
+ - geopandas.GeoDataFrame: DataFrame containing:
366
+ - geometry: Building footprint polygons
367
+ - height: Building heights in meters
368
+ - id: Unique identifier for each building
369
+ or None if the target area has no EUBUCCO data
370
+
371
+ Notes:
372
+ - Output is always in WGS84 (EPSG:4326) coordinate system
373
+ - Building heights are in meters
374
+ - Buildings without height data are assigned a height of -1.0
375
+ - The function automatically determines the appropriate country dataset
299
376
  """
300
377
  # Define output file path
301
378
  file_name = 'building.geojson'