voxcity 0.5.27__py3-none-any.whl → 0.5.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of voxcity might be problematic. Click here for more details.

voxcity/downloader/osm.py CHANGED
@@ -1,1041 +1,1039 @@
1
- """
2
- Module for downloading and processing OpenStreetMap data.
3
-
4
- This module provides functionality to download and process building footprints, land cover,
5
- and other geographic features from OpenStreetMap. It handles downloading data via the Overpass API,
6
- processing the responses, and converting them to standardized GeoJSON format with proper properties.
7
-
8
- The module includes functions for:
9
- - Converting OSM JSON to GeoJSON format
10
- - Processing building footprints with height information
11
- - Handling land cover classifications
12
- - Managing coordinate systems and projections
13
- - Processing roads and other geographic features
14
- """
15
-
16
- import requests
17
- from shapely.geometry import Polygon, shape, mapping
18
- from shapely.ops import transform
19
- import pyproj
20
- from collections import defaultdict
21
- import requests
22
- import json
23
- from shapely.geometry import shape, mapping, Polygon, LineString, Point, MultiPolygon
24
- from shapely.ops import transform
25
- import pyproj
26
- import pandas as pd
27
- import geopandas as gpd
28
-
29
def osm_json_to_geojson(osm_data):
    """Convert OSM JSON data to a GeoJSON FeatureCollection.

    Tagged nodes become Point features. Ways that are not referenced by any
    relation become Polygon features (when ``is_way_polygon`` says so and at
    least four coordinates are available) or LineString features. Relations
    tagged ``type=multipolygon``, or carrying a ``natural``, ``water`` or
    ``waterway`` key, become Polygon or MultiPolygon features assembled from
    their member ways.

    Args:
        osm_data (dict): OSM JSON data from the Overpass API; must contain an
            ``'elements'`` list.

    Returns:
        dict: GeoJSON FeatureCollection. Every feature's properties hold the
        OSM ``id``, the element ``type`` and the raw ``tags`` dictionary.
    """
    elements = osm_data['elements']

    # First pass: index nodes and ways, and remember every way referenced by a
    # relation so the second pass can test membership in O(1) instead of
    # rescanning all elements for each way.
    nodes = {}
    ways = {}
    relation_way_ids = set()
    for element in elements:
        element_type = element['type']
        if element_type == 'node':
            nodes[element['id']] = (element['lon'], element['lat'])
        elif element_type == 'way':
            ways[element['id']] = element
        elif element_type == 'relation':
            for member in element.get('members', ()):
                if member['type'] == 'way':
                    relation_way_ids.add(member['ref'])

    # Second pass: generate features
    features = []
    for element in elements:
        element_type = element['type']

        if element_type == 'node' and element.get('tags'):
            # Tagged (POI) nodes become Point features
            features.append({
                'type': 'Feature',
                'properties': {
                    'id': element['id'],
                    'type': 'node',
                    'tags': element.get('tags', {})
                },
                'geometry': {
                    'type': 'Point',
                    'coordinates': [element['lon'], element['lat']]
                }
            })

        elif element_type == 'way' and 'nodes' in element:
            # Ways that belong to a relation are handled with that relation
            if element['id'] in relation_way_ids:
                continue

            coords = get_way_coords(element, nodes)
            if not coords or len(coords) < 2:
                continue

            is_polygon = is_way_polygon(element)
            if is_polygon:
                # A polygon ring must be explicitly closed
                if coords[0] != coords[-1]:
                    coords.append(coords[0])
                # Fewer than 4 coordinates cannot form a valid ring, so the
                # way is emitted as a LineString instead
                if len(coords) < 4:
                    is_polygon = False

            features.append({
                'type': 'Feature',
                'properties': {
                    'id': element['id'],
                    'type': 'way',
                    'tags': element.get('tags', {})
                },
                'geometry': {
                    'type': 'Polygon' if is_polygon else 'LineString',
                    'coordinates': [coords] if is_polygon else coords
                }
            })

        elif element_type == 'relation' and 'members' in element and 'tags' in element:
            tags = element.get('tags', {})

            # Only multipolygon-like relations are converted
            if not (tags.get('type') == 'multipolygon'
                    or any(key in tags for key in ['natural', 'water', 'waterway'])):
                continue

            # Group member ways by role; anything that is not explicitly
            # 'inner' (including a missing or empty role) counts as 'outer'
            members_by_role = {'outer': [], 'inner': []}
            for member in element['members']:
                if member['type'] == 'way' and member['ref'] in ways:
                    role = member.get('role')
                    if role not in ['outer', 'inner']:
                        role = 'outer'
                    members_by_role[role].append(member['ref'])

            if not members_by_role['outer']:
                continue

            outer_rings = create_rings_from_ways(members_by_role['outer'], ways, nodes)
            inner_rings = create_rings_from_ways(members_by_role['inner'], ways, nodes)

            if not outer_rings:
                continue

            if len(outer_rings) == 1:
                # Single polygon with possible holes
                geometry = {
                    'type': 'Polygon',
                    'coordinates': [outer_rings[0]] + inner_rings
                }
            else:
                # MultiPolygon: one polygon per outer ring. Simplification:
                # all inner rings are attached to the first polygon rather
                # than to the outer ring that actually contains them.
                multipolygon_coords = []
                for outer_ring in outer_rings:
                    polygon_coords = [outer_ring]
                    if not multipolygon_coords:
                        polygon_coords.extend(inner_rings)
                    multipolygon_coords.append(polygon_coords)
                geometry = {
                    'type': 'MultiPolygon',
                    'coordinates': multipolygon_coords
                }

            features.append({
                'type': 'Feature',
                'properties': {
                    'id': element['id'],
                    'type': 'relation',
                    'tags': tags
                },
                'geometry': geometry
            })

    return {
        'type': 'FeatureCollection',
        'features': features
    }
182
-
183
def is_part_of_relation(way_id, osm_data):
    """Tell whether any relation in the OSM data lists the given way as a member.

    Args:
        way_id (int): ID of the way to look for
        osm_data (dict): OSM JSON data with an 'elements' list

    Returns:
        bool: True when some relation has a member of type 'way' whose 'ref'
        equals ``way_id``; False otherwise
    """
    return any(
        member['type'] == 'way' and member['ref'] == way_id
        for candidate in osm_data['elements']
        if candidate['type'] == 'relation' and 'members' in candidate
        for member in candidate['members']
    )
199
-
200
def is_way_polygon(way):
    """Determine if a way should be treated as a polygon based on OSM tags and geometry.

    A way is considered a polygon only if both hold:
    1. It forms a closed loop (first and last node IDs are the same)
    2. It has tags indicating an area: a 'building' key, 'area' == 'yes', or
       any of the keys 'landuse', 'natural', 'water', 'leisure', 'amenity'

    Args:
        way (dict): OSM way element with 'nodes' and optionally 'tags'

    Returns:
        bool: True if the way should be treated as a polygon, False otherwise
        (including when the node list or the tags are missing or empty)
    """
    node_ids = way.get('nodes')
    # An empty node list has no first/last node to compare, so it is not closed
    if not node_ids or node_ids[0] != node_ids[-1]:
        return False

    tags = way.get('tags')
    if not tags:
        return False

    if 'building' in tags or tags.get('area') == 'yes':
        return True
    return any(key in tags for key in ('landuse', 'natural', 'water', 'leisure', 'amenity'))
223
-
224
def get_way_coords(way, nodes):
    """Resolve a way's node references into a list of coordinates.

    Args:
        way (dict): OSM way element; its 'nodes' entry lists node IDs
        nodes (dict): Mapping of node ID to its coordinate pair

    Returns:
        list: Coordinates in way order, or an empty list when the way has no
        'nodes' entry or references a node that is not in ``nodes``
    """
    if 'nodes' not in way:
        return []

    try:
        # One unknown node invalidates the whole way
        return [nodes[ref] for ref in way['nodes']]
    except KeyError:
        return []
247
-
248
def create_rings_from_ways(way_ids, ways, nodes):
    """Create continuous rings by connecting ways that share end nodes.

    Ways are chained end to end (reversing a way when needed) until the ring
    closes or no further way connects. A ring that is already closed is never
    extended, so two closed rings that merely touch at one node stay separate.
    A chain of at least three nodes that remains open is closed by repeating
    its first node. Rings with fewer than four nodes, or that reference a node
    missing from ``nodes``, are dropped.

    Args:
        way_ids (list): List of way IDs that make up the ring(s)
        ways (dict): Dictionary mapping way IDs to way elements
        nodes (dict): Dictionary mapping node IDs to coordinates

    Returns:
        list: List of rings, where each ring is a list of coordinates forming
        a closed polygon with at least 4 points
    """
    if not way_ids:
        return []

    # Node ID sequence for every usable way
    way_nodes = {
        way_id: ways[way_id]['nodes']
        for way_id in way_ids
        if way_id in ways and 'nodes' in ways[way_id]
    }
    if not way_nodes:
        return []

    rings = []
    unused_ways = set(way_nodes)

    while unused_ways:
        # Start a new ring with any unused way
        current_way_id = next(iter(unused_ways))
        unused_ways.remove(current_way_id)

        current_nodes = way_nodes[current_way_id]
        if not current_nodes:
            continue

        ring_nodes = list(current_nodes)

        # Keep attaching ways until nothing connects or the ring is closed.
        # Extending a closed ring would splice a neighbouring ring into it.
        connected = True
        while (connected and unused_ways
               and not (len(ring_nodes) > 1 and ring_nodes[0] == ring_nodes[-1])):
            connected = False
            first_node = ring_nodes[0]
            last_node = ring_nodes[-1]

            for way_id in list(unused_ways):
                nodes_in_way = way_nodes[way_id]
                if not nodes_in_way:
                    unused_ways.remove(way_id)
                    continue

                if nodes_in_way[-1] == first_node:
                    # Way ends where the ring starts: prepend as-is
                    ring_nodes = nodes_in_way[:-1] + ring_nodes
                elif nodes_in_way[0] == first_node:
                    # Way starts where the ring starts: prepend reversed
                    ring_nodes = list(reversed(nodes_in_way))[:-1] + ring_nodes
                elif nodes_in_way[0] == last_node:
                    # Way starts where the ring ends: append as-is
                    ring_nodes.extend(nodes_in_way[1:])
                elif nodes_in_way[-1] == last_node:
                    # Way ends where the ring ends: append reversed
                    ring_nodes.extend(list(reversed(nodes_in_way))[1:])
                else:
                    continue

                unused_ways.remove(way_id)
                connected = True
                break

        # Force-close a chain that is long enough but still open
        if len(ring_nodes) >= 3 and ring_nodes[0] != ring_nodes[-1]:
            ring_nodes.append(ring_nodes[0])

        # A valid ring needs at least four positions (first == last)
        if len(ring_nodes) < 4:
            continue

        try:
            ring_coords = [nodes[node_id] for node_id in ring_nodes]
        except KeyError:
            # Missing node - skip this ring
            continue

        rings.append(ring_coords)

    return rings
372
-
373
def load_gdf_from_openstreetmap(rectangle_vertices):
    """Download and process building footprint data from OpenStreetMap.

    This function:
    1. Queries the Overpass API for building ways, building parts, building
       relations and area artworks inside the bounding box of the vertices
    2. Turns every way, and every way member of a relation, into a Polygon
       feature (relation members inherit the relation's properties)
    3. Extracts height information and other properties from the OSM tags
    4. Converts the features to a GeoDataFrame

    Args:
        rectangle_vertices (list): List of (lon, lat) coordinates defining the bounding box

    Returns:
        geopandas.GeoDataFrame: One row per footprint in EPSG:4326 with, among
        others, 'id', 'height' (0 when no valid height tag exists),
        'min_height', 'levels', 'is_inner', 'building' and further OSM tags.
        Relation members additionally carry 'member_id' and 'role'. An empty
        GeoDataFrame is returned when no footprint was found.

    Raises:
        requests.exceptions.RequestException: If the request times out, the
            server answers with an HTTP error status, or the body is not JSON.
    """
    # Bounding box of the supplied vertices
    lons = [vertex[0] for vertex in rectangle_vertices]
    lats = [vertex[1] for vertex in rectangle_vertices]
    min_lon, max_lon = min(lons), max(lons)
    min_lat, max_lat = min(lats), max(lats)

    # Overpass API query with recursive member extraction
    overpass_url = "http://overpass-api.de/api/interpreter"
    overpass_query = f"""
    [out:json];
    (
      way["building"]({min_lat},{min_lon},{max_lat},{max_lon});
      way["building:part"]({min_lat},{min_lon},{max_lat},{max_lon});
      relation["building"]({min_lat},{min_lon},{max_lat},{max_lon});
      way["tourism"="artwork"]["area"="yes"]({min_lat},{min_lon},{max_lat},{max_lon});
      relation["tourism"="artwork"]["area"="yes"]({min_lat},{min_lon},{max_lat},{max_lon});
    );
    (._; >;); // Recursively get all nodes, ways, and relations within relations
    out geom;
    """

    # Without a timeout a stalled connection would block forever; the value is
    # kept above Overpass's default server-side query timeout (180 s).
    response = requests.get(overpass_url, params={'data': overpass_query}, timeout=300)
    # Surface rate limiting / server errors as HTTP errors instead of letting
    # the JSON decoder fail on an HTML error page
    response.raise_for_status()
    data = response.json()

    # Build a mapping from (type, id) to element
    id_map = {}
    for element in data['elements']:
        id_map[(element['type'], element['id'])] = element

    features = []

    def process_coordinates(geometry):
        """Return a copy of the coordinate sequence in its original order.

        Args:
            geometry: Sequence of (lon, lat) coordinate pairs

        Returns:
            list: The same pairs in the same order; nothing is swapped because
            the input is already (lon, lat)
        """
        return list(geometry)

    def get_height_from_properties(properties):
        """Extract the building height from OSM tags.

        Args:
            properties: Dictionary of feature properties (OSM tags)

        Returns:
            float: Value of 'height' (or 'building:height' when 'height' is
            absent) as a float, or 0 when it is missing or not a plain number
        """
        height = properties.get('height', properties.get('building:height', None))
        if height is not None:
            try:
                return float(height)
            except (TypeError, ValueError):
                pass

        return 0  # Default height if no valid height found

    def extract_properties(element):
        """Build the standardized property dictionary for an OSM element.

        Args:
            element: OSM element with an 'id' and optional 'tags'

        Returns:
            dict: Processed properties with None values removed
        """
        properties = element.get('tags', {})

        height = get_height_from_properties(properties)

        # min_height falls back to 0 when missing or not a plain number
        min_level = properties.get('building:min_level', properties.get('min_level', '0'))
        try:
            min_height = float(properties.get('min_height', '0'))
        except (TypeError, ValueError):
            min_height = 0

        levels = properties.get('building:levels', properties.get('levels', None))
        try:
            levels = float(levels) if levels is not None else None
        except (TypeError, ValueError):
            levels = None

        # Extract additional properties, including those relevant to artworks
        extracted_props = {
            "id": element['id'],
            "height": height,
            "min_height": min_height,
            "confidence": -1.0,
            "is_inner": False,
            "levels": levels,
            "height_source": "explicit" if properties.get('height') or properties.get('building:height')
                             else "levels" if levels is not None
                             else "default",
            "min_level": min_level if min_level != '0' else None,
            "building": properties.get('building', 'no'),
            "building_part": properties.get('building:part', 'no'),
            "building_material": properties.get('building:material'),
            "building_colour": properties.get('building:colour'),
            "roof_shape": properties.get('roof:shape'),
            "roof_material": properties.get('roof:material'),
            "roof_angle": properties.get('roof:angle'),
            "roof_colour": properties.get('roof:colour'),
            "roof_direction": properties.get('roof:direction'),
            "architect": properties.get('architect'),
            "start_date": properties.get('start_date'),
            "name": properties.get('name'),
            "name:en": properties.get('name:en'),
            "name:es": properties.get('name:es'),
            "email": properties.get('email'),
            "phone": properties.get('phone'),
            "wheelchair": properties.get('wheelchair'),
            "tourism": properties.get('tourism'),
            "artwork_type": properties.get('artwork_type'),
            "area": properties.get('area'),
            "layer": properties.get('layer')
        }

        # Remove None values to keep the properties clean
        return {k: v for k, v in extracted_props.items() if v is not None}

    def create_polygon_feature(coords, properties, is_inner=False):
        """Create a GeoJSON polygon feature from a coordinate ring.

        Args:
            coords: List of coordinate pairs defining the polygon
            properties: Dictionary of feature properties (copied, not mutated)
            is_inner: Boolean indicating if this is an inner ring

        Returns:
            dict: GeoJSON Feature, or None when fewer than 4 coordinates are given
        """
        if len(coords) < 4:
            return None

        properties = properties.copy()
        properties["is_inner"] = is_inner
        return {
            "type": "Feature",
            "properties": properties,
            "geometry": {
                "type": "Polygon",
                "coordinates": [process_coordinates(coords)]
            }
        }

    # Process each element, handling relations and their way members
    for element in data['elements']:
        if element['type'] == 'way':
            if 'geometry' in element:
                coords = [(node['lon'], node['lat']) for node in element['geometry']]
                feature = create_polygon_feature(coords, extract_properties(element))
                if feature:
                    features.append(feature)

        elif element['type'] == 'relation':
            properties = extract_properties(element)

            for member in element['members']:
                if member['type'] != 'way':
                    continue

                # Geometry is taken from the way element returned by the query
                way = id_map.get(('way', member['ref']))
                if not way or 'geometry' not in way:
                    continue

                coords = [(node['lon'], node['lat']) for node in way['geometry']]
                # A member without a role is treated like one with an empty role
                role = member.get('role', '')
                member_properties = properties.copy()
                member_properties['member_id'] = way['id']  # Include id of the way
                feature = create_polygon_feature(coords, member_properties, role == 'inner')
                if feature:
                    feature['properties']['role'] = role
                    features.append(feature)

    # Convert features list to GeoDataFrame
    if not features:
        return gpd.GeoDataFrame()

    geometries = [shape(feature['geometry']) for feature in features]
    properties_list = [feature['properties'] for feature in features]

    return gpd.GeoDataFrame(properties_list, geometry=geometries, crs="EPSG:4326")
587
-
588
def convert_feature(feature):
    """Convert a GeoJSON feature to a standardized Polygon feature with a height.

    This function:
    1. Accepts Polygon and MultiPolygon geometries (only the first polygon of
       a MultiPolygon is kept)
    2. Swaps the first two values of every coordinate, i.e. input given as
       [lat, lon] is emitted as (lon, lat); any further values are dropped
    3. Takes the height from the 'height' property, otherwise estimates it as
       3 m per level from a numeric 'building:levels', otherwise uses 10 m
    4. Sets the confidence to -1.0

    Args:
        feature (dict): Input GeoJSON feature with geometry and properties

    Returns:
        dict: Converted feature with a Polygon geometry and the properties
        'height' (float) and 'confidence', or None if the geometry is not a
        (Multi)Polygon, is empty, or its first ring has fewer than 3 coordinates
    """
    default_height = 10.0   # metres, used when nothing better is known
    metres_per_level = 3.0

    geometry = feature['geometry']
    geom_type = geometry['type']

    if geom_type == 'MultiPolygon':
        # Flatten MultiPolygon to Polygon by taking the first polygon
        polygons = geometry['coordinates']
        coordinates = polygons[0] if polygons else []
    elif geom_type == 'Polygon':
        coordinates = geometry['coordinates']
    else:
        # Skip features that are not polygons
        return None

    if not coordinates or len(coordinates[0]) < 3:
        return None

    # Index access instead of two-value unpacking so coordinates that carry a
    # third (elevation) value do not raise
    new_coordinates = [
        [(coord[1], coord[0]) for coord in ring]
        for ring in coordinates
    ]

    # 'properties' may legitimately be null in GeoJSON
    properties = feature.get('properties') or {}

    height = None
    raw_height = properties.get('height')
    if raw_height:
        try:
            height = float(raw_height)
        except (TypeError, ValueError):
            # Unparseable height (e.g. with a unit suffix): estimate instead
            height = None

    if height is None:
        levels = properties.get('building:levels')
        # NOTE(review): string levels (even numeric ones such as "3") are
        # deliberately treated as invalid, as before - confirm this is intended.
        if levels and not isinstance(levels, str):
            try:
                height = float(levels) * metres_per_level
            except (TypeError, ValueError):
                height = default_height
        else:
            height = default_height

    return {
        'type': 'Feature',
        'properties': {
            'height': height,
            'confidence': -1.0  # Confidence score for height estimate
        },
        'geometry': {
            'type': 'Polygon',
            'coordinates': new_coordinates
        }
    }
666
-
667
-
668
# Land cover / land use classes keyed by their numeric class code (0-13).
# Each entry holds the class name and the classification tags that select it.
# Entry order is significant: lookups walk the mapping from top to bottom.
classification_mapping = {
    11: {
        "name": "Road",
        "tags": ["highway", "road", "path", "track", "street"],
    },
    12: {
        "name": "Building",
        "tags": ["building", "house", "apartment", "commercial_building", "industrial_building"],
    },
    10: {
        "name": "Developed space",
        "tags": ["industrial", "retail", "commercial", "residential", "construction",
                 "railway", "parking", "islet", "island"],
    },
    0: {
        "name": "Bareland",
        "tags": ["quarry", "brownfield", "bare_rock", "scree", "shingle", "rock",
                 "sand", "desert", "landfill", "beach"],
    },
    1: {
        "name": "Rangeland",
        "tags": ["grass", "meadow", "grassland", "heath", "garden", "park"],
    },
    2: {
        "name": "Shrub",
        "tags": ["scrub", "shrubland", "bush", "thicket"],
    },
    3: {
        "name": "Agriculture land",
        "tags": ["farmland", "orchard", "vineyard", "plant_nursery",
                 "greenhouse_horticulture", "flowerbed", "allotments", "cropland"],
    },
    4: {
        "name": "Tree",
        "tags": ["wood", "forest", "tree", "tree_row", "tree_canopy"],
    },
    5: {
        "name": "Moss and lichen",
        "tags": ["moss", "lichen", "tundra_vegetation"],
    },
    6: {
        "name": "Wet land",
        "tags": ["wetland", "marsh", "swamp", "bog", "fen", "flooded_vegetation"],
    },
    7: {
        "name": "Mangrove",
        "tags": ["mangrove", "mangrove_forest", "mangrove_swamp"],
    },
    8: {
        "name": "Water",
        "tags": ["water", "waterway", "reservoir", "basin", "bay", "ocean", "sea",
                 "river", "lake"],
    },
    9: {
        "name": "Snow and ice",
        "tags": ["glacier", "snow", "ice", "snowfield", "ice_shelf"],
    },
    13: {
        "name": "No Data",
        "tags": ["unknown", "no_data", "clouds", "undefined"],
    },
}
686
-
687
# Translation table from a classification tag to the OSM key/value pairs that
# express it. A value of "*" stands for "any value of that key".
tag_osm_key_value_mapping = {
    # --- Road ---
    "highway": {"highway": "*"},
    "road": {"highway": "*"},
    "path": {"highway": "path"},
    "track": {"highway": "track"},
    "street": {"highway": "*"},

    # --- Building ---
    "building": {"building": "*"},
    "house": {"building": "house"},
    "apartment": {"building": "apartments"},
    "commercial_building": {"building": "commercial"},
    "industrial_building": {"building": "industrial"},

    # --- Developed space ---
    "industrial": {"landuse": "industrial"},
    "retail": {"landuse": "retail"},
    "commercial": {"landuse": "commercial"},
    "residential": {"landuse": "residential"},
    "construction": {"landuse": "construction"},
    "railway": {"landuse": "railway"},
    "parking": {"amenity": "parking"},
    "islet": {"place": "islet"},
    "island": {"place": "island"},

    # --- Bareland ---
    "quarry": {"landuse": "quarry"},
    "brownfield": {"landuse": "brownfield"},
    "bare_rock": {"natural": "bare_rock"},
    "scree": {"natural": "scree"},
    "shingle": {"natural": "shingle"},
    "rock": {"natural": "rock"},
    "sand": {"natural": "sand"},
    "desert": {"natural": "desert"},
    "landfill": {"landuse": "landfill"},
    "beach": {"natural": "beach"},

    # --- Rangeland ---
    "grass": {"landuse": "grass"},
    "meadow": {"landuse": "meadow"},
    "grassland": {"natural": "grassland"},
    "heath": {"natural": "heath"},
    "garden": {"leisure": "garden"},
    "park": {"leisure": "park"},

    # --- Shrub ---
    "scrub": {"natural": "scrub"},
    "shrubland": {"natural": "scrub"},
    "bush": {"natural": "scrub"},
    "thicket": {"natural": "scrub"},

    # --- Agriculture land ---
    "farmland": {"landuse": "farmland"},
    "orchard": {"landuse": "orchard"},
    "vineyard": {"landuse": "vineyard"},
    "plant_nursery": {"landuse": "plant_nursery"},
    "greenhouse_horticulture": {"landuse": "greenhouse_horticulture"},
    "flowerbed": {"landuse": "flowerbed"},
    "allotments": {"landuse": "allotments"},
    "cropland": {"landuse": "farmland"},

    # --- Tree ---
    "wood": {"natural": "wood"},
    "forest": {"landuse": "forest"},
    "tree": {"natural": "tree"},
    "tree_row": {"natural": "tree_row"},
    "tree_canopy": {"natural": "tree_canopy"},

    # --- Moss and lichen ---
    "moss": {"natural": "fell"},
    "lichen": {"natural": "fell"},
    "tundra_vegetation": {"natural": "fell"},

    # --- Wet land ---
    "wetland": {"natural": "wetland"},
    "marsh": {"wetland": "marsh"},
    "swamp": {"wetland": "swamp"},
    "bog": {"wetland": "bog"},
    "fen": {"wetland": "fen"},
    "flooded_vegetation": {"natural": "wetland"},

    # --- Mangrove ---
    "mangrove": {"natural": "wetland", "wetland": "mangrove"},
    "mangrove_forest": {"natural": "wetland", "wetland": "mangrove"},
    "mangrove_swamp": {"natural": "wetland", "wetland": "mangrove"},

    # --- Water ---
    "water": {"natural": "water"},
    "waterway": {"waterway": "*"},
    "reservoir": {"landuse": "reservoir"},
    "basin": {"landuse": "basin"},
    "bay": {"natural": "bay"},
    "ocean": {"natural": "water", "water": "ocean"},
    "sea": {"natural": "water", "water": "sea"},
    "river": {"waterway": "river"},
    "lake": {"natural": "water", "water": "lake"},

    # --- Snow and ice ---
    "glacier": {"natural": "glacier"},
    "snow": {"natural": "glacier"},
    "ice": {"natural": "glacier"},
    "snowfield": {"natural": "glacier"},
    "ice_shelf": {"natural": "glacier"},

    # --- No Data ---
    "unknown": {"FIXME": "*"},
    "no_data": {"FIXME": "*"},
    "clouds": {"natural": "cloud"},
    "undefined": {"FIXME": "*"},
}
800
-
801
def get_classification(tags):
    """Map a feature's OSM tags to a land cover/use class.

    Classes are tried in the order they appear in ``classification_mapping``.
    For each classification tag of a class, the OSM key/value pairs listed in
    ``tag_osm_key_value_mapping`` are compared with the feature's tags; the
    first single pair that matches ('*' matches any value) decides the class.

    Args:
        tags (dict): Dictionary of OSM tags (key-value pairs)

    Returns:
        tuple: (classification_code, classification_name) of the first match.
        Features without a match that carry an 'area:highway' key are
        reported as (11, 'Road'); everything else yields (None, None).
    """
    for class_code, class_info in classification_mapping.items():
        for label in class_info['tags']:
            wanted_pairs = tag_osm_key_value_mapping.get(label)
            if wanted_pairs:
                for osm_key, osm_value in wanted_pairs.items():
                    if osm_key not in tags:
                        continue
                    if osm_value == '*' or tags[osm_key] == osm_value:
                        return class_code, class_info['name']

            # Islets and islands are additionally recognised via 'place'
            if label in ['islet', 'island'] and tags.get('place') == label:
                return class_code, class_info['name']

    # Roads mapped as areas carry 'area:highway' rather than 'highway'
    if 'area:highway' in tags:
        return 11, 'Road'

    return None, None
836
-
837
def swap_coordinates(geom_mapping):
    """Normalize the coordinate containers of a GeoJSON geometry in place.

    Despite its name, this function does not swap anything: coordinates are
    expected to already be (lon, lat) and each coordinate is kept exactly as
    given. The only change is that every non-empty nested container of
    coordinates (rings, polygons, ...) is rebuilt as a list. Empty containers
    are returned unchanged instead of raising.

    Args:
        geom_mapping (dict): GeoJSON geometry object with a 'coordinates' entry

    Returns:
        dict: The same ``geom_mapping`` object, with 'coordinates' replaced
    """
    def swap_coords(coord_list):
        # Nothing to inspect in an empty container (e.g. an empty geometry)
        if len(coord_list) == 0:
            return coord_list
        # Recurse through nested containers until a single coordinate is reached
        if isinstance(coord_list[0], (list, tuple)):
            return [swap_coords(c) for c in coord_list]
        # Keep original order since already (lon, lat)
        return coord_list

    geom_mapping['coordinates'] = swap_coords(geom_mapping['coordinates'])
    return geom_mapping
863
-
864
- def load_land_cover_gdf_from_osm(rectangle_vertices_ori):
865
- """Load and classify land cover data from OpenStreetMap.
866
-
867
- This function:
868
- 1. Downloads land cover features using the Overpass API
869
- 2. Classifies features based on OSM tags
870
- 3. Handles special cases like roads with width information
871
- 4. Projects geometries for accurate buffering
872
- 5. Creates a standardized GeoDataFrame with classifications
873
-
874
- Args:
875
- rectangle_vertices_ori (list): List of (lon, lat) coordinates defining the area
876
-
877
- Returns:
878
- geopandas.GeoDataFrame: GeoDataFrame with:
879
- - geometry: Polygon or MultiPolygon features
880
- - class: Land cover classification name
881
- - Additional properties from OSM tags
882
- """
883
- # Close the rectangle polygon by adding first vertex at the end
884
- rectangle_vertices = rectangle_vertices_ori.copy()
885
- rectangle_vertices.append(rectangle_vertices_ori[0])
886
-
887
- # Instead of using poly:"lat lon lat lon...", use area coordinates
888
- min_lat = min(lat for lon, lat in rectangle_vertices)
889
- max_lat = max(lat for lon, lat in rectangle_vertices)
890
- min_lon = min(lon for lon, lat in rectangle_vertices)
891
- max_lon = max(lon for lon, lat in rectangle_vertices)
892
-
893
- # Initialize dictionary to store OSM keys and their allowed values
894
- osm_keys_values = defaultdict(list)
895
-
896
- # Build mapping of OSM keys to their possible values from classification mapping
897
- for info in classification_mapping.values():
898
- tags = info['tags']
899
- for tag in tags:
900
- osm_mappings = tag_osm_key_value_mapping.get(tag)
901
- if osm_mappings:
902
- for key, value in osm_mappings.items():
903
- if value == '*':
904
- osm_keys_values[key] = ['*'] # Match all values
905
- else:
906
- if osm_keys_values[key] != ['*'] and value not in osm_keys_values[key]:
907
- osm_keys_values[key].append(value)
908
-
909
- # Build Overpass API query parts for each key-value pair
910
- query_parts = []
911
- for key, values in osm_keys_values.items():
912
- if values:
913
- if values == ['*']:
914
- # Query for any value of this key using bounding box
915
- query_parts.append(f'way["{key}"]({min_lat},{min_lon},{max_lat},{max_lon});')
916
- query_parts.append(f'relation["{key}"]({min_lat},{min_lon},{max_lat},{max_lon});')
917
- else:
918
- # Remove duplicate values
919
- values = list(set(values))
920
- # Build regex pattern for specific values
921
- values_regex = '|'.join(values)
922
- query_parts.append(f'way["{key}"~"^{values_regex}$"]({min_lat},{min_lon},{max_lat},{max_lon});')
923
- query_parts.append(f'relation["{key}"~"^{values_regex}$"]({min_lat},{min_lon},{max_lat},{max_lon});')
924
-
925
- # Combine query parts into complete Overpass query
926
- query_body = "\n ".join(query_parts)
927
- query = (
928
- "[out:json];\n"
929
- "(\n"
930
- f" {query_body}\n"
931
- ");\n"
932
- "out body;\n"
933
- ">;\n"
934
- "out skel qt;"
935
- )
936
-
937
- # Overpass API endpoint
938
- overpass_url = "http://overpass-api.de/api/interpreter"
939
-
940
- # Fetch data from Overpass API
941
- print("Fetching data from Overpass API...")
942
- response = requests.get(overpass_url, params={'data': query})
943
- response.raise_for_status()
944
- data = response.json()
945
-
946
- # Convert OSM data to GeoJSON format using our custom converter instead of json2geojson
947
- print("Converting data to GeoJSON format...")
948
- geojson_data = osm_json_to_geojson(data)
949
-
950
- # Create shapely polygon from rectangle vertices (in lon,lat order)
951
- rectangle_polygon = Polygon(rectangle_vertices)
952
-
953
- # Calculate center point for projection
954
- center_lat = sum(lat for lon, lat in rectangle_vertices) / len(rectangle_vertices)
955
- center_lon = sum(lon for lon, lat in rectangle_vertices) / len(rectangle_vertices)
956
-
957
- # Set up coordinate reference systems for projection
958
- wgs84 = pyproj.CRS('EPSG:4326') # Standard lat/lon
959
- # Albers Equal Area projection centered on area of interest
960
- aea = pyproj.CRS(proj='aea', lat_1=rectangle_polygon.bounds[1], lat_2=rectangle_polygon.bounds[3], lat_0=center_lat, lon_0=center_lon)
961
-
962
- # Create transformers for projecting coordinates
963
- project = pyproj.Transformer.from_crs(wgs84, aea, always_xy=True).transform
964
- project_back = pyproj.Transformer.from_crs(aea, wgs84, always_xy=True).transform
965
-
966
- # Lists to store geometries and properties for GeoDataFrame
967
- geometries = []
968
- properties = []
969
-
970
- for feature in geojson_data['features']:
971
- # Convert feature geometry to shapely object
972
- geom = shape(feature['geometry'])
973
- if not (geom.is_valid and geom.intersects(rectangle_polygon)):
974
- continue
975
-
976
- # Get classification for feature
977
- tags = feature['properties'].get('tags', {})
978
- classification_code, classification_name = get_classification(tags)
979
- if classification_code is None:
980
- continue
981
-
982
- # Special handling for roads
983
- if classification_code == 11:
984
- highway_value = tags.get('highway', '')
985
- # Skip minor paths and walkways
986
- if highway_value in ['footway', 'path', 'pedestrian', 'steps', 'cycleway', 'bridleway']:
987
- continue
988
-
989
- # Determine road width for buffering
990
- width_value = tags.get('width')
991
- lanes_value = tags.get('lanes')
992
- buffer_distance = None
993
-
994
- # Calculate buffer distance based on width or number of lanes
995
- if width_value is not None:
996
- try:
997
- width_meters = float(width_value)
998
- buffer_distance = width_meters / 2
999
- except ValueError:
1000
- pass
1001
- elif lanes_value is not None:
1002
- try:
1003
- num_lanes = float(lanes_value)
1004
- width_meters = num_lanes * 3.0 # 3m per lane
1005
- buffer_distance = width_meters / 2
1006
- except ValueError:
1007
- pass
1008
- else:
1009
- # Default road width
1010
- buffer_distance = 2.5 # 5m total width
1011
-
1012
- if buffer_distance is None:
1013
- continue
1014
-
1015
- # Buffer line features to create polygons
1016
- if geom.geom_type in ['LineString', 'MultiLineString']:
1017
- # Project to planar CRS, buffer, and project back
1018
- geom_proj = transform(project, geom)
1019
- buffered_geom_proj = geom_proj.buffer(buffer_distance)
1020
- buffered_geom = transform(project_back, buffered_geom_proj)
1021
- # Clip to rectangle
1022
- geom = buffered_geom.intersection(rectangle_polygon)
1023
- else:
1024
- continue
1025
-
1026
- # Skip empty geometries
1027
- if geom.is_empty:
1028
- continue
1029
-
1030
- # Add geometries and properties
1031
- if geom.geom_type == 'Polygon':
1032
- geometries.append(geom)
1033
- properties.append({'class': classification_name})
1034
- elif geom.geom_type == 'MultiPolygon':
1035
- for poly in geom.geoms:
1036
- geometries.append(poly)
1037
- properties.append({'class': classification_name})
1038
-
1039
- # Create GeoDataFrame
1040
- gdf = gpd.GeoDataFrame(properties, geometry=geometries, crs="EPSG:4326")
1
+ """
2
+ Module for downloading and processing OpenStreetMap data.
3
+
4
+ This module provides functionality to download and process building footprints, land cover,
5
+ and other geographic features from OpenStreetMap. It handles downloading data via the Overpass API,
6
+ processing the responses, and converting them to standardized GeoJSON format with proper properties.
7
+
8
+ The module includes functions for:
9
+ - Converting OSM JSON to GeoJSON format
10
+ - Processing building footprints with height information
11
+ - Handling land cover classifications
12
+ - Managing coordinate systems and projections
13
+ - Processing roads and other geographic features
14
+ """
15
+
16
+ import requests
17
+ from shapely.geometry import Polygon, shape, mapping
18
+ from shapely.ops import transform
19
+ import pyproj
20
+ from collections import defaultdict
21
+ import requests
22
+ import json
23
+ from shapely.geometry import shape, mapping, Polygon, LineString, Point, MultiPolygon
24
+ from shapely.ops import transform
25
+ import pyproj
26
+ import pandas as pd
27
+ import geopandas as gpd
28
+
29
def osm_json_to_geojson(osm_data):
    """
    Convert OSM JSON data to GeoJSON format with proper handling of complex relations.

    Three kinds of elements produce features:

    - Nodes with a non-empty 'tags' dict become Point features.
    - Ways that are not a member of any relation become Polygon features when
      ``is_way_polygon`` accepts them and they have at least 4 coordinates,
      and LineString features otherwise. Ways with fewer than 2 resolvable
      coordinates are dropped.
    - Relations tagged ``type=multipolygon``, or carrying a 'natural', 'water'
      or 'waterway' key, become a Polygon (one outer ring) or a MultiPolygon
      (several outer rings).

    Args:
        osm_data (dict): OSM JSON data from Overpass API; must contain an
            'elements' list.

    Returns:
        dict: GeoJSON FeatureCollection. Each feature's properties hold the
            OSM 'id', the element 'type' and the element's 'tags'.
    """
    elements = osm_data['elements']
    features = []

    # First pass: index nodes and ways, and collect the IDs of all ways that
    # are referenced by a relation. Collecting them once avoids rescanning
    # every element for each way in the second pass.
    nodes = {}
    ways = {}
    relation_way_ids = set()
    for element in elements:
        element_type = element['type']
        if element_type == 'node':
            nodes[element['id']] = (element['lon'], element['lat'])
        elif element_type == 'way':
            ways[element['id']] = element
        elif element_type == 'relation':
            for member in element.get('members', ()):
                if member['type'] == 'way':
                    relation_way_ids.add(member['ref'])

    # Second pass: generate features
    for element in elements:
        if element['type'] == 'node' and 'tags' in element and element['tags']:
            # Convert POI nodes to Point features
            features.append({
                'type': 'Feature',
                'properties': {
                    'id': element['id'],
                    'type': 'node',
                    'tags': element.get('tags', {})
                },
                'geometry': {
                    'type': 'Point',
                    'coordinates': [element['lon'], element['lat']]
                }
            })

        elif element['type'] == 'way' and 'nodes' in element:
            # Ways that belong to a relation are handled with that relation
            if element['id'] in relation_way_ids:
                continue

            coords = get_way_coords(element, nodes)
            if not coords or len(coords) < 2:
                continue

            is_polygon = is_way_polygon(element)
            if is_polygon:
                # Make sure the ring is explicitly closed
                if coords[0] != coords[-1]:
                    coords.append(coords[0])
                # A valid polygon ring needs at least 4 positions; otherwise
                # fall back to a LineString
                if len(coords) < 4:
                    is_polygon = False

            features.append({
                'type': 'Feature',
                'properties': {
                    'id': element['id'],
                    'type': 'way',
                    'tags': element.get('tags', {})
                },
                'geometry': {
                    'type': 'Polygon' if is_polygon else 'LineString',
                    'coordinates': [coords] if is_polygon else coords
                }
            })

        elif element['type'] == 'relation' and 'members' in element and 'tags' in element:
            tags = element.get('tags', {})

            # Only multipolygon-like relations are converted
            if not (tags.get('type') == 'multipolygon'
                    or any(key in tags for key in ['natural', 'water', 'waterway'])):
                continue

            # Group member ways by role; any role other than 'inner'
            # (including a missing one) is treated as 'outer'
            members_by_role = {'outer': [], 'inner': []}
            for member in element['members']:
                if member['type'] == 'way' and member['ref'] in ways:
                    role = member.get('role')
                    if role not in ('outer', 'inner'):
                        role = 'outer'
                    members_by_role[role].append(member['ref'])

            if not members_by_role['outer']:
                continue

            outer_rings = create_rings_from_ways(members_by_role['outer'], ways, nodes)
            inner_rings = create_rings_from_ways(members_by_role['inner'], ways, nodes)

            if not outer_rings:
                continue

            if len(outer_rings) == 1:
                # Single polygon with possible inner rings
                geometry = {
                    'type': 'Polygon',
                    'coordinates': [outer_rings[0]] + inner_rings
                }
            else:
                # Simplification: all inner rings are attached to the first
                # outer ring. A more accurate implementation would check
                # which outer ring contains each inner ring.
                multipolygon_coords = [[outer_rings[0]] + inner_rings]
                multipolygon_coords.extend([ring] for ring in outer_rings[1:])
                geometry = {
                    'type': 'MultiPolygon',
                    'coordinates': multipolygon_coords
                }

            features.append({
                'type': 'Feature',
                'properties': {
                    'id': element['id'],
                    'type': 'relation',
                    'tags': tags
                },
                'geometry': geometry
            })

    return {
        'type': 'FeatureCollection',
        'features': features
    }
182
+
183
def is_part_of_relation(way_id, osm_data):
    """Tell whether any relation in the OSM data lists the given way as a member.

    Args:
        way_id (int): The ID of the way to look for
        osm_data (dict): OSM JSON data with an 'elements' list

    Returns:
        bool: True as soon as a relation member of type 'way' references
            ``way_id``; False when no relation does
    """
    return any(
        member['type'] == 'way' and member['ref'] == way_id
        for candidate in osm_data['elements']
        if candidate['type'] == 'relation' and 'members' in candidate
        for member in candidate['members']
    )
199
+
200
def is_way_polygon(way):
    """Determine if a way should be treated as a polygon based on OSM tags and geometry.

    A way is considered a polygon only if both conditions hold:
    1. It forms a closed loop (first and last node references are the same)
    2. It has tags indicating an area: a 'building' key, ``area=yes``, or any
       of the keys 'landuse', 'natural', 'water', 'leisure', 'amenity'

    Args:
        way (dict): OSM way element with optional 'nodes' and 'tags' entries

    Returns:
        bool: True if the way should be treated as a polygon, False otherwise
            (including when the way has no nodes or no tags)
    """
    node_refs = way.get('nodes')
    # A missing or empty node list cannot form a closed loop; checking here
    # also avoids an IndexError on ``node_refs[0]``.
    if not node_refs or node_refs[0] != node_refs[-1]:
        return False

    tags = way.get('tags')
    if not tags:
        return False

    if 'building' in tags or tags.get('area') == 'yes':
        return True
    return any(key in tags for key in ('landuse', 'natural', 'water', 'leisure', 'amenity'))
223
+
224
def get_way_coords(way, nodes):
    """Resolve the node references of a way into coordinates.

    Args:
        way (dict): OSM way element; its 'nodes' entry lists node IDs
        nodes (dict): Lookup from node ID to its coordinate pair

    Returns:
        list: The coordinates of the way's nodes, in way order. An empty list
            is returned when the way has no 'nodes' entry or when at least
            one referenced node is absent from ``nodes``.
    """
    if 'nodes' not in way:
        return []

    node_refs = way['nodes']
    # A single unresolved reference invalidates the whole way
    if any(ref not in nodes for ref in node_refs):
        return []

    return [nodes[ref] for ref in node_refs]
247
+
248
+ def create_rings_from_ways(way_ids, ways, nodes):
249
+ """Create continuous rings by connecting ways that share nodes.
250
+
251
+ This function handles complex relations by:
252
+ 1. Connecting ways that share end nodes
253
+ 2. Handling reversed way directions
254
+ 3. Closing rings when possible
255
+ 4. Converting node references to coordinates
256
+
257
+ Args:
258
+ way_ids (list): List of way IDs that make up the ring(s)
259
+ ways (dict): Dictionary mapping way IDs to way elements
260
+ nodes (dict): Dictionary mapping node IDs to coordinates
261
+
262
+ Returns:
263
+ list: List of rings, where each ring is a list of coordinate pairs [(lon, lat), ...]
264
+ forming a closed polygon with at least 4 points
265
+ """
266
+ if not way_ids:
267
+ return []
268
+
269
+ # Extract node IDs for each way
270
+ way_nodes = {}
271
+ for way_id in way_ids:
272
+ if way_id in ways and 'nodes' in ways[way_id]:
273
+ way_nodes[way_id] = ways[way_id]['nodes']
274
+
275
+ # If we have no valid ways, return empty list
276
+ if not way_nodes:
277
+ return []
278
+
279
+ # Connect the ways to form rings
280
+ rings = []
281
+ unused_ways = set(way_nodes.keys())
282
+
283
+ while unused_ways:
284
+ # Start a new ring with the first unused way
285
+ current_way_id = next(iter(unused_ways))
286
+ unused_ways.remove(current_way_id)
287
+
288
+ # Get the first and last node IDs of the current way
289
+ current_nodes = way_nodes[current_way_id]
290
+ if not current_nodes:
291
+ continue
292
+
293
+ # Start building a ring with the nodes of the first way
294
+ ring_nodes = list(current_nodes)
295
+
296
+ # Try to connect more ways to complete the ring
297
+ connected = True
298
+ while connected and unused_ways:
299
+ connected = False
300
+
301
+ # Get the first and last nodes of the current ring
302
+ first_node = ring_nodes[0]
303
+ last_node = ring_nodes[-1]
304
+
305
+ # Try to find a way that connects to either end of our ring
306
+ for way_id in list(unused_ways):
307
+ nodes_in_way = way_nodes[way_id]
308
+ if not nodes_in_way:
309
+ unused_ways.remove(way_id)
310
+ continue
311
+
312
+ # Check if this way connects at the start of our ring
313
+ if nodes_in_way[-1] == first_node:
314
+ # This way connects to the start of our ring (reversed)
315
+ ring_nodes = nodes_in_way[:-1] + ring_nodes
316
+ unused_ways.remove(way_id)
317
+ connected = True
318
+ break
319
+ elif nodes_in_way[0] == first_node:
320
+ # This way connects to the start of our ring
321
+ ring_nodes = list(reversed(nodes_in_way))[:-1] + ring_nodes
322
+ unused_ways.remove(way_id)
323
+ connected = True
324
+ break
325
+ # Check if this way connects at the end of our ring
326
+ elif nodes_in_way[0] == last_node:
327
+ # This way connects to the end of our ring
328
+ ring_nodes.extend(nodes_in_way[1:])
329
+ unused_ways.remove(way_id)
330
+ connected = True
331
+ break
332
+ elif nodes_in_way[-1] == last_node:
333
+ # This way connects to the end of our ring (reversed)
334
+ ring_nodes.extend(list(reversed(nodes_in_way))[1:])
335
+ unused_ways.remove(way_id)
336
+ connected = True
337
+ break
338
+
339
+ # Check if the ring is closed (first node equals last node)
340
+ if ring_nodes and ring_nodes[0] == ring_nodes[-1] and len(ring_nodes) >= 4:
341
+ # Convert node IDs to coordinates
342
+ ring_coords = []
343
+ for node_id in ring_nodes:
344
+ if node_id in nodes:
345
+ ring_coords.append(nodes[node_id])
346
+ else:
347
+ # Missing node - skip this ring
348
+ ring_coords = []
349
+ break
350
+
351
+ if ring_coords and len(ring_coords) >= 4:
352
+ rings.append(ring_coords)
353
+ else:
354
+ # Try to close the ring if it's almost complete
355
+ if ring_nodes and len(ring_nodes) >= 3 and ring_nodes[0] != ring_nodes[-1]:
356
+ ring_nodes.append(ring_nodes[0])
357
+
358
+ # Convert node IDs to coordinates
359
+ ring_coords = []
360
+ for node_id in ring_nodes:
361
+ if node_id in nodes:
362
+ ring_coords.append(nodes[node_id])
363
+ else:
364
+ # Missing node - skip this ring
365
+ ring_coords = []
366
+ break
367
+
368
+ if ring_coords and len(ring_coords) >= 4:
369
+ rings.append(ring_coords)
370
+
371
+ return rings
372
+
373
def load_gdf_from_openstreetmap(rectangle_vertices):
    """Download and process building footprint data from OpenStreetMap.

    This function:
    1. Queries the Overpass API for buildings, building parts and area
       artworks inside the bounding box of ``rectangle_vertices``
    2. Turns every way that carries geometry into a Polygon feature
    3. Turns every way member of a relation into a Polygon feature that
       carries the relation's properties
    4. Converts the features to a GeoDataFrame

    Args:
        rectangle_vertices (list): List of (lon, lat) coordinates defining the bounding box

    Returns:
        geopandas.GeoDataFrame: One row per polygon, in EPSG:4326, with a
            geometry column and the properties built by the helpers below
            (among them 'id', 'height', 'min_height', 'confidence',
            'is_inner', 'building' and 'building_part'; optional properties
            appear only when the OSM element has them). Relation members
            additionally get 'member_id' and 'role'. An empty GeoDataFrame
            without columns is returned when no feature was produced.

    Raises:
        requests.HTTPError: If the Overpass API answers with an error status.
    """
    # Create a bounding box from the rectangle vertices
    min_lon = min(v[0] for v in rectangle_vertices)
    max_lon = max(v[0] for v in rectangle_vertices)
    min_lat = min(v[1] for v in rectangle_vertices)
    max_lat = max(v[1] for v in rectangle_vertices)

    # Enhanced Overpass API query with recursive member extraction
    overpass_url = "http://overpass-api.de/api/interpreter"
    overpass_query = f"""
    [out:json];
    (
      way["building"]({min_lat},{min_lon},{max_lat},{max_lon});
      way["building:part"]({min_lat},{min_lon},{max_lat},{max_lon});
      relation["building"]({min_lat},{min_lon},{max_lat},{max_lon});
      way["tourism"="artwork"]["area"="yes"]({min_lat},{min_lon},{max_lat},{max_lon});
      relation["tourism"="artwork"]["area"="yes"]({min_lat},{min_lon},{max_lat},{max_lon});
    );
    (._; >;); // Recursively get all nodes, ways, and relations within relations
    out geom;
    """

    # Send the request to the Overpass API. Fail on an HTTP error status here
    # rather than with an unrelated JSON decoding error below; this matches
    # the land cover loader in this module.
    response = requests.get(overpass_url, params={'data': overpass_query})
    response.raise_for_status()
    data = response.json()

    # Build a mapping from (type, id) to element
    id_map = {}
    for element in data['elements']:
        id_map[(element['type'], element['id'])] = element

    # Process the response and create features list
    features = []

    def process_coordinates(geometry):
        """Helper function to copy coordinate pairs into a new list.

        Args:
            geometry: List of coordinate pairs to process

        Returns:
            list: The same coordinate pairs in the same order; nothing is
                swapped because the pairs are already (lon, lat)
        """
        return list(geometry)

    def get_height_from_properties(properties):
        """Helper function to extract height from properties.

        Args:
            properties: Dictionary of OSM tags

        Returns:
            float: The 'height' tag (or 'building:height' when 'height' is
                absent) as a number, or 0 when it is missing or not numeric
        """
        height = properties.get('height', properties.get('building:height', None))
        if height is not None:
            try:
                return float(height)
            except (TypeError, ValueError):
                pass

        return 0  # Default height if no valid height found

    def extract_properties(element):
        """Helper function to extract and process properties from an element.

        Args:
            element: OSM element containing an 'id' and optional 'tags'

        Returns:
            dict: Processed properties dictionary; entries whose value is
                None are left out
        """
        properties = element.get('tags', {})

        height = get_height_from_properties(properties)

        # Get min_height and min_level
        min_height = properties.get('min_height', '0')
        min_level = properties.get('building:min_level', properties.get('min_level', '0'))
        try:
            min_height = float(min_height)
        except ValueError:
            min_height = 0

        levels = properties.get('building:levels', properties.get('levels', None))
        try:
            levels = float(levels) if levels is not None else None
        except ValueError:
            levels = None

        # Extract additional properties, including those relevant to artworks
        extracted_props = {
            "id": element['id'],
            "height": height,
            "min_height": min_height,
            "confidence": -1.0,
            "is_inner": False,
            "levels": levels,
            "height_source": "explicit" if properties.get('height') or properties.get('building:height')
                            else "levels" if levels is not None
                            else "default",
            "min_level": min_level if min_level != '0' else None,
            "building": properties.get('building', 'no'),
            "building_part": properties.get('building:part', 'no'),
            "building_material": properties.get('building:material'),
            "building_colour": properties.get('building:colour'),
            "roof_shape": properties.get('roof:shape'),
            "roof_material": properties.get('roof:material'),
            "roof_angle": properties.get('roof:angle'),
            "roof_colour": properties.get('roof:colour'),
            "roof_direction": properties.get('roof:direction'),
            "architect": properties.get('architect'),
            "start_date": properties.get('start_date'),
            "name": properties.get('name'),
            "name:en": properties.get('name:en'),
            "name:es": properties.get('name:es'),
            "email": properties.get('email'),
            "phone": properties.get('phone'),
            "wheelchair": properties.get('wheelchair'),
            "tourism": properties.get('tourism'),
            "artwork_type": properties.get('artwork_type'),
            "area": properties.get('area'),
            "layer": properties.get('layer')
        }

        # Remove None values to keep the properties clean
        return {k: v for k, v in extracted_props.items() if v is not None}

    def create_polygon_feature(coords, properties, is_inner=False):
        """Helper function to create a polygon feature.

        Args:
            coords: List of coordinate pairs defining the polygon
            properties: Dictionary of feature properties (copied, not mutated)
            is_inner: Boolean indicating if this is an inner ring

        Returns:
            dict: GeoJSON Feature object, or None when fewer than 4
                coordinates are given
        """
        if len(coords) >= 4:
            properties = properties.copy()
            properties["is_inner"] = is_inner
            return {
                "type": "Feature",
                "properties": properties,
                "geometry": {
                    "type": "Polygon",
                    "coordinates": [process_coordinates(coords)]
                }
            }
        return None

    # Process each element, handling relations and their way members
    for element in data['elements']:
        if element['type'] == 'way':
            if 'geometry' in element:
                coords = [(node['lon'], node['lat']) for node in element['geometry']]
                properties = extract_properties(element)
                feature = create_polygon_feature(coords, properties)
                if feature:
                    features.append(feature)

        elif element['type'] == 'relation':
            properties = extract_properties(element)

            # Process each member of the relation
            for member in element['members']:
                if member['type'] == 'way':
                    # Look up the way in id_map
                    way = id_map.get(('way', member['ref']))
                    if way and 'geometry' in way:
                        coords = [(node['lon'], node['lat']) for node in way['geometry']]
                        # A member without a role is treated as not inner
                        role = member.get('role', '')
                        is_inner = role == 'inner'
                        member_properties = properties.copy()
                        member_properties['member_id'] = way['id']  # Include id of the way
                        feature = create_polygon_feature(coords, member_properties, is_inner)
                        if feature:
                            feature['properties']['role'] = role
                            features.append(feature)

    # Convert features list to GeoDataFrame
    if not features:
        return gpd.GeoDataFrame()

    geometries = []
    properties_list = []

    for feature in features:
        geometries.append(shape(feature['geometry']))
        properties_list.append(feature['properties'])

    gdf = gpd.GeoDataFrame(properties_list, geometry=geometries, crs="EPSG:4326")
    return gdf
587
+
588
def convert_feature(feature):
    """Convert a GeoJSON feature to a standardized format with height information.

    This function:
    1. Accepts Polygon and MultiPolygon geometries (only the first polygon of
       a MultiPolygon is kept)
    2. Swaps the first two values of every position unconditionally; the
       input is assumed to be (lat, lon) and the output is (lon, lat)
    3. Derives a height: the 'height' property when it is numeric, otherwise
       3 m per level from 'building:levels' when that is numeric, otherwise
       a default of 10 m
    4. Sets a fixed confidence of -1.0

    Args:
        feature (dict): Input GeoJSON feature with geometry and properties

    Returns:
        dict: Converted feature with:
            - Polygon geometry whose positions are (lon, lat) tuples
            - 'height' (float) and 'confidence' (-1.0) as its only properties
            Or None if the geometry is not a (Multi)Polygon, has no rings, or
            its first ring has fewer than 3 positions
    """
    def _as_float(value):
        # Empty / zero values count as "not provided"; values that are not
        # numeric (e.g. "12 m") are reported as None instead of raising.
        if not value:
            return None
        try:
            return float(value)
        except (TypeError, ValueError):
            return None

    geometry = feature['geometry']
    geom_type = geometry['type']

    if geom_type == 'MultiPolygon':
        # Flatten MultiPolygon to Polygon by taking the first polygon
        polygons = geometry['coordinates']
        coordinates = polygons[0] if polygons else []
    elif geom_type == 'Polygon':
        coordinates = geometry['coordinates']
    else:
        # Skip features that are not polygons
        return None

    if not coordinates or len(coordinates[0]) < 3:
        return None

    # Swap each position and convert it to a tuple. Indexing (rather than
    # two-value unpacking) tolerates positions that carry an elevation.
    new_coordinates = [
        [(coord[1], coord[0]) for coord in ring]
        for ring in coordinates
    ]

    # 'properties' may be absent or null in GeoJSON
    properties = feature.get('properties') or {}

    height = _as_float(properties.get('height'))
    if height is None:
        # Estimate from the number of levels, assuming 3 m per level. OSM tag
        # values are strings, so numeric strings are parsed too.
        levels = _as_float(properties.get('building:levels'))
        height = levels * 3.0 if levels is not None else 10.0

    return {
        'type': 'Feature',
        'properties': {
            'height': height,
            'confidence': -1.0  # Confidence score for height estimate
        },
        'geometry': {
            'type': 'Polygon',
            'coordinates': new_coordinates
        }
    }
666
+
667
+
668
# Land cover / land use classes, keyed by integer class code (0-13).
# Every entry holds the display name of the class and the tag keywords that
# belong to it. Dicts iterate in insertion order, so the entry order below is
# kept exactly as it was.
classification_mapping = {
    11: {
        'name': 'Road',
        'tags': ['highway', 'road', 'path', 'track', 'street'],
    },
    12: {
        'name': 'Building',
        'tags': ['building', 'house', 'apartment', 'commercial_building', 'industrial_building'],
    },
    10: {
        'name': 'Developed space',
        'tags': [
            'industrial', 'retail', 'commercial', 'residential', 'construction',
            'railway', 'parking', 'islet', 'island',
        ],
    },
    0: {
        'name': 'Bareland',
        'tags': [
            'quarry', 'brownfield', 'bare_rock', 'scree', 'shingle',
            'rock', 'sand', 'desert', 'landfill', 'beach',
        ],
    },
    1: {
        'name': 'Rangeland',
        'tags': ['grass', 'meadow', 'grassland', 'heath', 'garden', 'park'],
    },
    2: {
        'name': 'Shrub',
        'tags': ['scrub', 'shrubland', 'bush', 'thicket'],
    },
    3: {
        'name': 'Agriculture land',
        'tags': [
            'farmland', 'orchard', 'vineyard', 'plant_nursery',
            'greenhouse_horticulture', 'flowerbed', 'allotments', 'cropland',
        ],
    },
    4: {
        'name': 'Tree',
        'tags': ['wood', 'forest', 'tree', 'tree_row', 'tree_canopy'],
    },
    5: {
        'name': 'Moss and lichen',
        'tags': ['moss', 'lichen', 'tundra_vegetation'],
    },
    6: {
        'name': 'Wet land',
        'tags': ['wetland', 'marsh', 'swamp', 'bog', 'fen', 'flooded_vegetation'],
    },
    7: {
        'name': 'Mangrove',
        'tags': ['mangrove', 'mangrove_forest', 'mangrove_swamp'],
    },
    8: {
        'name': 'Water',
        'tags': ['water', 'reservoir', 'basin', 'bay', 'ocean', 'sea', 'lake'],
    },
    9: {
        'name': 'Snow and ice',
        'tags': ['glacier', 'snow', 'ice', 'snowfield', 'ice_shelf'],
    },
    13: {
        'name': 'No Data',
        'tags': ['unknown', 'no_data', 'clouds', 'undefined'],
    },
}
686
+
687
# Maps each classification tag (the strings listed under 'tags' in
# classification_mapping) to the OSM key/value pair(s) that represent it.
# '*' means match any value for that key.
# Several classification tags are aliases that resolve to the same OSM pair
# (e.g. 'scrub', 'shrubland', 'bush' and 'thicket' all map to natural=scrub).
# A few entries (mangrove*, ocean, sea, lake) list more than one pair; see
# get_classification for how such entries are evaluated.
tag_osm_key_value_mapping = {
    # Road
    'highway': {'highway': '*'},
    'road': {'highway': '*'},
    'path': {'highway': 'path'},
    'track': {'highway': 'track'},
    'street': {'highway': '*'},

    # Building
    'building': {'building': '*'},
    'house': {'building': 'house'},
    'apartment': {'building': 'apartments'},
    'commercial_building': {'building': 'commercial'},
    'industrial_building': {'building': 'industrial'},

    # Developed space
    'industrial': {'landuse': 'industrial'},
    'retail': {'landuse': 'retail'},
    'commercial': {'landuse': 'commercial'},
    'residential': {'landuse': 'residential'},
    'construction': {'landuse': 'construction'},
    'railway': {'landuse': 'railway'},
    'parking': {'amenity': 'parking'},
    'islet': {'place': 'islet'},
    'island': {'place': 'island'},

    # Bareland
    'quarry': {'landuse': 'quarry'},
    'brownfield': {'landuse': 'brownfield'},
    'bare_rock': {'natural': 'bare_rock'},
    'scree': {'natural': 'scree'},
    'shingle': {'natural': 'shingle'},
    'rock': {'natural': 'rock'},
    'sand': {'natural': 'sand'},
    'desert': {'natural': 'desert'},
    'landfill': {'landuse': 'landfill'},
    'beach': {'natural': 'beach'},

    # Rangeland
    'grass': {'landuse': 'grass'},
    'meadow': {'landuse': 'meadow'},
    'grassland': {'natural': 'grassland'},
    'heath': {'natural': 'heath'},
    'garden': {'leisure': 'garden'},
    'park': {'leisure': 'park'},

    # Shrub (all aliases resolve to natural=scrub)
    'scrub': {'natural': 'scrub'},
    'shrubland': {'natural': 'scrub'},
    'bush': {'natural': 'scrub'},
    'thicket': {'natural': 'scrub'},

    # Agriculture land ('cropland' is an alias of landuse=farmland)
    'farmland': {'landuse': 'farmland'},
    'orchard': {'landuse': 'orchard'},
    'vineyard': {'landuse': 'vineyard'},
    'plant_nursery': {'landuse': 'plant_nursery'},
    'greenhouse_horticulture': {'landuse': 'greenhouse_horticulture'},
    'flowerbed': {'landuse': 'flowerbed'},
    'allotments': {'landuse': 'allotments'},
    'cropland': {'landuse': 'farmland'},

    # Tree
    'wood': {'natural': 'wood'},
    'forest': {'landuse': 'forest'},
    'tree': {'natural': 'tree'},
    'tree_row': {'natural': 'tree_row'},
    'tree_canopy': {'natural': 'tree_canopy'},

    # Moss and lichen (all aliases resolve to natural=fell)
    'moss': {'natural': 'fell'},
    'lichen': {'natural': 'fell'},
    'tundra_vegetation': {'natural': 'fell'},

    # Wet land (generic natural=wetland plus wetland=<subtype> refinements)
    'wetland': {'natural': 'wetland'},
    'marsh': {'wetland': 'marsh'},
    'swamp': {'wetland': 'swamp'},
    'bog': {'wetland': 'bog'},
    'fen': {'wetland': 'fen'},
    'flooded_vegetation': {'natural': 'wetland'},

    # Mangrove (two pairs: natural=wetland together with wetland=mangrove)
    'mangrove': {'natural': 'wetland', 'wetland': 'mangrove'},
    'mangrove_forest': {'natural': 'wetland', 'wetland': 'mangrove'},
    'mangrove_swamp': {'natural': 'wetland', 'wetland': 'mangrove'},

    # Water (ocean/sea/lake pair natural=water with a water=<subtype> value)
    'water': {'natural': 'water'},
    'reservoir': {'landuse': 'reservoir'},
    'basin': {'landuse': 'basin'},
    'bay': {'natural': 'bay'},
    'ocean': {'natural': 'water', 'water': 'ocean'},
    'sea': {'natural': 'water', 'water': 'sea'},
    'lake': {'natural': 'water', 'water': 'lake'},

    # Snow and ice (all aliases resolve to natural=glacier)
    'glacier': {'natural': 'glacier'},
    'snow': {'natural': 'glacier'},
    'ice': {'natural': 'glacier'},
    'snowfield': {'natural': 'glacier'},
    'ice_shelf': {'natural': 'glacier'},

    # No Data (any feature carrying a FIXME key, or natural=cloud)
    'unknown': {'FIXME': '*'},
    'no_data': {'FIXME': '*'},
    'clouds': {'natural': 'cloud'},
    'undefined': {'FIXME': '*'}
}
798
+
799
def get_classification(tags):
    """Determine the land cover/use classification based on OSM tags.

    Classes are tried in the insertion order of ``classification_mapping``.
    For each class, every classification tag is resolved through
    ``tag_osm_key_value_mapping`` and the feature matches as soon as any one
    of the resolved OSM key/value pairs is present in ``tags`` ('*' accepts
    any value for the key).

    The first hit is then refined: if a multi-pair entry (for example
    mangrove = natural=wetland + wetland=mangrove) contains the pair that
    produced the hit and all of its pairs are present, the class of that
    more specific entry is returned instead. Without this step a correctly
    tagged mangrove would always be reported as the generic "Wet land"
    class, because natural=wetland alone matches that class first.

    Args:
        tags (dict): Dictionary of OSM tags (key-value pairs)

    Returns:
        tuple: (classification_code, classification_name) where:
            - classification_code (int): Numeric code of the land cover class
            - classification_name (str): Human-readable name of the class
            Or (None, None) if no matching classification is found
    """
    def pair_matches(key, value):
        # '*' is a wildcard: the key only has to be present.
        return key in tags and (value == '*' or tags[key] == value)

    def most_specific(code, name, key, value):
        # Prefer an entry that lists several pairs, includes the pair that
        # just matched, and is satisfied in full by the feature's tags.
        for other_code, other_info in classification_mapping.items():
            for other_tag in other_info['tags']:
                pairs = tag_osm_key_value_mapping.get(other_tag)
                if (
                    pairs
                    and len(pairs) > 1
                    and pairs.get(key) == value
                    and all(pair_matches(k, v) for k, v in pairs.items())
                ):
                    return other_code, other_info['name']
        return code, name

    # Iterate through each classification code and its associated info
    for code, info in classification_mapping.items():
        # Check each tag associated with this classification
        for tag in info['tags']:
            osm_mappings = tag_osm_key_value_mapping.get(tag)
            if osm_mappings:
                for key, value in osm_mappings.items():
                    if pair_matches(key, value):
                        return most_specific(code, info['name'], key, value)
            # Special case for islets and islands: still recognised through
            # the 'place' key when the tag has no usable mapping entry.
            if tag in ('islet', 'island') and tags.get('place') == tag:
                return code, info['name']

    # Special case for roads mapped as areas
    if 'area:highway' in tags:
        return 11, 'Road'
    return None, None
834
+
835
def swap_coordinates(geom_mapping):
    """Normalise the coordinate nesting of a GeoJSON geometry in place.

    Despite its name, this function does not swap axes: individual
    coordinate pairs are returned untouched because the data is already in
    (lon, lat) order. What it does is walk the nested coordinate structure
    (Polygon rings, MultiPolygon parts, ...) and rebuild every container
    that holds other sequences as a plain list. Empty coordinate arrays
    (e.g. an empty Polygon) are passed through unchanged instead of raising
    IndexError.

    Args:
        geom_mapping (dict): GeoJSON geometry object with a 'coordinates'
            entry. The dict is modified in place.

    Returns:
        dict: The same ``geom_mapping`` object, with coordinates still in
            (lon, lat) order.

    Raises:
        KeyError: If ``geom_mapping`` has no 'coordinates' entry.
    """
    def rebuild(coord_list):
        # len() rather than truthiness so array-like inputs also work.
        if len(coord_list) == 0:
            return coord_list
        # A container of sequences: recurse into each element.
        if isinstance(coord_list[0], (list, tuple)):
            return [rebuild(c) for c in coord_list]
        # A single coordinate: keep original order since already (lon, lat).
        return coord_list

    geom_mapping['coordinates'] = rebuild(geom_mapping['coordinates'])
    return geom_mapping
861
+
862
# Upper bound (seconds) for the Overpass HTTP request so that a stalled
# connection cannot block the caller forever.
_OVERPASS_TIMEOUT_SECONDS = 300

# highway=* values that are too minor to be rasterised as road surface.
_MINOR_HIGHWAY_TYPES = frozenset(
    {'footway', 'path', 'pedestrian', 'steps', 'cycleway', 'bridleway'}
)


def _build_land_cover_overpass_query(min_lat, min_lon, max_lat, max_lon):
    """Build the Overpass QL query selecting every way/relation of interest in the bbox."""
    # Collect, per OSM key, the values referenced by the classification
    # tables; ['*'] means "any value" and overrides specific values.
    osm_keys_values = defaultdict(list)
    for info in classification_mapping.values():
        for tag in info['tags']:
            osm_mappings = tag_osm_key_value_mapping.get(tag)
            if not osm_mappings:
                continue
            for key, value in osm_mappings.items():
                if value == '*':
                    osm_keys_values[key] = ['*']
                elif osm_keys_values[key] != ['*'] and value not in osm_keys_values[key]:
                    osm_keys_values[key].append(value)

    bbox = f'({min_lat},{min_lon},{max_lat},{max_lon})'
    query_parts = []
    for key, values in osm_keys_values.items():
        if not values:
            continue
        if values == ['*']:
            selector = f'["{key}"]'
        else:
            # The alternation must be grouped: in "^a|b|c$" the anchors bind
            # only to the first and last alternative, so unrelated values
            # merely containing "b" would be downloaded as well.
            # Sorting keeps the generated query text deterministic.
            values_regex = '|'.join(sorted(values))
            selector = f'["{key}"~"^({values_regex})$"]'
        query_parts.append(f'way{selector}{bbox};')
        query_parts.append(f'relation{selector}{bbox};')

    query_body = "\n    ".join(query_parts)
    return (
        "[out:json];\n"
        "(\n"
        f"    {query_body}\n"
        ");\n"
        "out body;\n"
        ">;\n"
        "out skel qt;"
    )


def _road_buffer_distance(tags, lane_width=3.0, default_width=5.0):
    """Return the half-width in metres used to buffer a road centreline."""
    # Preference order: explicit width, then lanes * lane_width, then the
    # default. A value that cannot be parsed (e.g. "6 m") or is not a
    # positive finite number falls through to the next source instead of
    # discarding the road.
    candidates = (
        (tags.get('width'), 1.0),
        (tags.get('lanes'), lane_width),
    )
    for raw_value, metres_per_unit in candidates:
        if raw_value is None:
            continue
        try:
            width_meters = float(raw_value) * metres_per_unit
        except (TypeError, ValueError):
            continue
        # Rejects zero, negatives, inf and NaN (NaN fails both comparisons).
        if 0 < width_meters < float('inf'):
            return width_meters / 2
    return default_width / 2


def load_land_cover_gdf_from_osm(rectangle_vertices_ori):
    """Load and classify land cover data from OpenStreetMap.

    This function:
    1. Downloads candidate ways and relations inside the bounding box of the
       rectangle using the Overpass API
    2. Converts the response to GeoJSON with osm_json_to_geojson
    3. Classifies each feature with get_classification
    4. Turns road centrelines into polygons by buffering them in a local
       Albers Equal Area projection and clipping them to the rectangle
    5. Collects the resulting polygons into a GeoDataFrame

    Roads whose highway value is a minor path type (footway, path,
    pedestrian, steps, cycleway, bridleway) and road features that are not
    line geometries are skipped. Road width comes from the 'width' tag, else
    'lanes' * 3 m, else 5 m. Non-road features are kept as downloaded (not
    clipped); only Polygon and MultiPolygon results are retained, with
    MultiPolygons split into their parts.

    Args:
        rectangle_vertices_ori (list): List of (lon, lat) coordinates defining the area

    Returns:
        geopandas.GeoDataFrame: GeoDataFrame in EPSG:4326 with:
            - geometry: Polygon features
            - class: Land cover classification name

    Raises:
        requests.exceptions.RequestException: If the Overpass request fails,
            times out, or returns an HTTP error status.
    """
    # Close the rectangle polygon by adding first vertex at the end
    rectangle_vertices = list(rectangle_vertices_ori)
    rectangle_vertices.append(rectangle_vertices_ori[0])

    # Overpass is queried by bounding box rather than by polygon
    min_lat = min(lat for lon, lat in rectangle_vertices)
    max_lat = max(lat for lon, lat in rectangle_vertices)
    min_lon = min(lon for lon, lat in rectangle_vertices)
    max_lon = max(lon for lon, lat in rectangle_vertices)

    query = _build_land_cover_overpass_query(min_lat, min_lon, max_lat, max_lon)

    # Overpass API endpoint
    overpass_url = "http://overpass-api.de/api/interpreter"

    # Fetch data from Overpass API
    print("Fetching data from Overpass API...")
    response = requests.get(
        overpass_url,
        params={'data': query},
        timeout=_OVERPASS_TIMEOUT_SECONDS,
    )
    response.raise_for_status()
    data = response.json()

    # Convert OSM data to GeoJSON format using our custom converter instead of json2geojson
    print("Converting data to GeoJSON format...")
    geojson_data = osm_json_to_geojson(data)

    # Create shapely polygon from rectangle vertices (in lon,lat order)
    rectangle_polygon = Polygon(rectangle_vertices)

    # Approximate centre of the area, used as the projection origin
    center_lat = sum(lat for lon, lat in rectangle_vertices) / len(rectangle_vertices)
    center_lon = sum(lon for lon, lat in rectangle_vertices) / len(rectangle_vertices)

    # Set up coordinate reference systems for projection
    wgs84 = pyproj.CRS('EPSG:4326')  # Standard lat/lon
    # Albers Equal Area projection centered on area of interest, so that
    # buffer distances can be expressed in metres
    aea = pyproj.CRS(proj='aea', lat_1=min_lat, lat_2=max_lat, lat_0=center_lat, lon_0=center_lon)

    # Create transformers for projecting coordinates
    project = pyproj.Transformer.from_crs(wgs84, aea, always_xy=True).transform
    project_back = pyproj.Transformer.from_crs(aea, wgs84, always_xy=True).transform

    # Parallel lists feeding the GeoDataFrame
    geometries = []
    class_names = []

    for feature in geojson_data['features']:
        # Convert feature geometry to shapely object
        geom = shape(feature['geometry'])
        if not (geom.is_valid and geom.intersects(rectangle_polygon)):
            continue

        # Get classification for feature
        tags = feature['properties'].get('tags', {})
        classification_code, classification_name = get_classification(tags)
        if classification_code is None:
            continue

        # Special handling for roads
        if classification_code == 11:
            # Skip minor paths and walkways
            if tags.get('highway', '') in _MINOR_HIGHWAY_TYPES:
                continue
            # Only centrelines can be buffered into a road surface
            if geom.geom_type not in ('LineString', 'MultiLineString'):
                continue

            buffer_distance = _road_buffer_distance(tags)

            # Project to planar CRS, buffer, and project back
            geom_proj = transform(project, geom)
            buffered_geom_proj = geom_proj.buffer(buffer_distance)
            buffered_geom = transform(project_back, buffered_geom_proj)
            # Clip to rectangle
            geom = buffered_geom.intersection(rectangle_polygon)

        # Skip empty geometries
        if geom.is_empty:
            continue

        # Keep polygons only; MultiPolygons are split into their parts
        if geom.geom_type == 'Polygon':
            geometries.append(geom)
            class_names.append(classification_name)
        elif geom.geom_type == 'MultiPolygon':
            for poly in geom.geoms:
                geometries.append(poly)
                class_names.append(classification_name)

    # Building from a column dict guarantees the 'class' column exists even
    # when no feature survived the filters.
    gdf = gpd.GeoDataFrame({'class': class_names}, geometry=geometries, crs="EPSG:4326")
    return gdf