voxcity 0.5.11__py3-none-any.whl → 0.5.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of voxcity might be problematic. Click here for more details.

voxcity/downloader/osm.py CHANGED
@@ -1,647 +1,955 @@
1
- """
2
- Module for downloading and processing OpenStreetMap data.
3
-
4
- This module provides functionality to download and process building footprints, land cover,
5
- and other geographic features from OpenStreetMap. It handles downloading data via the Overpass API,
6
- processing the responses, and converting them to standardized GeoJSON format with proper properties.
7
- """
8
-
9
- import requests
10
- from osm2geojson import json2geojson
11
- from shapely.geometry import Polygon, shape, mapping
12
- from shapely.ops import transform
13
- import pyproj
14
- from collections import defaultdict
15
- import requests
16
- import json
17
- from shapely.geometry import shape, mapping, Polygon
18
- from shapely.ops import transform
19
- import pyproj
20
- from osm2geojson import json2geojson
21
- import pandas as pd
22
- import geopandas as gpd
23
-
24
- def load_gdf_from_openstreetmap(rectangle_vertices):
25
- """Download and process building footprint data from OpenStreetMap.
26
-
27
- Args:
28
- rectangle_vertices: List of (lon, lat) coordinates defining the bounding box
29
-
30
- Returns:
31
- geopandas.GeoDataFrame: GeoDataFrame containing building footprints with standardized properties
32
- """
33
- # Create a bounding box from the rectangle vertices
34
- min_lon = min(v[0] for v in rectangle_vertices)
35
- max_lon = max(v[0] for v in rectangle_vertices)
36
- min_lat = min(v[1] for v in rectangle_vertices)
37
- max_lat = max(v[1] for v in rectangle_vertices)
38
-
39
- # Enhanced Overpass API query with recursive member extraction
40
- overpass_url = "http://overpass-api.de/api/interpreter"
41
- overpass_query = f"""
42
- [out:json];
43
- (
44
- way["building"]({min_lat},{min_lon},{max_lat},{max_lon});
45
- way["building:part"]({min_lat},{min_lon},{max_lat},{max_lon});
46
- relation["building"]({min_lat},{min_lon},{max_lat},{max_lon});
47
- way["tourism"="artwork"]["area"="yes"]({min_lat},{min_lon},{max_lat},{max_lon});
48
- relation["tourism"="artwork"]["area"="yes"]({min_lat},{min_lon},{max_lat},{max_lon});
49
- );
50
- (._; >;); // Recursively get all nodes, ways, and relations within relations
51
- out geom;
52
- """
53
-
54
- # Send the request to the Overpass API
55
- response = requests.get(overpass_url, params={'data': overpass_query})
56
- data = response.json()
57
-
58
- # Build a mapping from (type, id) to element
59
- id_map = {}
60
- for element in data['elements']:
61
- id_map[(element['type'], element['id'])] = element
62
-
63
- # Process the response and create features list
64
- features = []
65
-
66
- def process_coordinates(geometry):
67
- """Helper function to process and reverse coordinate pairs.
68
-
69
- Args:
70
- geometry: List of coordinate pairs to process
71
-
72
- Returns:
73
- list: Processed coordinate pairs with reversed order
74
- """
75
- return [coord for coord in geometry] # Keep original order since already (lon, lat)
76
-
77
- def get_height_from_properties(properties):
78
- """Helper function to extract height from properties.
79
-
80
- Args:
81
- properties: Dictionary of feature properties
82
-
83
- Returns:
84
- float: Extracted or calculated height value
85
- """
86
- height = properties.get('height', properties.get('building:height', None))
87
- if height is not None:
88
- try:
89
- return float(height)
90
- except ValueError:
91
- pass
92
-
93
- return 0 # Default height if no valid height found
94
-
95
- def extract_properties(element):
96
- """Helper function to extract and process properties from an element.
97
-
98
- Args:
99
- element: OSM element containing tags and properties
100
-
101
- Returns:
102
- dict: Processed properties dictionary
103
- """
104
- properties = element.get('tags', {})
105
-
106
- # Get height (now using the helper function)
107
- height = get_height_from_properties(properties)
108
-
109
- # Get min_height and min_level
110
- min_height = properties.get('min_height', '0')
111
- min_level = properties.get('building:min_level', properties.get('min_level', '0'))
112
- try:
113
- min_height = float(min_height)
114
- except ValueError:
115
- min_height = 0
116
-
117
- levels = properties.get('building:levels', properties.get('levels', None))
118
- try:
119
- levels = float(levels) if levels is not None else None
120
- except ValueError:
121
- levels = None
122
-
123
- # Extract additional properties, including those relevant to artworks
124
- extracted_props = {
125
- "id": element['id'],
126
- "height": height,
127
- "min_height": min_height,
128
- "confidence": -1.0,
129
- "is_inner": False,
130
- "levels": levels,
131
- "height_source": "explicit" if properties.get('height') or properties.get('building:height')
132
- else "levels" if levels is not None
133
- else "default",
134
- "min_level": min_level if min_level != '0' else None,
135
- "building": properties.get('building', 'no'),
136
- "building_part": properties.get('building:part', 'no'),
137
- "building_material": properties.get('building:material'),
138
- "building_colour": properties.get('building:colour'),
139
- "roof_shape": properties.get('roof:shape'),
140
- "roof_material": properties.get('roof:material'),
141
- "roof_angle": properties.get('roof:angle'),
142
- "roof_colour": properties.get('roof:colour'),
143
- "roof_direction": properties.get('roof:direction'),
144
- "architect": properties.get('architect'),
145
- "start_date": properties.get('start_date'),
146
- "name": properties.get('name'),
147
- "name:en": properties.get('name:en'),
148
- "name:es": properties.get('name:es'),
149
- "email": properties.get('email'),
150
- "phone": properties.get('phone'),
151
- "wheelchair": properties.get('wheelchair'),
152
- "tourism": properties.get('tourism'),
153
- "artwork_type": properties.get('artwork_type'),
154
- "area": properties.get('area'),
155
- "layer": properties.get('layer')
156
- }
157
-
158
- # Remove None values to keep the properties clean
159
- return {k: v for k, v in extracted_props.items() if v is not None}
160
-
161
- def create_polygon_feature(coords, properties, is_inner=False):
162
- """Helper function to create a polygon feature.
163
-
164
- Args:
165
- coords: List of coordinate pairs defining the polygon
166
- properties: Dictionary of feature properties
167
- is_inner: Boolean indicating if this is an inner ring
168
-
169
- Returns:
170
- dict: GeoJSON Feature object or None if invalid
171
- """
172
- if len(coords) >= 4:
173
- properties = properties.copy()
174
- properties["is_inner"] = is_inner
175
- return {
176
- "type": "Feature",
177
- "properties": properties,
178
- "geometry": {
179
- "type": "Polygon",
180
- "coordinates": [process_coordinates(coords)]
181
- }
182
- }
183
- return None
184
-
185
- # Process each element, handling relations and their way members
186
- for element in data['elements']:
187
- if element['type'] == 'way':
188
- if 'geometry' in element:
189
- coords = [(node['lon'], node['lat']) for node in element['geometry']]
190
- properties = extract_properties(element)
191
- feature = create_polygon_feature(coords, properties)
192
- if feature:
193
- features.append(feature)
194
-
195
- elif element['type'] == 'relation':
196
- properties = extract_properties(element)
197
-
198
- # Process each member of the relation
199
- for member in element['members']:
200
- if member['type'] == 'way':
201
- # Look up the way in id_map
202
- way = id_map.get(('way', member['ref']))
203
- if way and 'geometry' in way:
204
- coords = [(node['lon'], node['lat']) for node in way['geometry']]
205
- is_inner = member['role'] == 'inner'
206
- member_properties = properties.copy()
207
- member_properties['member_id'] = way['id'] # Include id of the way
208
- feature = create_polygon_feature(coords, member_properties, is_inner)
209
- if feature:
210
- feature['properties']['role'] = member['role']
211
- features.append(feature)
212
-
213
- # Convert features list to GeoDataFrame
214
- if not features:
215
- return gpd.GeoDataFrame()
216
-
217
- geometries = []
218
- properties_list = []
219
-
220
- for feature in features:
221
- geometries.append(shape(feature['geometry']))
222
- properties_list.append(feature['properties'])
223
-
224
- gdf = gpd.GeoDataFrame(properties_list, geometry=geometries, crs="EPSG:4326")
225
- return gdf
226
-
227
- def convert_feature(feature):
228
- """Convert a GeoJSON feature to the desired format with height information.
229
-
230
- Args:
231
- feature (dict): Input GeoJSON feature
232
-
233
- Returns:
234
- dict: Converted feature with height and confidence values, or None if invalid
235
- """
236
- new_feature = {}
237
- new_feature['type'] = 'Feature'
238
- new_feature['properties'] = {}
239
- new_feature['geometry'] = {}
240
-
241
- # Convert geometry
242
- geometry = feature['geometry']
243
- geom_type = geometry['type']
244
-
245
- # Convert MultiPolygon to Polygon if necessary
246
- if geom_type == 'MultiPolygon':
247
- # Flatten MultiPolygon to Polygon by taking the first polygon
248
- # Alternatively, you can merge all polygons into one if needed
249
- coordinates = geometry['coordinates'][0] # Take the first polygon
250
- if len(coordinates[0]) < 3:
251
- return None
252
- elif geom_type == 'Polygon':
253
- coordinates = geometry['coordinates']
254
- if len(coordinates[0]) < 3:
255
- return None
256
- else:
257
- # Skip features that are not polygons
258
- return None
259
-
260
- # Reformat coordinates: convert lists to tuples
261
- new_coordinates = []
262
- for ring in coordinates:
263
- new_ring = []
264
- for coord in ring:
265
- # Swap the order if needed (assuming original is [lat, lon])
266
- lat, lon = coord
267
- new_ring.append((lon, lat)) # Changed to (lon, lat)
268
- new_coordinates.append(new_ring)
269
-
270
- new_feature['geometry']['type'] = 'Polygon'
271
- new_feature['geometry']['coordinates'] = new_coordinates
272
-
273
- # Process properties
274
- properties = feature.get('properties', {})
275
- height = properties.get('height')
276
-
277
- # If height is not available, estimate it based on building levels
278
- if not height:
279
- levels = properties.get('building:levels')
280
- if levels:
281
- if type(levels)==str:
282
- # If levels is a string (invalid format), use default height
283
- height = 10.0 # Default height in meters
284
- else:
285
- # Calculate height based on number of levels
286
- height = float(levels) * 3.0 # Assume 3m per level
287
- else:
288
- # No level information available, use default height
289
- height = 10.0 # Default height in meters
290
-
291
- new_feature['properties']['height'] = float(height)
292
- new_feature['properties']['confidence'] = -1.0 # Confidence score for height estimate
293
-
294
- return new_feature
295
-
296
-
297
- # Classification mapping defines the land cover/use classes and their associated tags
298
- # The numbers (0-13) represent class codes used in the system
299
- classification_mapping = {
300
- 11: {'name': 'Road', 'tags': ['highway', 'road', 'path', 'track', 'street']},
301
- 12: {'name': 'Building', 'tags': ['building', 'house', 'apartment', 'commercial_building', 'industrial_building']},
302
- 10: {'name': 'Developed space', 'tags': ['industrial', 'retail', 'commercial', 'residential', 'construction', 'railway', 'parking', 'islet', 'island']},
303
- 0: {'name': 'Bareland', 'tags': ['quarry', 'brownfield', 'bare_rock', 'scree', 'shingle', 'rock', 'sand', 'desert', 'landfill', 'beach']},
304
- 1: {'name': 'Rangeland', 'tags': ['grass', 'meadow', 'grassland', 'heath', 'garden', 'park']},
305
- 2: {'name': 'Shrub', 'tags': ['scrub', 'shrubland', 'bush', 'thicket']},
306
- 3: {'name': 'Agriculture land', 'tags': ['farmland', 'orchard', 'vineyard', 'plant_nursery', 'greenhouse_horticulture', 'flowerbed', 'allotments', 'cropland']},
307
- 4: {'name': 'Tree', 'tags': ['wood', 'forest', 'tree', 'tree_row', 'tree_canopy']},
308
- 5: {'name': 'Moss and lichen', 'tags': ['moss', 'lichen', 'tundra_vegetation']},
309
- 6: {'name': 'Wet land', 'tags': ['wetland', 'marsh', 'swamp', 'bog', 'fen', 'flooded_vegetation']},
310
- 7: {'name': 'Mangrove', 'tags': ['mangrove', 'mangrove_forest', 'mangrove_swamp']},
311
- 8: {'name': 'Water', 'tags': ['water', 'waterway', 'reservoir', 'basin', 'bay', 'ocean', 'sea', 'river', 'lake']},
312
- 9: {'name': 'Snow and ice', 'tags': ['glacier', 'snow', 'ice', 'snowfield', 'ice_shelf']},
313
- 13: {'name': 'No Data', 'tags': ['unknown', 'no_data', 'clouds', 'undefined']}
314
- }
315
-
316
- # Maps classification tags to specific OSM key-value pairs
317
- # '*' means match any value for that key
318
- tag_osm_key_value_mapping = {
319
- # Road
320
- 'highway': {'highway': '*'},
321
- 'road': {'highway': '*'},
322
- 'path': {'highway': 'path'},
323
- 'track': {'highway': 'track'},
324
- 'street': {'highway': '*'},
325
-
326
- # Building
327
- 'building': {'building': '*'},
328
- 'house': {'building': 'house'},
329
- 'apartment': {'building': 'apartments'},
330
- 'commercial_building': {'building': 'commercial'},
331
- 'industrial_building': {'building': 'industrial'},
332
-
333
- # Developed space
334
- 'industrial': {'landuse': 'industrial'},
335
- 'retail': {'landuse': 'retail'},
336
- 'commercial': {'landuse': 'commercial'},
337
- 'residential': {'landuse': 'residential'},
338
- 'construction': {'landuse': 'construction'},
339
- 'railway': {'landuse': 'railway'},
340
- 'parking': {'amenity': 'parking'},
341
- 'islet': {'place': 'islet'},
342
- 'island': {'place': 'island'},
343
-
344
- # Bareland
345
- 'quarry': {'landuse': 'quarry'},
346
- 'brownfield': {'landuse': 'brownfield'},
347
- 'bare_rock': {'natural': 'bare_rock'},
348
- 'scree': {'natural': 'scree'},
349
- 'shingle': {'natural': 'shingle'},
350
- 'rock': {'natural': 'rock'},
351
- 'sand': {'natural': 'sand'},
352
- 'desert': {'natural': 'desert'},
353
- 'landfill': {'landuse': 'landfill'},
354
- 'beach': {'natural': 'beach'},
355
-
356
- # Rangeland
357
- 'grass': {'landuse': 'grass'},
358
- 'meadow': {'landuse': 'meadow'},
359
- 'grassland': {'natural': 'grassland'},
360
- 'heath': {'natural': 'heath'},
361
- 'garden': {'leisure': 'garden'},
362
- 'park': {'leisure': 'park'},
363
-
364
- # Shrub
365
- 'scrub': {'natural': 'scrub'},
366
- 'shrubland': {'natural': 'scrub'},
367
- 'bush': {'natural': 'scrub'},
368
- 'thicket': {'natural': 'scrub'},
369
-
370
- # Agriculture land
371
- 'farmland': {'landuse': 'farmland'},
372
- 'orchard': {'landuse': 'orchard'},
373
- 'vineyard': {'landuse': 'vineyard'},
374
- 'plant_nursery': {'landuse': 'plant_nursery'},
375
- 'greenhouse_horticulture': {'landuse': 'greenhouse_horticulture'},
376
- 'flowerbed': {'landuse': 'flowerbed'},
377
- 'allotments': {'landuse': 'allotments'},
378
- 'cropland': {'landuse': 'farmland'},
379
-
380
- # Tree
381
- 'wood': {'natural': 'wood'},
382
- 'forest': {'landuse': 'forest'},
383
- 'tree': {'natural': 'tree'},
384
- 'tree_row': {'natural': 'tree_row'},
385
- 'tree_canopy': {'natural': 'tree_canopy'},
386
-
387
- # Moss and lichen
388
- 'moss': {'natural': 'fell'},
389
- 'lichen': {'natural': 'fell'},
390
- 'tundra_vegetation': {'natural': 'fell'},
391
-
392
- # Wet land
393
- 'wetland': {'natural': 'wetland'},
394
- 'marsh': {'wetland': 'marsh'},
395
- 'swamp': {'wetland': 'swamp'},
396
- 'bog': {'wetland': 'bog'},
397
- 'fen': {'wetland': 'fen'},
398
- 'flooded_vegetation': {'natural': 'wetland'},
399
-
400
- # Mangrove
401
- 'mangrove': {'natural': 'wetland', 'wetland': 'mangrove'},
402
- 'mangrove_forest': {'natural': 'wetland', 'wetland': 'mangrove'},
403
- 'mangrove_swamp': {'natural': 'wetland', 'wetland': 'mangrove'},
404
-
405
- # Water
406
- 'water': {'natural': 'water'},
407
- 'waterway': {'waterway': '*'},
408
- 'reservoir': {'landuse': 'reservoir'},
409
- 'basin': {'landuse': 'basin'},
410
- 'bay': {'natural': 'bay'},
411
- 'ocean': {'natural': 'water', 'water': 'ocean'},
412
- 'sea': {'natural': 'water', 'water': 'sea'},
413
- 'river': {'waterway': 'river'},
414
- 'lake': {'natural': 'water', 'water': 'lake'},
415
-
416
- # Snow and ice
417
- 'glacier': {'natural': 'glacier'},
418
- 'snow': {'natural': 'glacier'},
419
- 'ice': {'natural': 'glacier'},
420
- 'snowfield': {'natural': 'glacier'},
421
- 'ice_shelf': {'natural': 'glacier'},
422
-
423
- # No Data
424
- 'unknown': {'FIXME': '*'},
425
- 'no_data': {'FIXME': '*'},
426
- 'clouds': {'natural': 'cloud'},
427
- 'undefined': {'FIXME': '*'}
428
- }
429
-
430
- def get_classification(tags):
431
- """Determine the classification code and name for a feature based on its OSM tags.
432
-
433
- Args:
434
- tags (dict): Dictionary of OSM tags
435
-
436
- Returns:
437
- tuple: (classification_code, classification_name) or (None, None) if no match
438
- """
439
- # Iterate through each classification code and its associated info
440
- for code, info in classification_mapping.items():
441
- # Check each tag associated with this classification
442
- for tag in info['tags']:
443
- osm_mappings = tag_osm_key_value_mapping.get(tag)
444
- if osm_mappings:
445
- # Check if the feature's tags match any of the OSM key-value pairs
446
- for key, value in osm_mappings.items():
447
- if key in tags:
448
- if value == '*' or tags[key] == value:
449
- return code, info['name']
450
- # Special case for islets and islands
451
- if tag in ['islet', 'island'] and tags.get('place') == tag:
452
- return code, info['name']
453
- # Special case for roads mapped as areas
454
- if 'area:highway' in tags:
455
- return 11, 'Road'
456
- return None, None
457
-
458
- def swap_coordinates(geom_mapping):
459
- """Swap coordinates from (lon, lat) to (lat, lon) order.
460
-
461
- Args:
462
- geom_mapping (dict): GeoJSON geometry object
463
-
464
- Returns:
465
- dict: Geometry with swapped coordinates
466
- """
467
- coords = geom_mapping['coordinates']
468
-
469
- def swap_coords(coord_list):
470
- # Recursively swap coordinates for nested lists
471
- if isinstance(coord_list[0], (list, tuple)):
472
- return [swap_coords(c) for c in coord_list]
473
- else:
474
- # Keep original order since already (lon, lat)
475
- return coord_list
476
-
477
- geom_mapping['coordinates'] = swap_coords(coords)
478
- return geom_mapping
479
-
480
- def load_land_cover_gdf_from_osm(rectangle_vertices_ori):
481
- """Load land cover data from OpenStreetMap within a given rectangular area.
482
-
483
- Args:
484
- rectangle_vertices_ori (list): List of (lon, lat) coordinates defining the rectangle
485
-
486
- Returns:
487
- GeoDataFrame: GeoDataFrame containing land cover classifications
488
- """
489
- # Close the rectangle polygon by adding first vertex at the end
490
- rectangle_vertices = rectangle_vertices_ori.copy()
491
- rectangle_vertices.append(rectangle_vertices_ori[0])
492
-
493
- # Instead of using poly:"lat lon lat lon...", use area coordinates
494
- min_lat = min(lat for lon, lat in rectangle_vertices)
495
- max_lat = max(lat for lon, lat in rectangle_vertices)
496
- min_lon = min(lon for lon, lat in rectangle_vertices)
497
- max_lon = max(lon for lon, lat in rectangle_vertices)
498
-
499
- # Initialize dictionary to store OSM keys and their allowed values
500
- osm_keys_values = defaultdict(list)
501
-
502
- # Build mapping of OSM keys to their possible values from classification mapping
503
- for info in classification_mapping.values():
504
- tags = info['tags']
505
- for tag in tags:
506
- osm_mappings = tag_osm_key_value_mapping.get(tag)
507
- if osm_mappings:
508
- for key, value in osm_mappings.items():
509
- if value == '*':
510
- osm_keys_values[key] = ['*'] # Match all values
511
- else:
512
- if osm_keys_values[key] != ['*'] and value not in osm_keys_values[key]:
513
- osm_keys_values[key].append(value)
514
-
515
- # Build Overpass API query parts for each key-value pair
516
- query_parts = []
517
- for key, values in osm_keys_values.items():
518
- if values:
519
- if values == ['*']:
520
- # Query for any value of this key using bounding box
521
- query_parts.append(f'way["{key}"]({min_lat},{min_lon},{max_lat},{max_lon});')
522
- query_parts.append(f'relation["{key}"]({min_lat},{min_lon},{max_lat},{max_lon});')
523
- else:
524
- # Remove duplicate values
525
- values = list(set(values))
526
- # Build regex pattern for specific values
527
- values_regex = '|'.join(values)
528
- query_parts.append(f'way["{key}"~"^{values_regex}$"]({min_lat},{min_lon},{max_lat},{max_lon});')
529
- query_parts.append(f'relation["{key}"~"^{values_regex}$"]({min_lat},{min_lon},{max_lat},{max_lon});')
530
-
531
- # Combine query parts into complete Overpass query
532
- query_body = "\n ".join(query_parts)
533
- query = (
534
- "[out:json];\n"
535
- "(\n"
536
- f" {query_body}\n"
537
- ");\n"
538
- "out body;\n"
539
- ">;\n"
540
- "out skel qt;"
541
- )
542
-
543
- # Overpass API endpoint
544
- overpass_url = "http://overpass-api.de/api/interpreter"
545
-
546
- # Fetch data from Overpass API
547
- print("Fetching data from Overpass API...")
548
- response = requests.get(overpass_url, params={'data': query})
549
- response.raise_for_status()
550
- data = response.json()
551
-
552
- # Convert OSM data to GeoJSON format
553
- print("Converting data to GeoJSON format...")
554
- geojson_data = json2geojson(data)
555
-
556
- # Create shapely polygon from rectangle vertices (in lon,lat order)
557
- rectangle_polygon = Polygon(rectangle_vertices)
558
-
559
- # Calculate center point for projection
560
- center_lat = sum(lat for lon, lat in rectangle_vertices) / len(rectangle_vertices)
561
- center_lon = sum(lon for lon, lat in rectangle_vertices) / len(rectangle_vertices)
562
-
563
- # Set up coordinate reference systems for projection
564
- wgs84 = pyproj.CRS('EPSG:4326') # Standard lat/lon
565
- # Albers Equal Area projection centered on area of interest
566
- aea = pyproj.CRS(proj='aea', lat_1=rectangle_polygon.bounds[1], lat_2=rectangle_polygon.bounds[3], lat_0=center_lat, lon_0=center_lon)
567
-
568
- # Create transformers for projecting coordinates
569
- project = pyproj.Transformer.from_crs(wgs84, aea, always_xy=True).transform
570
- project_back = pyproj.Transformer.from_crs(aea, wgs84, always_xy=True).transform
571
-
572
- # Lists to store geometries and properties for GeoDataFrame
573
- geometries = []
574
- properties = []
575
-
576
- for feature in geojson_data['features']:
577
- # Convert feature geometry to shapely object
578
- geom = shape(feature['geometry'])
579
- if not (geom.is_valid and geom.intersects(rectangle_polygon)):
580
- continue
581
-
582
- # Get classification for feature
583
- tags = feature['properties'].get('tags', {})
584
- classification_code, classification_name = get_classification(tags)
585
- if classification_code is None:
586
- continue
587
-
588
- # Special handling for roads
589
- if classification_code == 11:
590
- highway_value = tags.get('highway', '')
591
- # Skip minor paths and walkways
592
- if highway_value in ['footway', 'path', 'pedestrian', 'steps', 'cycleway', 'bridleway']:
593
- continue
594
-
595
- # Determine road width for buffering
596
- width_value = tags.get('width')
597
- lanes_value = tags.get('lanes')
598
- buffer_distance = None
599
-
600
- # Calculate buffer distance based on width or number of lanes
601
- if width_value is not None:
602
- try:
603
- width_meters = float(width_value)
604
- buffer_distance = width_meters / 2
605
- except ValueError:
606
- pass
607
- elif lanes_value is not None:
608
- try:
609
- num_lanes = float(lanes_value)
610
- width_meters = num_lanes * 3.0 # 3m per lane
611
- buffer_distance = width_meters / 2
612
- except ValueError:
613
- pass
614
- else:
615
- # Default road width
616
- buffer_distance = 2.5 # 5m total width
617
-
618
- if buffer_distance is None:
619
- continue
620
-
621
- # Buffer line features to create polygons
622
- if geom.geom_type in ['LineString', 'MultiLineString']:
623
- # Project to planar CRS, buffer, and project back
624
- geom_proj = transform(project, geom)
625
- buffered_geom_proj = geom_proj.buffer(buffer_distance)
626
- buffered_geom = transform(project_back, buffered_geom_proj)
627
- # Clip to rectangle
628
- geom = buffered_geom.intersection(rectangle_polygon)
629
- else:
630
- continue
631
-
632
- # Skip empty geometries
633
- if geom.is_empty:
634
- continue
635
-
636
- # Add geometries and properties
637
- if geom.geom_type == 'Polygon':
638
- geometries.append(geom)
639
- properties.append({'class': classification_name})
640
- elif geom.geom_type == 'MultiPolygon':
641
- for poly in geom.geoms:
642
- geometries.append(poly)
643
- properties.append({'class': classification_name})
644
-
645
- # Create GeoDataFrame
646
- gdf = gpd.GeoDataFrame(properties, geometry=geometries, crs="EPSG:4326")
1
+ """
2
+ Module for downloading and processing OpenStreetMap data.
3
+
4
+ This module provides functionality to download and process building footprints, land cover,
5
+ and other geographic features from OpenStreetMap. It handles downloading data via the Overpass API,
6
+ processing the responses, and converting them to standardized GeoJSON format with proper properties.
7
+ """
8
+
9
+ import requests
10
+ from shapely.geometry import Polygon, shape, mapping
11
+ from shapely.ops import transform
12
+ import pyproj
13
+ from collections import defaultdict
14
+ import requests
15
+ import json
16
+ from shapely.geometry import shape, mapping, Polygon, LineString, Point, MultiPolygon
17
+ from shapely.ops import transform
18
+ import pyproj
19
+ import pandas as pd
20
+ import geopandas as gpd
21
+
22
+ def osm_json_to_geojson(osm_data):
23
+ """
24
+ Convert OSM JSON data to GeoJSON format with proper handling of complex relations.
25
+
26
+ Args:
27
+ osm_data (dict): OSM JSON data from Overpass API
28
+
29
+ Returns:
30
+ dict: GeoJSON FeatureCollection
31
+ """
32
+ features = []
33
+
34
+ # Create a mapping of node IDs to their coordinates
35
+ nodes = {}
36
+ ways = {}
37
+
38
+ # First pass: index all nodes and ways
39
+ for element in osm_data['elements']:
40
+ if element['type'] == 'node':
41
+ nodes[element['id']] = (element['lon'], element['lat'])
42
+ elif element['type'] == 'way':
43
+ ways[element['id']] = element
44
+
45
+ # Second pass: generate features
46
+ for element in osm_data['elements']:
47
+ if element['type'] == 'node' and 'tags' in element and element['tags']:
48
+ # Convert POI nodes to Point features
49
+ feature = {
50
+ 'type': 'Feature',
51
+ 'properties': {
52
+ 'id': element['id'],
53
+ 'type': 'node',
54
+ 'tags': element.get('tags', {})
55
+ },
56
+ 'geometry': {
57
+ 'type': 'Point',
58
+ 'coordinates': [element['lon'], element['lat']]
59
+ }
60
+ }
61
+ features.append(feature)
62
+
63
+ elif element['type'] == 'way' and 'nodes' in element:
64
+ # Skip ways that are part of relations - we'll handle those in relation processing
65
+ if is_part_of_relation(element['id'], osm_data):
66
+ continue
67
+
68
+ # Process standalone way
69
+ coords = get_way_coords(element, nodes)
70
+ if not coords or len(coords) < 2:
71
+ continue
72
+
73
+ # Determine if it's a polygon or a line
74
+ is_polygon = is_way_polygon(element)
75
+
76
+ # Make sure polygons have valid geometry (closed loop with at least 4 points)
77
+ if is_polygon:
78
+ # For closed ways, make sure first and last coordinates are the same
79
+ if coords[0] != coords[-1]:
80
+ coords.append(coords[0])
81
+
82
+ # Check if we have enough coordinates for a valid polygon (at least 4)
83
+ if len(coords) < 4:
84
+ # Not enough coordinates for a polygon, convert to LineString
85
+ is_polygon = False
86
+
87
+ feature = {
88
+ 'type': 'Feature',
89
+ 'properties': {
90
+ 'id': element['id'],
91
+ 'type': 'way',
92
+ 'tags': element.get('tags', {})
93
+ },
94
+ 'geometry': {
95
+ 'type': 'Polygon' if is_polygon else 'LineString',
96
+ 'coordinates': [coords] if is_polygon else coords
97
+ }
98
+ }
99
+ features.append(feature)
100
+
101
+ elif element['type'] == 'relation' and 'members' in element and 'tags' in element:
102
+ tags = element.get('tags', {})
103
+
104
+ # Process multipolygon relations
105
+ if tags.get('type') == 'multipolygon' or any(key in tags for key in ['natural', 'water', 'waterway']):
106
+ # Group member ways by role
107
+ members_by_role = {'outer': [], 'inner': []}
108
+
109
+ for member in element['members']:
110
+ if member['type'] == 'way' and member['ref'] in ways:
111
+ role = member['role']
112
+ if role not in ['outer', 'inner']:
113
+ role = 'outer' # Default to outer if role not specified
114
+ members_by_role[role].append(member['ref'])
115
+
116
+ # Skip if no outer members
117
+ if not members_by_role['outer']:
118
+ continue
119
+
120
+ # Create rings from member ways
121
+ outer_rings = create_rings_from_ways(members_by_role['outer'], ways, nodes)
122
+ inner_rings = create_rings_from_ways(members_by_role['inner'], ways, nodes)
123
+
124
+ # Skip if no valid outer rings
125
+ if not outer_rings:
126
+ continue
127
+
128
+ # Create feature based on number of outer rings
129
+ if len(outer_rings) == 1:
130
+ # Single polygon with possible inner rings
131
+ feature = {
132
+ 'type': 'Feature',
133
+ 'properties': {
134
+ 'id': element['id'],
135
+ 'type': 'relation',
136
+ 'tags': tags
137
+ },
138
+ 'geometry': {
139
+ 'type': 'Polygon',
140
+ 'coordinates': [outer_rings[0]] + inner_rings
141
+ }
142
+ }
143
+ else:
144
+ # MultiPolygon
145
+ # Each outer ring forms a polygon, and we assign inner rings to each polygon
146
+ # This is a simplification - proper assignment would check for containment
147
+ multipolygon_coords = []
148
+ for outer_ring in outer_rings:
149
+ polygon_coords = [outer_ring]
150
+ # For simplicity, assign all inner rings to the first polygon
151
+ # A more accurate implementation would check which outer ring contains each inner ring
152
+ if len(multipolygon_coords) == 0:
153
+ polygon_coords.extend(inner_rings)
154
+ multipolygon_coords.append(polygon_coords)
155
+
156
+ feature = {
157
+ 'type': 'Feature',
158
+ 'properties': {
159
+ 'id': element['id'],
160
+ 'type': 'relation',
161
+ 'tags': tags
162
+ },
163
+ 'geometry': {
164
+ 'type': 'MultiPolygon',
165
+ 'coordinates': multipolygon_coords
166
+ }
167
+ }
168
+
169
+ features.append(feature)
170
+
171
+ return {
172
+ 'type': 'FeatureCollection',
173
+ 'features': features
174
+ }
175
+
176
+ def is_part_of_relation(way_id, osm_data):
177
+ """Check if a way is part of any relation."""
178
+ for element in osm_data['elements']:
179
+ if element['type'] == 'relation' and 'members' in element:
180
+ for member in element['members']:
181
+ if member['type'] == 'way' and member['ref'] == way_id:
182
+ return True
183
+ return False
184
+
185
+ def is_way_polygon(way):
186
+ """Determine if a way should be treated as a polygon."""
187
+ # Check if the way is closed (first and last nodes are the same)
188
+ if 'nodes' in way and way['nodes'][0] == way['nodes'][-1]:
189
+ # Check for tags that indicate this is an area
190
+ if 'tags' in way:
191
+ tags = way['tags']
192
+ if 'building' in tags or ('area' in tags and tags['area'] == 'yes'):
193
+ return True
194
+ if any(k in tags for k in ['landuse', 'natural', 'water', 'leisure', 'amenity']):
195
+ return True
196
+ return False
197
+
198
def get_way_coords(way, nodes):
    """Resolve the node references of a way into coordinates.

    Args:
        way: OSM way element; its 'nodes' entry lists node ids in order.
        nodes: Mapping from node id to that node's coordinates.

    Returns:
        list: Coordinates in way order, or an empty list when the way has no
        'nodes' entry or references a node id that is absent from nodes.
    """
    if 'nodes' not in way:
        return []

    resolved = []
    for ref in way['nodes']:
        if ref not in nodes:
            # One unresolved node invalidates the whole way.
            return []
        resolved.append(nodes[ref])
    return resolved
212
+
213
def create_rings_from_ways(way_ids, ways, nodes):
    """Create continuous rings by connecting ways end to end.

    Ways are joined wherever the end node of one equals an end node of the
    ring under construction (reversing the way when needed). A ring stops
    growing as soon as it is closed, so two rings that merely touch at a
    shared node are kept separate instead of being merged into one
    self-touching ring. A chain that cannot be closed but has at least three
    nodes is closed artificially by repeating its first node.

    Args:
        way_ids: List of way IDs that make up the ring(s).
        ways: Dictionary mapping way IDs to way elements (each with 'nodes').
        nodes: Dictionary mapping node IDs to coordinates.

    Returns:
        List of rings, where each ring is a list of at least four
        coordinates. Rings that reference a node missing from nodes are
        dropped.
    """
    if not way_ids:
        return []

    # Node-id sequences of the ways that are actually available.
    way_nodes = {
        way_id: ways[way_id]['nodes']
        for way_id in way_ids
        if way_id in ways and 'nodes' in ways[way_id]
    }
    if not way_nodes:
        return []

    def is_closed(node_ids):
        # A lone node trivially equals itself; that is not a closed ring.
        return len(node_ids) > 1 and node_ids[0] == node_ids[-1]

    def to_coords(node_ids):
        # Any unresolved node id invalidates the whole ring.
        coords = []
        for node_id in node_ids:
            if node_id not in nodes:
                return []
            coords.append(nodes[node_id])
        return coords

    rings = []
    unused_ways = set(way_nodes)

    while unused_ways:
        # Seed a new ring with an arbitrary unused way.
        current_way_id = next(iter(unused_ways))
        unused_ways.remove(current_way_id)
        ring_nodes = list(way_nodes[current_way_id])
        if not ring_nodes:
            continue

        # Keep attaching ways until the ring closes or nothing else fits.
        connected = True
        while connected and unused_ways and not is_closed(ring_nodes):
            connected = False
            first_node = ring_nodes[0]
            last_node = ring_nodes[-1]

            for way_id in list(unused_ways):
                candidate = way_nodes[way_id]
                if not candidate:
                    unused_ways.remove(way_id)
                    continue

                if candidate[-1] == first_node:
                    # Candidate ends where the ring starts: prepend as-is.
                    ring_nodes = list(candidate[:-1]) + ring_nodes
                elif candidate[0] == first_node:
                    # Candidate starts where the ring starts: prepend reversed.
                    ring_nodes = list(reversed(candidate))[:-1] + ring_nodes
                elif candidate[0] == last_node:
                    # Candidate starts where the ring ends: append as-is.
                    ring_nodes.extend(candidate[1:])
                elif candidate[-1] == last_node:
                    # Candidate ends where the ring ends: append reversed.
                    ring_nodes.extend(list(reversed(candidate))[1:])
                else:
                    continue

                unused_ways.remove(way_id)
                connected = True
                break

        # Best effort: force-close an open chain that is long enough.
        if not is_closed(ring_nodes) and len(ring_nodes) >= 3:
            ring_nodes.append(ring_nodes[0])

        ring_coords = to_coords(ring_nodes)
        if len(ring_coords) >= 4:
            rings.append(ring_coords)

    return rings
331
+
332
def load_gdf_from_openstreetmap(rectangle_vertices):
    """Download and process building footprint data from OpenStreetMap.

    Queries the Overpass API for buildings, building parts and area-mapped
    artworks inside the bounding box of the given vertices. Each way becomes
    one polygon feature; each way member of a relation becomes its own
    polygon feature carrying the relation's properties plus 'member_id',
    'role' and 'is_inner'.

    Args:
        rectangle_vertices: List of (lon, lat) coordinates defining the bounding box

    Returns:
        geopandas.GeoDataFrame: GeoDataFrame (EPSG:4326) containing building
        footprints with standardized properties. An empty GeoDataFrame is
        returned when no feature was found.

    Raises:
        requests.HTTPError: If the Overpass API responds with an error status.
    """
    # Bounding box of the requested area.
    lons = [vertex[0] for vertex in rectangle_vertices]
    lats = [vertex[1] for vertex in rectangle_vertices]
    min_lon, max_lon = min(lons), max(lons)
    min_lat, max_lat = min(lats), max(lats)

    # Overpass query; "(._; >;);" pulls in the members of matched relations.
    overpass_url = "http://overpass-api.de/api/interpreter"
    overpass_query = f"""
    [out:json];
    (
      way["building"]({min_lat},{min_lon},{max_lat},{max_lon});
      way["building:part"]({min_lat},{min_lon},{max_lat},{max_lon});
      relation["building"]({min_lat},{min_lon},{max_lat},{max_lon});
      way["tourism"="artwork"]["area"="yes"]({min_lat},{min_lon},{max_lat},{max_lon});
      relation["tourism"="artwork"]["area"="yes"]({min_lat},{min_lon},{max_lat},{max_lon});
    );
    (._; >;); // Recursively get all nodes, ways, and relations within relations
    out geom;
    """

    response = requests.get(overpass_url, params={'data': overpass_query})
    # Fail loudly on HTTP errors instead of trying to parse an error page,
    # matching load_land_cover_gdf_from_osm.
    response.raise_for_status()
    data = response.json()
    elements = data.get('elements', [])

    # Lookup from (type, id) to element, used to resolve relation members.
    id_map = {(element['type'], element['id']): element for element in elements}

    def get_height_from_properties(properties):
        """Return the 'height' / 'building:height' tag as a float, or 0.

        0 is returned when neither tag is present or the value is not a
        plain number (for example "12 m").
        """
        height = properties.get('height', properties.get('building:height', None))
        if height is not None:
            try:
                return float(height)
            except (TypeError, ValueError):
                pass
        return 0

    def extract_properties(element):
        """Build the standardized property dict for an OSM element.

        Entries whose value is None are omitted from the result.
        """
        properties = element.get('tags', {})

        height = get_height_from_properties(properties)

        min_height = properties.get('min_height', '0')
        min_level = properties.get('building:min_level', properties.get('min_level', '0'))
        try:
            min_height = float(min_height)
        except (TypeError, ValueError):
            min_height = 0

        levels = properties.get('building:levels', properties.get('levels', None))
        try:
            levels = float(levels) if levels is not None else None
        except (TypeError, ValueError):
            levels = None

        # NOTE: 'height_source' only records which tags were present; the
        # height value itself is not derived from the level count here.
        if properties.get('height') or properties.get('building:height'):
            height_source = "explicit"
        elif levels is not None:
            height_source = "levels"
        else:
            height_source = "default"

        extracted_props = {
            "id": element['id'],
            "height": height,
            "min_height": min_height,
            "confidence": -1.0,
            "is_inner": False,
            "levels": levels,
            "height_source": height_source,
            "min_level": min_level if min_level != '0' else None,
            "building": properties.get('building', 'no'),
            "building_part": properties.get('building:part', 'no'),
            "building_material": properties.get('building:material'),
            "building_colour": properties.get('building:colour'),
            "roof_shape": properties.get('roof:shape'),
            "roof_material": properties.get('roof:material'),
            "roof_angle": properties.get('roof:angle'),
            "roof_colour": properties.get('roof:colour'),
            "roof_direction": properties.get('roof:direction'),
            "architect": properties.get('architect'),
            "start_date": properties.get('start_date'),
            "name": properties.get('name'),
            "name:en": properties.get('name:en'),
            "name:es": properties.get('name:es'),
            "email": properties.get('email'),
            "phone": properties.get('phone'),
            "wheelchair": properties.get('wheelchair'),
            "tourism": properties.get('tourism'),
            "artwork_type": properties.get('artwork_type'),
            "area": properties.get('area'),
            "layer": properties.get('layer')
        }

        # Remove None values to keep the properties clean
        return {k: v for k, v in extracted_props.items() if v is not None}

    def create_polygon_feature(coords, properties, is_inner=False):
        """Return a GeoJSON Polygon feature, or None for fewer than 4 points.

        Coordinates are kept in their incoming (lon, lat) order.
        """
        if len(coords) < 4:
            return None
        properties = properties.copy()
        properties["is_inner"] = is_inner
        return {
            "type": "Feature",
            "properties": properties,
            "geometry": {
                "type": "Polygon",
                "coordinates": [list(coords)]
            }
        }

    def way_coordinates(way):
        """Extract (lon, lat) tuples from a way returned with 'out geom'."""
        return [(node['lon'], node['lat']) for node in way['geometry']]

    features = []
    for element in elements:
        if element['type'] == 'way':
            if 'geometry' in element:
                feature = create_polygon_feature(
                    way_coordinates(element), extract_properties(element)
                )
                if feature:
                    features.append(feature)

        elif element['type'] == 'relation':
            properties = extract_properties(element)

            # Each way member becomes its own feature with the relation's tags.
            for member in element.get('members', []):
                if member['type'] != 'way':
                    continue
                way = id_map.get(('way', member['ref']))
                if not way or 'geometry' not in way:
                    continue
                role = member.get('role', '')
                member_properties = properties.copy()
                member_properties['member_id'] = way['id']  # Include id of the way
                feature = create_polygon_feature(
                    way_coordinates(way), member_properties, role == 'inner'
                )
                if feature:
                    feature['properties']['role'] = role
                    features.append(feature)

    # Convert features list to GeoDataFrame
    if not features:
        return gpd.GeoDataFrame()

    geometries = [shape(feature['geometry']) for feature in features]
    properties_list = [feature['properties'] for feature in features]

    gdf = gpd.GeoDataFrame(properties_list, geometry=geometries, crs="EPSG:4326")
    return gdf
534
+
535
def convert_feature(feature):
    """Convert a GeoJSON feature to a Polygon feature with height information.

    The two leading values of every coordinate are swapped (the input is
    assumed to be [lat, lon]; the output is (lon, lat) tuples). A
    MultiPolygon is reduced to its first polygon.

    Height resolution:
      1. 'height' property, when present, truthy and numeric;
      2. otherwise a non-string 'building:levels' value times 3 m per level;
      3. otherwise a default of 10 m. String level values are treated as
         invalid and also get the default.

    Args:
        feature (dict): Input GeoJSON feature

    Returns:
        dict: Converted feature with 'height' and 'confidence' properties, or
        None if the geometry is not a (multi)polygon, has no coordinates, or
        its outer ring has fewer than 3 points.
    """
    default_height = 10.0   # metres, used when nothing usable is tagged
    metres_per_level = 3.0  # assumed storey height

    geometry = feature['geometry']
    geom_type = geometry['type']

    if geom_type == 'MultiPolygon':
        polygons = geometry['coordinates']
        if not polygons:
            return None
        # Flatten MultiPolygon to Polygon by taking the first polygon.
        coordinates = polygons[0]
    elif geom_type == 'Polygon':
        coordinates = geometry['coordinates']
    else:
        # Skip features that are not polygons
        return None

    # Guard against empty coordinate arrays before inspecting the outer ring.
    if not coordinates or len(coordinates[0]) < 3:
        return None

    # Swap the first two axes; any extra value (e.g. elevation) is dropped.
    new_coordinates = [
        [(coord[1], coord[0]) for coord in ring]
        for ring in coordinates
    ]

    properties = feature.get('properties') or {}

    height = None
    raw_height = properties.get('height')
    if raw_height:
        try:
            height = float(raw_height)
        except (TypeError, ValueError):
            # Values such as "12 m" cannot be parsed; estimate instead.
            height = None

    if height is None:
        height = default_height
        levels = properties.get('building:levels')
        if levels and not isinstance(levels, str):
            try:
                height = float(levels) * metres_per_level
            except (TypeError, ValueError):
                height = default_height

    return {
        'type': 'Feature',
        'properties': {
            'height': height,
            'confidence': -1.0,  # Confidence score for height estimate
        },
        'geometry': {
            'type': 'Polygon',
            'coordinates': new_coordinates,
        },
    }
603
+
604
+
605
# Land cover / land use classes keyed by class code (0-13). Each entry gives
# the class display name and the classification tags that belong to it.
# Insertion order matters: get_classification walks the classes in this
# order and returns the first one that matches.
classification_mapping = {
    11: {
        'name': 'Road',
        'tags': ['highway', 'road', 'path', 'track', 'street'],
    },
    12: {
        'name': 'Building',
        'tags': ['building', 'house', 'apartment', 'commercial_building', 'industrial_building'],
    },
    10: {
        'name': 'Developed space',
        'tags': ['industrial', 'retail', 'commercial', 'residential', 'construction',
                 'railway', 'parking', 'islet', 'island'],
    },
    0: {
        'name': 'Bareland',
        'tags': ['quarry', 'brownfield', 'bare_rock', 'scree', 'shingle', 'rock', 'sand',
                 'desert', 'landfill', 'beach'],
    },
    1: {
        'name': 'Rangeland',
        'tags': ['grass', 'meadow', 'grassland', 'heath', 'garden', 'park'],
    },
    2: {
        'name': 'Shrub',
        'tags': ['scrub', 'shrubland', 'bush', 'thicket'],
    },
    3: {
        'name': 'Agriculture land',
        'tags': ['farmland', 'orchard', 'vineyard', 'plant_nursery', 'greenhouse_horticulture',
                 'flowerbed', 'allotments', 'cropland'],
    },
    4: {
        'name': 'Tree',
        'tags': ['wood', 'forest', 'tree', 'tree_row', 'tree_canopy'],
    },
    5: {
        'name': 'Moss and lichen',
        'tags': ['moss', 'lichen', 'tundra_vegetation'],
    },
    6: {
        'name': 'Wet land',
        'tags': ['wetland', 'marsh', 'swamp', 'bog', 'fen', 'flooded_vegetation'],
    },
    7: {
        'name': 'Mangrove',
        'tags': ['mangrove', 'mangrove_forest', 'mangrove_swamp'],
    },
    8: {
        'name': 'Water',
        'tags': ['water', 'waterway', 'reservoir', 'basin', 'bay', 'ocean', 'sea', 'river', 'lake'],
    },
    9: {
        'name': 'Snow and ice',
        'tags': ['glacier', 'snow', 'ice', 'snowfield', 'ice_shelf'],
    },
    13: {
        'name': 'No Data',
        'tags': ['unknown', 'no_data', 'clouds', 'undefined'],
    },
}
623
+
624
# Translates each classification tag into the OSM key/value pair(s) that
# identify it. A value of '*' accepts any value for that key. Entries with
# more than one key describe a more specific tagging (e.g. a lake is
# natural=water together with water=lake).
tag_osm_key_value_mapping = {
    # --- Road ---
    'highway': {'highway': '*'},
    'road': {'highway': '*'},
    'path': {'highway': 'path'},
    'track': {'highway': 'track'},
    'street': {'highway': '*'},

    # --- Building ---
    'building': {'building': '*'},
    'house': {'building': 'house'},
    'apartment': {'building': 'apartments'},
    'commercial_building': {'building': 'commercial'},
    'industrial_building': {'building': 'industrial'},

    # --- Developed space ---
    'industrial': {'landuse': 'industrial'},
    'retail': {'landuse': 'retail'},
    'commercial': {'landuse': 'commercial'},
    'residential': {'landuse': 'residential'},
    'construction': {'landuse': 'construction'},
    'railway': {'landuse': 'railway'},
    'parking': {'amenity': 'parking'},
    'islet': {'place': 'islet'},
    'island': {'place': 'island'},

    # --- Bareland ---
    'quarry': {'landuse': 'quarry'},
    'brownfield': {'landuse': 'brownfield'},
    'bare_rock': {'natural': 'bare_rock'},
    'scree': {'natural': 'scree'},
    'shingle': {'natural': 'shingle'},
    'rock': {'natural': 'rock'},
    'sand': {'natural': 'sand'},
    'desert': {'natural': 'desert'},
    'landfill': {'landuse': 'landfill'},
    'beach': {'natural': 'beach'},

    # --- Rangeland ---
    'grass': {'landuse': 'grass'},
    'meadow': {'landuse': 'meadow'},
    'grassland': {'natural': 'grassland'},
    'heath': {'natural': 'heath'},
    'garden': {'leisure': 'garden'},
    'park': {'leisure': 'park'},

    # --- Shrub ---
    'scrub': {'natural': 'scrub'},
    'shrubland': {'natural': 'scrub'},
    'bush': {'natural': 'scrub'},
    'thicket': {'natural': 'scrub'},

    # --- Agriculture land ---
    'farmland': {'landuse': 'farmland'},
    'orchard': {'landuse': 'orchard'},
    'vineyard': {'landuse': 'vineyard'},
    'plant_nursery': {'landuse': 'plant_nursery'},
    'greenhouse_horticulture': {'landuse': 'greenhouse_horticulture'},
    'flowerbed': {'landuse': 'flowerbed'},
    'allotments': {'landuse': 'allotments'},
    'cropland': {'landuse': 'farmland'},

    # --- Tree ---
    'wood': {'natural': 'wood'},
    'forest': {'landuse': 'forest'},
    'tree': {'natural': 'tree'},
    'tree_row': {'natural': 'tree_row'},
    'tree_canopy': {'natural': 'tree_canopy'},

    # --- Moss and lichen ---
    'moss': {'natural': 'fell'},
    'lichen': {'natural': 'fell'},
    'tundra_vegetation': {'natural': 'fell'},

    # --- Wet land ---
    'wetland': {'natural': 'wetland'},
    'marsh': {'wetland': 'marsh'},
    'swamp': {'wetland': 'swamp'},
    'bog': {'wetland': 'bog'},
    'fen': {'wetland': 'fen'},
    'flooded_vegetation': {'natural': 'wetland'},

    # --- Mangrove ---
    'mangrove': {'natural': 'wetland', 'wetland': 'mangrove'},
    'mangrove_forest': {'natural': 'wetland', 'wetland': 'mangrove'},
    'mangrove_swamp': {'natural': 'wetland', 'wetland': 'mangrove'},

    # --- Water ---
    'water': {'natural': 'water'},
    'waterway': {'waterway': '*'},
    'reservoir': {'landuse': 'reservoir'},
    'basin': {'landuse': 'basin'},
    'bay': {'natural': 'bay'},
    'ocean': {'natural': 'water', 'water': 'ocean'},
    'sea': {'natural': 'water', 'water': 'sea'},
    'river': {'waterway': 'river'},
    'lake': {'natural': 'water', 'water': 'lake'},

    # --- Snow and ice ---
    'glacier': {'natural': 'glacier'},
    'snow': {'natural': 'glacier'},
    'ice': {'natural': 'glacier'},
    'snowfield': {'natural': 'glacier'},
    'ice_shelf': {'natural': 'glacier'},

    # --- No Data ---
    'unknown': {'FIXME': '*'},
    'no_data': {'FIXME': '*'},
    'clouds': {'natural': 'cloud'},
    'undefined': {'FIXME': '*'},
}
737
+
738
def get_classification(tags):
    """Determine the classification code and name for a feature based on its OSM tags.

    Classes are checked in the order of classification_mapping; a class
    matches as soon as one key/value pair of one of its tag mappings matches
    the feature ('*' matches any value). Because several classes share a
    generic pair (for example natural=wetland is part of both 'Wet land' and
    'Mangrove'), the first match is then refined: if some multi-key mapping
    that contains the matched key is satisfied completely by the feature's
    tags, the class owning that more specific mapping is returned instead.

    Args:
        tags (dict): Dictionary of OSM tags

    Returns:
        tuple: (classification_code, classification_name) or (None, None) if no match
    """
    def pair_matches(key, value):
        return key in tags and (value == '*' or tags[key] == value)

    def iter_mappings():
        # Yields (code, name, tag, mapping) in classification order.
        for code, info in classification_mapping.items():
            for tag in info['tags']:
                yield code, info['name'], tag, tag_osm_key_value_mapping.get(tag) or {}

    # Pass 1: first class with any matching key/value pair.
    first_match = None
    for code, name, tag, mapping in iter_mappings():
        matched_key = next(
            (key for key, value in mapping.items() if pair_matches(key, value)),
            None,
        )
        # Special case for islets and islands
        if matched_key is None and tag in ('islet', 'island') and tags.get('place') == tag:
            matched_key = 'place'
        if matched_key is not None:
            first_match = (code, name, matched_key)
            break

    if first_match is None:
        # Special case for roads mapped as areas
        if 'area:highway' in tags:
            return 11, 'Road'
        return None, None

    code, name, matched_key = first_match

    # Pass 2: prefer a fully satisfied multi-key mapping built on the same
    # key; this is what lets natural=wetland + wetland=mangrove resolve to
    # the mangrove class rather than the generic wetland class.
    for other_code, other_name, _tag, mapping in iter_mappings():
        if (
            len(mapping) > 1
            and matched_key in mapping
            and all(pair_matches(key, value) for key, value in mapping.items())
        ):
            return other_code, other_name

    return code, name
765
+
766
def swap_coordinates(geom_mapping):
    """Rebuild the coordinate arrays of a GeoJSON geometry without swapping axes.

    Despite its name, this function leaves every coordinate in its incoming
    order (the data is already (lon, lat)). It only walks the nested
    coordinate structure: containers whose first item is itself a list or
    tuple are rebuilt as new lists, while innermost sequences are reused
    unchanged. The input mapping is modified in place.

    Args:
        geom_mapping (dict): GeoJSON geometry object with a 'coordinates' entry

    Returns:
        dict: The same geometry object, with its 'coordinates' entry reassigned
    """
    def rebuild(coord_list):
        # An empty container has no first item to inspect; return it as-is
        # instead of failing on coord_list[0].
        if coord_list and isinstance(coord_list[0], (list, tuple)):
            return [rebuild(item) for item in coord_list]
        # Innermost coordinate: keep original order since already (lon, lat).
        return coord_list

    geom_mapping['coordinates'] = rebuild(geom_mapping['coordinates'])
    return geom_mapping
787
+
788
def _build_land_cover_query(min_lat, min_lon, max_lat, max_lon):
    """Build the Overpass QL query that fetches all classified ways/relations in a bbox."""
    # Collect, per OSM key, the values needed by any classification tag.
    # ['*'] means "any value" and overrides specific values for that key.
    osm_keys_values = {}
    for info in classification_mapping.values():
        for tag in info['tags']:
            osm_mappings = tag_osm_key_value_mapping.get(tag) or {}
            for key, value in osm_mappings.items():
                known = osm_keys_values.setdefault(key, [])
                if value == '*':
                    osm_keys_values[key] = ['*']  # Match all values
                elif known != ['*'] and value not in known:
                    known.append(value)

    bbox = f"({min_lat},{min_lon},{max_lat},{max_lon})"
    query_parts = []
    for key, values in osm_keys_values.items():
        if not values:
            continue
        if values == ['*']:
            selector = f'["{key}"]'
        else:
            # Group the alternatives so that ^ and $ anchor every value, not
            # just the first and last one; sort for a reproducible query.
            values_regex = '|'.join(sorted(set(values)))
            selector = f'["{key}"~"^({values_regex})$"]'
        query_parts.append(f'way{selector}{bbox};')
        query_parts.append(f'relation{selector}{bbox};')

    query_body = "\n  ".join(query_parts)
    return (
        "[out:json];\n"
        "(\n"
        f"  {query_body}\n"
        ");\n"
        "out body;\n"
        ">;\n"
        "out skel qt;"
    )


def _road_buffer_distance(tags):
    """Return half the road width in metres from 'width', else 'lanes' (3 m each), else 2.5."""
    for key, metres_per_unit in (('width', 1.0), ('lanes', 3.0)):
        raw_value = tags.get(key)
        if raw_value is None:
            continue
        try:
            return float(raw_value) * metres_per_unit / 2
        except (TypeError, ValueError):
            # Unparsable value (e.g. "5 m", "2;3"): try the next source
            # rather than dropping the road.
            continue
    return 2.5  # Default road: 5m total width


def load_land_cover_gdf_from_osm(rectangle_vertices_ori):
    """Load land cover data from OpenStreetMap within a given rectangular area.

    Downloads every way and relation whose tags map to a land cover class,
    converts the result to GeoJSON, classifies each feature and keeps its
    polygon geometry. Road centre lines are buffered to polygons in a local
    Albers equal-area projection and clipped to the rectangle; minor paths
    (footway, path, pedestrian, steps, cycleway, bridleway) and roads that
    are not line geometries are skipped.

    Args:
        rectangle_vertices_ori (list): List of (lon, lat) coordinates defining the rectangle

    Returns:
        GeoDataFrame: GeoDataFrame (EPSG:4326) with one polygon per row and a
        'class' column holding the classification name

    Raises:
        requests.HTTPError: If the Overpass API responds with an error status.
    """
    # Close the rectangle polygon by adding first vertex at the end
    rectangle_vertices = list(rectangle_vertices_ori)
    rectangle_vertices.append(rectangle_vertices[0])

    lons = [lon for lon, lat in rectangle_vertices]
    lats = [lat for lon, lat in rectangle_vertices]
    min_lon, max_lon = min(lons), max(lons)
    min_lat, max_lat = min(lats), max(lats)

    query = _build_land_cover_query(min_lat, min_lon, max_lat, max_lon)

    # Overpass API endpoint
    overpass_url = "http://overpass-api.de/api/interpreter"

    # Fetch data from Overpass API
    print("Fetching data from Overpass API...")
    response = requests.get(overpass_url, params={'data': query})
    response.raise_for_status()
    data = response.json()

    # Convert OSM data to GeoJSON format using our custom converter instead of json2geojson
    print("Converting data to GeoJSON format...")
    geojson_data = osm_json_to_geojson(data)

    # Create shapely polygon from rectangle vertices (in lon,lat order)
    rectangle_polygon = Polygon(rectangle_vertices)

    # Centre of the projection (mean over the closed vertex list).
    center_lat = sum(lats) / len(lats)
    center_lon = sum(lons) / len(lons)

    # Set up coordinate reference systems for projection
    wgs84 = pyproj.CRS('EPSG:4326')  # Standard lat/lon
    # Albers Equal Area projection centered on area of interest
    aea = pyproj.CRS(
        proj='aea',
        lat_1=rectangle_polygon.bounds[1],
        lat_2=rectangle_polygon.bounds[3],
        lat_0=center_lat,
        lon_0=center_lon,
    )

    # Transformers for projecting to metres and back
    project = pyproj.Transformer.from_crs(wgs84, aea, always_xy=True).transform
    project_back = pyproj.Transformer.from_crs(aea, wgs84, always_xy=True).transform

    minor_highways = ('footway', 'path', 'pedestrian', 'steps', 'cycleway', 'bridleway')

    # Geometries and properties for the GeoDataFrame
    geometries = []
    properties = []

    for feature in geojson_data['features']:
        geom = shape(feature['geometry'])
        if not (geom.is_valid and geom.intersects(rectangle_polygon)):
            continue

        tags = feature['properties'].get('tags', {})
        classification_code, classification_name = get_classification(tags)
        if classification_code is None:
            continue

        # Special handling for roads
        if classification_code == 11:
            # Skip minor paths and walkways
            if tags.get('highway', '') in minor_highways:
                continue

            # Only line features are buffered into road polygons
            if geom.geom_type not in ('LineString', 'MultiLineString'):
                continue

            # Project to planar CRS, buffer in metres, and project back
            buffer_distance = _road_buffer_distance(tags)
            buffered_geom_proj = transform(project, geom).buffer(buffer_distance)
            buffered_geom = transform(project_back, buffered_geom_proj)
            # Clip to rectangle
            geom = buffered_geom.intersection(rectangle_polygon)

        # Skip empty geometries
        if geom.is_empty:
            continue

        # Keep polygons; split multipolygons into their parts
        if geom.geom_type == 'Polygon':
            geometries.append(geom)
            properties.append({'class': classification_name})
        elif geom.geom_type == 'MultiPolygon':
            for poly in geom.geoms:
                geometries.append(poly)
                properties.append({'class': classification_name})

    # Create GeoDataFrame
    gdf = gpd.GeoDataFrame(properties, geometry=geometries, crs="EPSG:4326")
    return gdf