voxcity 0.5.20__py3-none-any.whl → 0.5.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of voxcity might be problematic. Click here for more details.

@@ -1,785 +1,786 @@
1
- """
2
- Utility functions for geographic operations and coordinate transformations.
3
-
4
- This module provides various utility functions for working with geographic data,
5
- including coordinate transformations, distance calculations, geocoding, and building
6
- polygon processing. It supports operations such as:
7
-
8
- - Tile coordinate calculations and quadkey conversions
9
- - Geographic distance calculations (Haversine and geodetic)
10
- - Coordinate system transformations
11
- - Polygon and GeoDataFrame operations
12
- - Raster file processing and merging
13
- - Geocoding and reverse geocoding
14
- - Timezone and location information retrieval
15
- - Building polygon validation and processing
16
-
17
- The module uses several external libraries for geographic operations:
18
- - pyproj: For coordinate transformations and geodetic calculations
19
- - geopandas: For handling geographic data frames
20
- - rasterio: For raster file operations
21
- - shapely: For geometric operations
22
- - geopy: For geocoding services
23
- - timezonefinder: For timezone lookups
24
- """
25
-
26
- # Standard library imports
27
- import os
28
- import math
29
- from math import radians, sin, cos, sqrt, atan2
30
- from datetime import datetime
31
-
32
- # Third-party geographic processing libraries
33
- import numpy as np
34
- from pyproj import Geod, Transformer
35
- import geopandas as gpd
36
- import rasterio
37
- from rasterio.merge import merge
38
- from rasterio.warp import transform_bounds
39
- from rasterio.mask import mask
40
- from shapely.geometry import Polygon, box
41
- from fiona.crs import from_epsg
42
- from rtree import index
43
-
44
- # Geocoding and location services
45
- from geopy.geocoders import Nominatim
46
- from geopy.exc import GeocoderTimedOut, GeocoderServiceError
47
- from geopy.extra.rate_limiter import RateLimiter
48
- import reverse_geocoder as rg
49
- import pycountry
50
-
51
- # Timezone handling
52
- from timezonefinder import TimezoneFinder
53
- import pytz
54
-
55
# Suppress rasterio warnings for non-georeferenced files so that opening
# such rasters does not flood the console.
import warnings
warnings.filterwarnings("ignore", category=rasterio.errors.NotGeoreferencedWarning)

# Global constants
# floor_height is multiplied by a level/floor count whenever a building has
# no explicit height (see create_building_polygons).
floor_height = 2.5  # Standard floor height in meters used for building height calculations
61
-
62
def tile_from_lat_lon(lat, lon, level_of_detail):
    """
    Convert latitude/longitude coordinates to tile coordinates at a given zoom level.
    Uses the Web Mercator projection (EPSG:3857) commonly used in web mapping.

    Inputs outside the valid Web Mercator range are clamped rather than
    rejected: latitude to +/-85.05112878 degrees (the projection is undefined
    at the poles) and longitude to +/-180 degrees. The resulting tile indices
    are always within [0, 2**level_of_detail - 1].

    Args:
        lat (float): Latitude in degrees (-90 to 90)
        lon (float): Longitude in degrees (-180 to 180)
        level_of_detail (int): Zoom level (0-23, where 0 is the entire world)

    Returns:
        tuple: (tile_x, tile_y) tile coordinates in the global tile grid

    Example:
        >>> tile_from_lat_lon(0, 0, 1)
        (1, 1)
    """
    # Clamp to the area covered by the tile pyramid. Without this, lat = +/-90
    # makes (1 - sin_lat) or (1 + sin_lat) zero and the log below blows up.
    max_lat = 85.05112878
    lat = min(max(lat, -max_lat), max_lat)
    lon = min(max(lon, -180), 180)

    sin_lat = math.sin(lat * math.pi / 180)

    # Normalized map coordinates in [0, 1]; y grows southwards.
    x = (lon + 180) / 360
    y = 0.5 - math.log((1 + sin_lat) / (1 - sin_lat)) / (4 * math.pi)

    # Map size in pixels at this zoom level (256 * 2^zoom)
    map_size = 256 << level_of_detail
    last_tile = (1 << level_of_detail) - 1

    # x == 1.0 (lon = 180) or y == 1.0 would otherwise index one tile past
    # the edge of the grid, so clip to the last valid tile.
    tile_x = min(max(int(x * map_size / 256), 0), last_tile)
    tile_y = min(max(int(y * map_size / 256), 0), last_tile)
    return tile_x, tile_y
94
-
95
def quadkey_to_tile(quadkey):
    """
    Convert a quadkey string to tile coordinates.
    A quadkey is a string of digits (0-3) that identifies a tile at a certain zoom level.
    Each digit selects one quadrant at successive zoom levels, most significant first.

    The quadkey numbering scheme:
    - 0: Top-left quadrant (neither x nor y bit set)
    - 1: Top-right quadrant (x bit set)
    - 2: Bottom-left quadrant (y bit set)
    - 3: Bottom-right quadrant (both bits set)

    Args:
        quadkey (str): Quadkey string (e.g., "120" for zoom level 3)

    Returns:
        tuple: (tile_x, tile_y, level_of_detail) tile coordinates and zoom level

    Raises:
        ValueError: If the quadkey contains a character other than 0-3

    Example:
        >>> quadkey_to_tile("213")
        (3, 5, 3)
    """
    tile_x = tile_y = 0
    level_of_detail = len(quadkey)

    for position, digit in enumerate(quadkey):
        # The first digit controls the most significant bit of each coordinate.
        bit_mask = 1 << (level_of_detail - position - 1)

        if digit == '0':
            continue
        if digit == '1':
            tile_x |= bit_mask
        elif digit == '2':
            tile_y |= bit_mask
        elif digit == '3':
            tile_x |= bit_mask
            tile_y |= bit_mask
        else:
            # Previously any other character was silently treated as '0',
            # producing a wrong tile instead of signalling bad input.
            raise ValueError(
                f"Invalid quadkey digit {digit!r} in quadkey {quadkey!r}"
            )
    return tile_x, tile_y, level_of_detail
138
-
139
def initialize_geod():
    """
    Build the pyproj Geod used for geodetic calculations on the WGS84 ellipsoid.

    Returns:
        Geod: Geod object constructed with ellps='WGS84'

    Example:
        >>> geod = initialize_geod()
        >>> fwd_az, back_az, dist = geod.inv(lon1, lat1, lon2, lat2)
    """
    wgs84_geod = Geod(ellps='WGS84')
    return wgs84_geod
159
-
160
def calculate_distance(geod, lon1, lat1, lon2, lat2):
    """
    Return the geodetic distance between two points.

    Delegates to ``geod.inv`` (the inverse geodetic problem) and keeps only
    the distance component of its result.

    Args:
        geod (Geod): Geod object for calculations, e.g. from initialize_geod()
        lon1, lat1 (float): Coordinates of first point in decimal degrees
        lon2, lat2 (float): Coordinates of second point in decimal degrees

    Returns:
        float: Distance between the two points as reported by ``geod.inv``
            (meters for a pyproj Geod)

    Example:
        >>> geod = initialize_geod()
        >>> distance = calculate_distance(geod, 139.6503, 35.6762,
        ...                               -74.0060, 40.7128)  # Tokyo to NYC
    """
    # inv() yields (forward azimuth, back azimuth, distance); azimuths are unused.
    forward_azimuth, back_azimuth, separation = geod.inv(lon1, lat1, lon2, lat2)
    return separation
182
-
183
def normalize_to_one_meter(vector, distance_in_meters):
    """
    Scale a vector so that it represents one meter.

    The vector is multiplied by the reciprocal of the distance it currently
    spans, so a vector covering ``distance_in_meters`` meters ends up
    covering exactly one.

    Args:
        vector (numpy.ndarray): Vector to scale, typically a direction vector
        distance_in_meters (float): Distance in meters the vector currently represents

    Returns:
        numpy.ndarray: ``vector`` scaled by ``1 / distance_in_meters``

    Example:
        >>> direction = np.array([3.0, 4.0])  # Vector spanning 5 meters
        >>> unit_meter = normalize_to_one_meter(direction, 5.0)
    """
    per_meter_scale = 1 / distance_in_meters
    return vector * per_meter_scale
201
-
202
def setup_transformer(from_crs, to_crs):
    """
    Create a pyproj Transformer between two Coordinate Reference Systems (CRS).

    The transformer is built with always_xy=True, so coordinates are always
    passed and returned in (x, y) / (longitude, latitude) order regardless of
    the axis order declared by either CRS.

    Args:
        from_crs: Source coordinate reference system (e.g. "EPSG:4326")
        to_crs: Target coordinate reference system (e.g. "EPSG:3857")

    Returns:
        Transformer: Transformer converting from ``from_crs`` to ``to_crs``

    Example:
        >>> transformer = setup_transformer("EPSG:4326", "EPSG:3857")
        >>> x, y = transformer.transform(longitude, latitude)
    """
    crs_transformer = Transformer.from_crs(from_crs, to_crs, always_xy=True)
    return crs_transformer
225
-
226
def transform_coords(transformer, lon, lat):
    """
    Transform a coordinate pair, reporting failures instead of raising.

    If the transformation yields an infinite value a message is printed,
    but the (infinite) result is still returned to the caller. If anything
    raises during the transformation, the error is printed and
    (None, None) is returned.

    Args:
        transformer (Transformer): Coordinate transformer from setup_transformer()
        lon, lat (float): Input coordinates in the source CRS

    Returns:
        tuple: (x, y) transformed coordinates, or (None, None) if an exception occurred

    Example:
        >>> transformer = setup_transformer("EPSG:4326", "EPSG:3857")
        >>> x, y = transform_coords(transformer, -74.0060, 40.7128)  # NYC coordinates
        >>> if x is not None:
        ...     print(f"Transformed coordinates: ({x}, {y})")
    """
    try:
        projected = transformer.transform(lon, lat)
        x, y = projected
        produced_inf = np.isinf(x) or np.isinf(y)
        if produced_inf:
            print(f"Transformation resulted in inf values for coordinates: {lon}, {lat}")
    except Exception as e:
        print(f"Error transforming coordinates {lon}, {lat}: {e}")
        return None, None
    else:
        return x, y
253
-
254
def create_polygon(vertices):
    """
    Build a Shapely polygon from a sequence of vertices.

    The vertices are handed to ``Polygon`` unchanged, as the exterior ring.

    Args:
        vertices (list): List of (longitude, latitude) coordinate pairs
            forming the polygon outline

    Returns:
        Polygon: Shapely polygon object built from ``vertices``

    Example:
        >>> vertices = [(0, 0), (1, 0), (1, 1), (0, 1)]  # Square
        >>> polygon = create_polygon(vertices)
        >>> print(f"Polygon area: {polygon.area}")
    """
    outline = Polygon(vertices)
    return outline
274
-
275
def create_geodataframe(polygon, crs=4326):
    """
    Create a single-row GeoDataFrame from a Shapely polygon.
    Default CRS is WGS84 (EPSG:4326) for geographic coordinates.

    The CRS is passed to GeoPandas as an authority string ("EPSG:<code>")
    rather than through the legacy ``fiona.crs.from_epsg`` mapping, which
    relies on the deprecated "+init=epsg:XXXX" syntax.

    Args:
        polygon (Polygon): Shapely polygon object to convert
        crs (int): Coordinate reference system EPSG code (default: 4326 for WGS84).
            Values that are not integer-like (e.g. "EPSG:3857") are passed
            to GeoPandas unchanged.

    Returns:
        GeoDataFrame: GeoDataFrame containing the polygon with specified CRS

    Example:
        >>> vertices = [(0, 0), (1, 0), (1, 1), (0, 1)]
        >>> polygon = create_polygon(vertices)
        >>> gdf = create_geodataframe(polygon)
        >>> gdf.to_file("polygon.geojson", driver="GeoJSON")
    """
    try:
        crs_spec = f"EPSG:{int(crs)}"
    except (TypeError, ValueError):
        # Not a bare EPSG code; let GeoPandas/pyproj interpret it directly.
        crs_spec = crs
    return gpd.GeoDataFrame({'geometry': [polygon]}, crs=crs_spec)
296
-
297
def haversine_distance(lon1, lat1, lon2, lat2):
    """
    Calculate great-circle distance between two points using Haversine formula.
    This is an approximation that treats the Earth as a perfect sphere
    of radius 6371 km.

    Args:
        lon1, lat1 (float): Coordinates of first point in decimal degrees
        lon2, lat2 (float): Coordinates of second point in decimal degrees

    Returns:
        float: Distance in kilometers

    Example:
        >>> haversine_distance(0, 0, 0, 0)
        0.0
    """
    R = 6371  # Earth's radius in kilometers

    # Convert all coordinates to radians
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])

    dlat = lat2 - lat1
    dlon = lon2 - lon1

    # Haversine formula
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) raise.
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    return R * c
322
-
323
def get_raster_bbox(raster_path):
    """
    Return the bounding box of a raster file as a Shapely geometry.

    The bounds are read from the dataset as-is, so the box is expressed in
    whatever coordinate system the raster itself uses.

    Args:
        raster_path (str): Path to the raster file (GeoTIFF, IMG, etc.)

    Returns:
        box: Shapely box built from the raster's left/bottom/right/top bounds

    Example:
        >>> bbox = get_raster_bbox("elevation.tif")
        >>> print(f"Raster extent: {bbox.bounds}")  # (minx, miny, maxx, maxy)
    """
    with rasterio.open(raster_path) as dataset:
        extent = dataset.bounds
    # The bounds object is a plain value, so it stays usable after the file closes.
    return box(extent.left, extent.bottom, extent.right, extent.top)
342
-
343
def raster_intersects_polygon(raster_path, polygon):
    """
    Check whether a raster's extent intersects a given polygon.

    The raster bounds are reprojected to WGS84 (EPSG:4326) when the raster's
    CRS does not resolve to EPSG code 4326, then compared with ``polygon``.

    Args:
        raster_path (str): Path to the raster file to check
        polygon (Polygon): Shapely polygon to test intersection with (in WGS84)

    Returns:
        bool: True if the raster extent and the polygon intersect, False otherwise

    Example:
        >>> aoi = create_polygon([(lon1, lat1), (lon2, lat2), ...])  # Area of interest
        >>> if raster_intersects_polygon("dem.tif", aoi):
        ...     print("Raster covers the area of interest")
    """
    with rasterio.open(raster_path) as dataset:
        extent = dataset.bounds
        already_wgs84 = dataset.crs.to_epsg() == 4326
        if not already_wgs84:
            # Bring the extent into the polygon's coordinate system.
            extent = transform_bounds(dataset.crs, 'EPSG:4326', *extent)
        footprint = box(*extent)
        overlaps = footprint.intersects(polygon) or polygon.intersects(footprint)
    return overlaps
369
-
370
def save_raster(input_path, output_path):
    """
    Copy a raster file to a new location and report where it went.

    This is a plain file copy via ``shutil.copy``; the raster content is not
    read or modified.

    Args:
        input_path (str): Source raster file path
        output_path (str): Destination path for the copied raster

    Example:
        >>> save_raster("original.tif", "backup/copy.tif")
        Copied original file to: backup/copy.tif
    """
    import shutil

    shutil.copy(input_path, output_path)
    confirmation = f"Copied original file to: {output_path}"
    print(confirmation)
387
-
388
def merge_geotiffs(geotiff_files, output_dir):
    """
    Merge multiple GeoTIFF files into a single mosaic.

    Paths that do not exist on disk are ignored. The mosaic is written to
    ``<output_dir>/lulc.tif`` using the metadata of the first input file,
    updated with the mosaic's size and transform. Errors raised while
    merging or writing are printed rather than propagated.

    Every dataset that was opened is closed again, even if opening a later
    file or the merge itself fails.

    Important considerations:
    - All input files should have the same coordinate system
    - All input files should have the same data type

    Args:
        geotiff_files (list): List of paths to GeoTIFF files to merge
        output_dir (str): Directory where the merged output will be saved

    Example:
        >>> files = ["tile1.tif", "tile2.tif", "tile3.tif"]
        >>> merge_geotiffs(files, "output_directory")
        Merged output saved to: output_directory/lulc.tif
    """
    from contextlib import ExitStack

    if not geotiff_files:
        return

    # ExitStack closes each dataset as soon as the block is left, so a
    # failure halfway through opening the list cannot leak file handles.
    with ExitStack() as stack:
        src_files_to_mosaic = [
            stack.enter_context(rasterio.open(file))
            for file in geotiff_files
            if os.path.exists(file)
        ]

        if not src_files_to_mosaic:
            print("No valid files to merge.")
            return

        try:
            # Merge rasters into a single mosaic and get output transform
            mosaic, out_trans = merge(src_files_to_mosaic)

            # Copy metadata from first raster and update for merged output.
            # mosaic is indexed as (band, row, col).
            out_meta = src_files_to_mosaic[0].meta.copy()
            out_meta.update({
                "driver": "GTiff",
                "height": mosaic.shape[1],
                "width": mosaic.shape[2],
                "transform": out_trans
            })

            # Save merged raster to output file
            merged_path = os.path.join(output_dir, "lulc.tif")
            with rasterio.open(merged_path, "w", **out_meta) as dest:
                dest.write(mosaic)

            print(f"Merged output saved to: {merged_path}")
        except Exception as e:
            # Best-effort by design: report the failure and carry on.
            print(f"Error merging files: {e}")
442
-
443
def convert_format_lat_lon(input_coords):
    """
    Close a polygon by repeating its first coordinate at the end.
    Input coordinates are already in [lon, lat] format and keep their order.
    The input sequence itself is not modified.

    Args:
        input_coords (list): Sequence of [lon, lat] coordinates

    Returns:
        list: New list of [lon, lat] coordinates with the first point repeated
            at the end; an empty list if ``input_coords`` is empty

    Example:
        >>> convert_format_lat_lon([[0, 0], [1, 0], [1, 1]])
        [[0, 0], [1, 0], [1, 1], [0, 0]]
    """
    # list() copies lists and also accepts tuples or other sequences,
    # which have no .copy()/.append().
    output_coords = list(input_coords)
    # Nothing to repeat for an empty ring; indexing [0] would raise.
    if output_coords:
        output_coords.append(output_coords[0])
    return output_coords
459
-
460
def get_coordinates_from_cityname(place_name):
    """
    Geocode a place name to coordinates using OpenStreetMap's Nominatim service.

    A single geocoding request is made. Timeouts and service errors are
    reported with a printed message and result in None.

    Note:
        - Results may vary based on the specificity of the place name
        - For better results, include country or state information

    Args:
        place_name (str): Name of the place to geocode (e.g., "Tokyo, Japan")

    Returns:
        tuple: (latitude, longitude) coordinates, or None if nothing was found
            or the service failed

    Example:
        >>> coords = get_coordinates_from_cityname("Paris, France")
        >>> if coords:
        ...     lat, lon = coords
        ...     print(f"Paris coordinates: {lat}, {lon}")
    """
    geocoder = Nominatim(user_agent="my_geocoding_script")

    try:
        match = geocoder.geocode(place_name)
    except (GeocoderTimedOut, GeocoderServiceError):
        print(f"Error: Geocoding service timed out or encountered an error for {place_name}")
        return None

    # geocode() gives a falsy result when the place is unknown.
    if not match:
        return None
    return (match.latitude, match.longitude)
496
-
497
def get_city_country_name_from_rectangle(coordinates):
    """
    Look up the city and country at the center of a rectangle.

    The center is the mean of the given vertices. It is reverse geocoded
    through Nominatim (rate limited, English results) and the city is taken
    from the first non-empty of the address fields city, town, village and
    county.

    Args:
        coordinates (list): List of (longitude, latitude) coordinates defining the rectangle

    Returns:
        str: String in format "city/ country", or None if the location was
            not found or the lookup raised an error (a message is printed)

    Example:
        >>> coords = [(139.65, 35.67), (139.66, 35.67),
        ...           (139.66, 35.68), (139.65, 35.68)]
        >>> location = get_city_country_name_from_rectangle(coords)
        >>> print(f"Location: {location}")  # e.g., "Shibuya/ Japan"
    """
    lon_values = [vertex[0] for vertex in coordinates]
    lat_values = [vertex[1] for vertex in coordinates]
    center_lon = sum(lon_values) / len(lon_values)
    center_lat = sum(lat_values) / len(lat_values)
    # Nominatim expects (latitude, longitude) order.
    center_coord = (center_lat, center_lon)

    geolocator = Nominatim(user_agent="your_app_name (your_email@example.com)")
    reverse = RateLimiter(geolocator.reverse, min_delay_seconds=2, error_wait_seconds=5, max_retries=3)

    try:
        location = reverse(center_coord, language='en')
        if not location:
            print("Location not found")
            return None

        address = location.raw['address']
        city = address.get('city', '') or address.get('town', '') or address.get('village', '') or address.get('county', '')
        country = address.get('country', '')
        return f"{city}/ {country}"
    except Exception as e:
        print(f"Error retrieving location for {center_coord}: {e}")
    return None
543
-
544
def get_timezone_info(rectangle_coords):
    """
    Get the UTC offset and matching reference meridian for a location.

    The timezone is looked up at the mean of the given vertices. The offset
    is the one in effect at the moment of the call (so it follows daylight
    saving time), and the meridian is that offset multiplied by 15 degrees
    per hour.

    Args:
        rectangle_coords (list): List of (longitude, latitude) coordinates defining the area

    Returns:
        tuple: (UTC offset string formatted as decimal hours, e.g. "UTC+9.00",
            meridian longitude string with five decimals, e.g. "135.00000")

    Raises:
        ValueError: If timezone cannot be determined for the given location

    Example:
        >>> coords = [(139.65, 35.67), (139.66, 35.67),
        ...           (139.66, 35.68), (139.65, 35.68)]
        >>> tz, meridian = get_timezone_info(coords)
        >>> print(f"Timezone: {tz}, Meridian: {meridian}")  # e.g., "UTC+9.00, 135.00000"
    """
    lon_values = [vertex[0] for vertex in rectangle_coords]
    lat_values = [vertex[1] for vertex in rectangle_coords]
    center_lon = sum(lon_values) / len(lon_values)
    center_lat = sum(lat_values) / len(lat_values)

    finder = TimezoneFinder()
    tz_name = finder.timezone_at(lng=center_lon, lat=center_lat)
    if not tz_name:
        raise ValueError("Time zone not found for the given location.")

    # Use the current local time to obtain the offset in effect right now.
    local_now = datetime.now(pytz.timezone(tz_name))
    offset_hours = local_now.utcoffset().total_seconds() / 3600

    # Each hour of offset corresponds to 15 degrees of longitude.
    meridian = offset_hours * 15
    return f"UTC{offset_hours:+.2f}", f"{meridian:.5f}"
595
-
596
def validate_polygon_coordinates(geometry):
    """
    Validate and ensure proper closure of polygon coordinate rings.

    Rings that are not closed are closed IN PLACE by appending their first
    point, so the passed geometry may be modified. Validation stops at the
    first ring that is empty or has fewer than 4 points after closing.

    Validation checks:
    1. Geometry type (Polygon or MultiPolygon)
    2. Ring closure (first point equals last point; fixed if necessary)
    3. Minimum number of points (4, including closure)

    Args:
        geometry (dict): GeoJSON geometry object with 'type' and 'coordinates' properties

    Returns:
        bool: True if polygon coordinates are valid or were successfully corrected,
            False if validation failed or the geometry type is unsupported

    Example:
        >>> geom = {
        ...     "type": "Polygon",
        ...     "coordinates": [[[0,0], [1,0], [1,1], [0,1]]]  # Not closed
        ... }
        >>> validate_polygon_coordinates(geom)  # closes the ring automatically
        True
    """
    geometry_type = geometry['type']
    if geometry_type == 'Polygon':
        rings = iter(geometry['coordinates'])
    elif geometry_type == 'MultiPolygon':
        # Lazily walk every ring of every member polygon.
        rings = (ring for polygon in geometry['coordinates'] for ring in polygon)
    else:
        return False

    for ring in rings:
        # An empty ring cannot be closed; indexing it would raise IndexError.
        if not ring:
            return False
        if ring[0] != ring[-1]:
            ring.append(ring[0])  # Close the ring
        # 3 distinct points + closing point is the minimum for a valid ring
        if len(ring) < 4:
            return False
    return True
641
-
642
def create_building_polygons(filtered_buildings):
    """
    Create building polygons with properties from filtered GeoJSON features.
    Processes a list of GeoJSON building features to create Shapely polygons
    with associated height and other properties, while also building a spatial index.

    Only the first coordinate ring of each feature is used. Features whose
    polygon is invalid, or whose processing raises, are skipped with a
    printed warning.

    Height calculation rules:
    - Use explicit 'height' if present and positive
    - Otherwise levels * floor_height, if 'levels' is present
    - Otherwise num_floors * floor_height, if 'num_floors' is present
    - Otherwise NaN
    The minimum height follows the same scheme with 'min_height',
    'min_level' and 'min_floor', defaulting to 0.

    Features without an 'id' property receive a generated one, starting at
    (largest existing id + 1), or at 1 if no feature has an id.

    Args:
        filtered_buildings (list): List of GeoJSON building features with properties

    Returns:
        tuple: (
            list of tuples (polygon, height, min_height, is_inner, feature_id),
            rtree spatial index for the polygons, keyed by position in that list
        )

    Example:
        >>> buildings = [
        ...     {
        ...         "type": "Feature",
        ...         "geometry": {"type": "Polygon", "coordinates": [...]},
        ...         "properties": {"height": 30, "levels": 10}
        ...     },
        ...     # ... more buildings ...
        ... ]
        >>> polygons, spatial_idx = create_building_polygons(buildings)
    """
    building_polygons = []
    idx = index.Index()
    valid_count = 0

    # Find highest existing ID so generated IDs never collide with it
    id_list = [
        building['properties']['id']
        for building in filtered_buildings
        if building['properties'].get('id') is not None
    ]
    id_count = max(id_list) + 1 if id_list else 1

    for building in filtered_buildings:
        try:
            properties = building['properties']

            # Only the exterior ring is used
            coords = building['geometry']['coordinates'][0]
            # Flatten coordinates if they're nested tuples
            if isinstance(coords[0], tuple):
                coords = [list(c) for c in coords]
            elif isinstance(coords[0][0], tuple):
                coords = [list(c[0]) for c in coords]

            polygon = Polygon(coords)

            # Skip invalid geometries
            if not polygon.is_valid:
                print(f"Warning: Skipping invalid polygon geometry")
                continue

            height = properties.get('height')
            levels = properties.get('levels')
            floors = properties.get('num_floors')
            min_height = properties.get('min_height')
            min_level = properties.get('min_level')
            min_floor = properties.get('min_floor')

            if (height is None) or (height <= 0):
                # float() mirrors the min_level/min_floor handling below, so
                # level counts stored as strings no longer drop the building.
                if levels is not None:
                    height = floor_height * float(levels)
                elif floors is not None:
                    height = floor_height * float(floors)
                else:
                    height = np.nan

            if (min_height is None) or (min_height <= 0):
                if min_level is not None:
                    min_height = floor_height * float(min_level)
                elif min_floor is not None:
                    min_height = floor_height * float(min_floor)
                else:
                    min_height = 0

            feature_id = properties.get('id')
            if feature_id is None:
                feature_id = id_count
                id_count += 1

            is_inner = properties.get('is_inner')
            if is_inner is None:
                is_inner = False

            building_polygons.append((polygon, height, min_height, is_inner, feature_id))
            # The index key is the polygon's position in building_polygons
            idx.insert(valid_count, polygon.bounds)
            valid_count += 1

        except Exception as e:
            # Best-effort: one malformed feature must not abort the whole batch
            print(f"Warning: Skipping invalid building geometry: {e}")
            continue

    return building_polygons, idx
757
-
758
- def get_country_name(lon, lat):
759
- """
760
- Get country name from coordinates using reverse geocoding.
761
- Uses a local database for fast reverse geocoding to country level,
762
- then converts the country code to full name using pycountry.
763
-
764
- Args:
765
- lon (float): Longitude in decimal degrees
766
- lat (float): Latitude in decimal degrees
767
-
768
- Returns:
769
- str: Full country name or None if lookup fails
770
-
771
- Example:
772
- >>> country = get_country_name(139.6503, 35.6762)
773
- >>> print(f"Country: {country}") # "Japan"
774
- """
775
- # Use reverse geocoder to get country code
776
- results = rg.search((lat, lon))
777
- country_code = results[0]['cc']
778
-
779
- # Convert country code to full name using pycountry
780
- country = pycountry.countries.get(alpha_2=country_code)
781
-
782
- if country:
783
- return country.name
784
- else:
1
+ """
2
+ Utility functions for geographic operations and coordinate transformations.
3
+
4
+ This module provides various utility functions for working with geographic data,
5
+ including coordinate transformations, distance calculations, geocoding, and building
6
+ polygon processing. It supports operations such as:
7
+
8
+ - Tile coordinate calculations and quadkey conversions
9
+ - Geographic distance calculations (Haversine and geodetic)
10
+ - Coordinate system transformations
11
+ - Polygon and GeoDataFrame operations
12
+ - Raster file processing and merging
13
+ - Geocoding and reverse geocoding
14
+ - Timezone and location information retrieval
15
+ - Building polygon validation and processing
16
+
17
+ The module uses several external libraries for geographic operations:
18
+ - pyproj: For coordinate transformations and geodetic calculations
19
+ - geopandas: For handling geographic data frames
20
+ - rasterio: For raster file operations
21
+ - shapely: For geometric operations
22
+ - geopy: For geocoding services
23
+ - timezonefinder: For timezone lookups
24
+ """
25
+
26
+ # Standard library imports
27
+ import os
28
+ import math
29
+ from math import radians, sin, cos, sqrt, atan2
30
+ from datetime import datetime
31
+
32
+ # Third-party geographic processing libraries
33
+ import numpy as np
34
+ from pyproj import Geod, Transformer
35
+ import geopandas as gpd
36
+ import rasterio
37
+ from rasterio.merge import merge
38
+ from rasterio.warp import transform_bounds
39
+ from rasterio.mask import mask
40
+ from shapely.geometry import Polygon, box
41
+ from fiona.crs import from_epsg
42
+ from rtree import index
43
+
44
+ # Geocoding and location services
45
+ from geopy.geocoders import Nominatim
46
+ from geopy.exc import GeocoderTimedOut, GeocoderServiceError
47
+ from geopy.extra.rate_limiter import RateLimiter
48
+ import reverse_geocoder as rg
49
+ import pycountry
50
+
51
+ # Timezone handling
52
+ from timezonefinder import TimezoneFinder
53
+ import pytz
54
+
55
# Suppress rasterio warnings for non-georeferenced files so that opening
# such rasters does not flood the console.
import warnings
warnings.filterwarnings("ignore", category=rasterio.errors.NotGeoreferencedWarning)

# Global constants
floor_height = 2.5  # Standard floor height in meters used for building height calculations
61
+
62
def tile_from_lat_lon(lat, lon, level_of_detail):
    """
    Convert latitude/longitude coordinates to tile coordinates at a given zoom level.

    The point is projected with the spherical Mercator formula onto a unit
    square and scaled to the ``2 ** level_of_detail`` tiles per axis of the
    global tile grid.

    Out-of-domain input is clamped instead of failing or producing an index
    outside the grid:
    - latitude is limited to +/-85.05112878 degrees (at +/-90 the projection
      formula divides by zero or takes the log of zero)
    - longitude is limited to +/-180 degrees
    - tile indices are limited to ``[0, 2 ** level_of_detail - 1]`` so that
      ``lon == 180`` maps to the last column rather than one past it

    Args:
        lat (float): Latitude in degrees (-90 to 90)
        lon (float): Longitude in degrees (-180 to 180)
        level_of_detail (int): Zoom level (0 is the entire world in one tile)

    Returns:
        tuple: (tile_x, tile_y) tile coordinates in the global tile grid

    Example:
        >>> tile_from_lat_lon(0, 0, 1)
        (1, 1)
    """
    # Latitude beyond which the square Mercator map is cut off
    max_latitude = 85.05112878
    lat = min(max(lat, -max_latitude), max_latitude)
    lon = min(max(lon, -180.0), 180.0)

    sin_lat = math.sin(lat * math.pi / 180)

    # Normalized map coordinates in the range 0-1 (origin at the top-left corner)
    x = (lon + 180) / 360
    y = 0.5 - math.log((1 + sin_lat) / (1 - sin_lat)) / (4 * math.pi)

    # Map size in pixels at this zoom level (256 * 2^zoom)
    map_size = 256 << level_of_detail
    last_tile = (1 << level_of_detail) - 1

    # Scale to tile units, then keep the indices inside the grid
    tile_x = min(max(int(x * map_size / 256), 0), last_tile)
    tile_y = min(max(int(y * map_size / 256), 0), last_tile)
    return tile_x, tile_y
94
+
95
def quadkey_to_tile(quadkey):
    """
    Decode a quadkey string into tile coordinates and zoom level.

    Each character of the quadkey selects one quadrant of the tile chosen by
    the previous characters, so the number of characters is the zoom level:
    - '0': top-left (no bit set)
    - '1': top-right (x bit set)
    - '2': bottom-left (y bit set)
    - '3': bottom-right (x and y bits set)
    Any other character contributes no bits.

    Args:
        quadkey (str): Quadkey string (e.g., "120" for zoom level 3)

    Returns:
        tuple: (tile_x, tile_y, level_of_detail) tile coordinates and zoom level

    Example:
        >>> quadkey_to_tile("120")
        (4, 2, 3)
    """
    # (x bit, y bit) contributed by each quadkey digit
    digit_bits = {'1': (1, 0), '2': (0, 1), '3': (1, 1)}

    column = row = 0
    # The first character is the most significant bit, so shift as we go
    for digit in quadkey:
        x_bit, y_bit = digit_bits.get(digit, (0, 0))
        column = (column << 1) | x_bit
        row = (row << 1) | y_bit
    return column, row, len(quadkey)
138
+
139
def initialize_geod():
    """
    Build the pyproj ``Geod`` used for geodetic calculations on the WGS84 ellipsoid.

    Returns:
        Geod: Geod object constructed with ``ellps='WGS84'``

    Example:
        >>> geod = initialize_geod()
        >>> fwd_az, back_az, dist = geod.inv(lon1, lat1, lon2, lat2)
    """
    wgs84_geod = Geod(ellps='WGS84')
    return wgs84_geod
159
+
160
def calculate_distance(geod, lon1, lat1, lon2, lat2):
    """
    Return the geodetic distance between two points.

    Delegates to ``geod.inv`` (the inverse geodetic problem) and keeps only
    the distance component of its result.

    Args:
        geod (Geod): Geod object for calculations, e.g. from initialize_geod()
        lon1, lat1 (float): Coordinates of first point in decimal degrees
        lon2, lat2 (float): Coordinates of second point in decimal degrees

    Returns:
        float: Distance between the two points as reported by ``geod.inv``
            (meters for the WGS84 Geod)

    Example:
        >>> geod = initialize_geod()
        >>> distance = calculate_distance(geod, 139.6503, 35.6762,
        ...                               -74.0060, 40.7128)  # Tokyo to NYC
    """
    # inv() yields (forward azimuth, back azimuth, distance); azimuths are unused
    forward_azimuth, back_azimuth, separation = geod.inv(lon1, lat1, lon2, lat2)
    return separation
182
+
183
def normalize_to_one_meter(vector, distance_in_meters):
    """
    Scale a vector so that it represents a length of one meter.

    The vector is multiplied by the reciprocal of the distance it currently
    represents.

    Args:
        vector (numpy.ndarray): Vector to rescale, typically a direction vector
        distance_in_meters (float): Distance in meters that the vector currently
            represents (must be non-zero)

    Returns:
        numpy.ndarray: ``vector`` scaled by ``1 / distance_in_meters``

    Example:
        >>> direction = np.array([3.0, 4.0])  # Represents 5 meters
        >>> unit_meter = normalize_to_one_meter(direction, 5.0)
    """
    meters_to_unit = 1 / distance_in_meters
    return vector * meters_to_unit
201
+
202
def setup_transformer(from_crs, to_crs):
    """
    Create a pyproj ``Transformer`` between two coordinate reference systems.

    The transformer is built with ``always_xy=True`` so coordinates are passed
    and returned in (x, y) / (longitude, latitude) order.

    Args:
        from_crs: Source coordinate reference system (e.g. "EPSG:4326")
        to_crs: Target coordinate reference system (e.g. "EPSG:3857")

    Returns:
        Transformer: Transformer converting from ``from_crs`` to ``to_crs``

    Example:
        >>> transformer = setup_transformer("EPSG:4326", "EPSG:3857")
        >>> x, y = transformer.transform(longitude, latitude)
    """
    crs_transformer = Transformer.from_crs(from_crs, to_crs, always_xy=True)
    return crs_transformer
225
+
226
def transform_coords(transformer, lon, lat):
    """
    Transform a coordinate pair, reporting failures instead of raising.

    A transformation counts as failed when the transformer raises or when
    either output coordinate is infinite. In both cases a message is printed
    and (None, None) is returned, so callers only need to check for None.

    Args:
        transformer (Transformer): Coordinate transformer from setup_transformer()
        lon, lat (float): Input coordinates in the source CRS

    Returns:
        tuple: (x, y) transformed coordinates in the target CRS, or (None, None)
            if the transformation fails

    Example:
        >>> transformer = setup_transformer("EPSG:4326", "EPSG:3857")
        >>> x, y = transform_coords(transformer, -74.0060, 40.7128)  # NYC coordinates
        >>> if x is not None:
        ...     print(f"Transformed coordinates: ({x}, {y})")
    """
    try:
        x, y = transformer.transform(lon, lat)
        if np.isinf(x) or np.isinf(y):
            print(f"Transformation resulted in inf values for coordinates: {lon}, {lat}")
            # Infinite output is not a usable coordinate: treat it as a failure
            return None, None
        return x, y
    except Exception as e:
        print(f"Error transforming coordinates {lon}, {lat}: {e}")
        return None, None
253
+
254
def create_polygon(vertices):
    """
    Build a Shapely polygon from a sequence of vertices.

    The vertices are passed to ``Polygon`` unchanged, as the exterior ring.

    Args:
        vertices (list): List of (longitude, latitude) coordinate pairs forming
            the polygon outline

    Returns:
        Polygon: Shapely polygon object that can be used for spatial operations

    Example:
        >>> vertices = [(0, 0), (1, 0), (1, 1), (0, 1)]  # Square
        >>> polygon = create_polygon(vertices)
        >>> print(f"Polygon area: {polygon.area}")
    """
    outline = Polygon(vertices)
    return outline
274
+
275
def create_geodataframe(polygon, crs=4326):
    """
    Create a single-row GeoDataFrame from a Shapely polygon.

    The CRS is handed to GeoPandas directly as an "EPSG:<code>" string rather
    than through ``fiona.crs.from_epsg``, so the function no longer depends on
    that deprecated Fiona helper.

    Args:
        polygon (Polygon): Shapely polygon object to convert
        crs (int): Coordinate reference system EPSG code (default: 4326 for WGS84).
            Values that cannot be converted to an integer (e.g. "EPSG:3857")
            are passed to GeoPandas unchanged.

    Returns:
        GeoDataFrame: GeoDataFrame containing the polygon with specified CRS

    Example:
        >>> vertices = [(0, 0), (1, 0), (1, 1), (0, 1)]
        >>> polygon = create_polygon(vertices)
        >>> gdf = create_geodataframe(polygon)
        >>> gdf.to_file("polygon.geojson", driver="GeoJSON")
    """
    try:
        # Integer-like codes (4326, "4326") become an explicit EPSG authority string
        crs_spec = f"EPSG:{int(crs)}"
    except (TypeError, ValueError):
        # Anything else is assumed to already be a CRS definition GeoPandas understands
        crs_spec = crs
    return gpd.GeoDataFrame({'geometry': [polygon]}, crs=crs_spec)
296
+
297
def haversine_distance(lon1, lat1, lon2, lat2):
    """
    Great-circle distance between two points via the Haversine formula.

    The Earth is treated as a sphere of radius 6371 km, so the result is an
    approximation of the true (ellipsoidal) distance.

    Args:
        lon1, lat1 (float): Coordinates of first point in decimal degrees
        lon2, lat2 (float): Coordinates of second point in decimal degrees

    Returns:
        float: Distance in kilometers
    """
    earth_radius_km = 6371

    # Work in radians throughout
    phi1 = radians(lat1)
    lambda1 = radians(lon1)
    phi2 = radians(lat2)
    lambda2 = radians(lon2)

    delta_phi = phi2 - phi1
    delta_lambda = lambda2 - lambda1

    # Haversine of the central angle, then the angle itself
    hav = sin(delta_phi/2)**2 + cos(phi1) * cos(phi2) * sin(delta_lambda/2)**2
    central_angle = 2 * atan2(sqrt(hav), sqrt(1-hav))
    return earth_radius_km * central_angle
322
+
323
def get_raster_bbox(raster_path):
    """
    Return the extent of a raster file as a rectangular Shapely geometry.

    The bounds are read from the dataset as-is, i.e. in the raster's own
    coordinate system; no reprojection is performed.

    Args:
        raster_path (str): Path to the raster file

    Returns:
        box: Shapely box built from the raster's left/bottom/right/top bounds

    Example:
        >>> bbox = get_raster_bbox("elevation.tif")
        >>> print(f"Raster extent: {bbox.bounds}")  # (minx, miny, maxx, maxy)
    """
    with rasterio.open(raster_path) as dataset:
        extent = dataset.bounds
    west, south, east, north = extent.left, extent.bottom, extent.right, extent.top
    return box(west, south, east, north)
342
+
343
def raster_intersects_polygon(raster_path, polygon):
    """
    Test whether a raster's bounding box intersects a polygon.

    If the raster's CRS does not resolve to EPSG:4326, its bounds are first
    reprojected to EPSG:4326 so they can be compared with ``polygon``.

    Args:
        raster_path (str): Path to the raster file to check
        polygon (Polygon): Shapely polygon to test intersection with (in WGS84)

    Returns:
        bool: True if the raster's bounding box and the polygon intersect

    Example:
        >>> aoi = create_polygon([(lon1, lat1), (lon2, lat2), ...])  # Area of interest
        >>> if raster_intersects_polygon("dem.tif", aoi):
        ...     print("Raster covers the area of interest")
    """
    with rasterio.open(raster_path) as dataset:
        extent = dataset.bounds
        already_wgs84 = dataset.crs.to_epsg() == 4326
        if not already_wgs84:
            # Bring the raster extent into the polygon's coordinate system
            extent = transform_bounds(dataset.crs, 'EPSG:4326', *extent)
        footprint = box(*extent)
        overlaps = footprint.intersects(polygon) or polygon.intersects(footprint)
    return overlaps
369
+
370
def save_raster(input_path, output_path):
    """
    Copy a raster file to a new location and report the destination.

    This is a plain file copy (``shutil.copy``); the raster content is not
    opened or modified.

    Args:
        input_path (str): Source raster file path
        output_path (str): Destination path for the copied raster

    Example:
        >>> save_raster("original.tif", "backup/copy.tif")
        Copied original file to: backup/copy.tif
    """
    from shutil import copy as copy_file

    copy_file(input_path, output_path)
    print(f"Copied original file to: {output_path}")
387
+
388
def merge_geotiffs(geotiff_files, output_dir):
    """
    Merge multiple GeoTIFF files into a single mosaic written to ``lulc.tif``.

    Paths that do not exist are skipped. The output metadata is copied from
    the first existing input and updated with the mosaic's size and transform.
    Errors raised while merging or writing are printed, not propagated.

    All opened datasets are registered with an ``ExitStack`` so they are
    closed on every exit path, including when opening a later file fails.

    Args:
        geotiff_files (list): List of paths to GeoTIFF files to merge
        output_dir (str): Directory where the merged output will be saved

    Example:
        >>> files = ["tile1.tif", "tile2.tif", "tile3.tif"]
        >>> merge_geotiffs(files, "output_directory")
        Merged output saved to: output_directory/lulc.tif
    """
    from contextlib import ExitStack

    if not geotiff_files:
        return

    existing_files = [path for path in geotiff_files if os.path.exists(path)]
    if not existing_files:
        print("No valid files to merge.")
        return

    with ExitStack() as stack:
        # Each dataset is closed by the stack, even if a later open() raises
        src_files_to_mosaic = [
            stack.enter_context(rasterio.open(path)) for path in existing_files
        ]

        try:
            # Merge rasters into a single mosaic and get output transform
            mosaic, out_trans = merge(src_files_to_mosaic)

            # Copy metadata from first raster and update for merged output
            out_meta = src_files_to_mosaic[0].meta.copy()
            out_meta.update({
                "driver": "GTiff",
                "height": mosaic.shape[1],
                "width": mosaic.shape[2],
                "transform": out_trans,
            })

            # Save merged raster to output file
            merged_path = os.path.join(output_dir, "lulc.tif")
            with rasterio.open(merged_path, "w", **out_meta) as dest:
                dest.write(mosaic)

            print(f"Merged output saved to: {merged_path}")
        except Exception as e:
            # Best-effort operation: report the failure and carry on
            print(f"Error merging files: {e}")
442
+
443
def convert_format_lat_lon(input_coords):
    """
    Return a closed copy of a coordinate ring.

    The coordinates keep their [lon, lat] order; the first point is appended
    at the end so the ring is closed. The input is not modified. An empty
    input yields an empty list instead of raising.

    Args:
        input_coords (list): Sequence of [lon, lat] coordinates

    Returns:
        list: List of [lon, lat] coordinates with first point repeated at end
            (empty if ``input_coords`` is empty)
    """
    # list() makes a shallow copy and also accepts tuples or other iterables
    output_coords = list(input_coords)
    if output_coords:
        # Close polygon by repeating first point at end
        output_coords.append(output_coords[0])
    return output_coords
459
+
460
def get_coordinates_from_cityname(place_name):
    """
    Geocode a place name to coordinates using the Nominatim geocoder.

    A single geocoding request is made; no rate limiter is applied here.
    Timeouts and service errors are reported with a printed message.

    Args:
        place_name (str): Name of the place to geocode (e.g., "Tokyo, Japan")

    Returns:
        tuple: (latitude, longitude) coordinates, or None if nothing was found
            or the geocoding service failed

    Example:
        >>> coords = get_coordinates_from_cityname("Paris, France")
        >>> if coords:
        ...     lat, lon = coords
        ...     print(f"Paris coordinates: {lat}, {lon}")
    """
    geocoder = Nominatim(user_agent="my_geocoding_script")

    try:
        match = geocoder.geocode(place_name)
    except (GeocoderTimedOut, GeocoderServiceError):
        print(f"Error: Geocoding service timed out or encountered an error for {place_name}")
        return None

    # No result for this name
    if not match:
        return None
    return (match.latitude, match.longitude)
496
+
497
def get_city_country_name_from_rectangle(coordinates):
    """
    Look up the city and country at the center of a rectangle.

    The center is the mean of the given vertices. It is reverse-geocoded with
    Nominatim through a rate limiter (2 s minimum delay, up to 3 retries).
    The city name is taken from the first non-empty address field among
    'city', 'town', 'village' and 'county'.

    Args:
        coordinates (list): List of (longitude, latitude) coordinates defining the rectangle

    Returns:
        str: String in format "city/ country", or
            "Unknown Location/ Unknown Country" if the lookup fails

    Example:
        >>> coords = [(139.65, 35.67), (139.66, 35.67),
        ...           (139.66, 35.68), (139.65, 35.68)]
        >>> location = get_city_country_name_from_rectangle(coords)
        >>> print(f"Location: {location}")  # e.g., "Shibuya/ Japan"
    """
    unknown = "Unknown Location/ Unknown Country"

    # Mean of the vertices; geopy expects (latitude, longitude)
    lons = [vertex[0] for vertex in coordinates]
    lats = [vertex[1] for vertex in coordinates]
    center_coord = (sum(lats) / len(lats), sum(lons) / len(lons))

    geolocator = Nominatim(user_agent="your_app_name (your_email@example.com)")
    reverse = RateLimiter(geolocator.reverse, min_delay_seconds=2, error_wait_seconds=5, max_retries=3)

    try:
        location = reverse(center_coord, language='en')
        if not location:
            print("Location not found")
            return unknown

        address = location.raw['address']
        # Prefer the most specific place name, falling back towards county
        city = ''
        for field in ('city', 'town', 'village', 'county'):
            city = address.get(field, '')
            if city:
                break
        country = address.get('country', '')
        return f"{city}/ {country}"
    except Exception as e:
        print(f"Error retrieving location for {center_coord}: {e}")
        return unknown
545
+
546
def get_timezone_info(rectangle_coords):
    """
    Get the UTC offset and the matching meridian for the center of an area.

    The timezone is looked up at the mean of the given vertices. Its UTC
    offset at the current moment is converted to decimal hours, and the
    meridian is that offset multiplied by 15 degrees per hour.

    NOTE(review): the offset is taken from the current local time, so for
    zones that observe daylight saving time both return values depend on when
    the function is called.

    Args:
        rectangle_coords (list): List of (longitude, latitude) coordinates defining the area

    Returns:
        tuple: (UTC offset string, meridian longitude string). The offset is
            formatted in decimal hours with sign, e.g. "UTC+9.00", and the
            meridian with five decimals, e.g. "135.00000". If no timezone is
            found, ("UTC+00:00", "0.00000") is returned.

    Example:
        >>> coords = [(139.65, 35.67), (139.66, 35.67),
        ...           (139.66, 35.68), (139.65, 35.68)]
        >>> tz, meridian = get_timezone_info(coords)
        >>> print(f"Timezone: {tz}, Meridian: {meridian}")  # e.g., "UTC+9.00, 135.00000"
    """
    # Center of the area as the mean of its vertices
    lons = [vertex[0] for vertex in rectangle_coords]
    lats = [vertex[1] for vertex in rectangle_coords]
    center_lon = sum(lons) / len(lons)
    center_lat = sum(lats) / len(lats)

    zone_name = TimezoneFinder().timezone_at(lng=center_lon, lat=center_lat)

    if not zone_name:
        # Fallback values when the timezone cannot be determined
        print("Warning: Timezone not found for the given location, using UTC+00:00")
        return "UTC+00:00", "0.00000"

    # Offset of the zone right now, in hours
    local_now = datetime.now(pytz.timezone(zone_name))
    offset_hours = local_now.utcoffset().total_seconds() / 3600

    # Each hour of offset corresponds to 15 degrees of longitude
    meridian = offset_hours * 15
    return f"UTC{offset_hours:+.2f}", f"{meridian:.5f}"
596
+
597
def _close_and_check_ring(ring):
    """Close ``ring`` in place if needed; return True if it then has at least 4 points."""
    if not ring:
        # An empty ring has no first point to compare or repeat
        return False
    if ring[0] != ring[-1]:
        ring.append(ring[0])  # Close the ring
    # 3 distinct points + closing point
    return len(ring) >= 4


def validate_polygon_coordinates(geometry):
    """
    Validate and ensure proper closure of polygon coordinate rings.

    Every ring of a GeoJSON Polygon or MultiPolygon is checked in order.
    Rings whose first and last points differ are closed IN PLACE by appending
    the first point. Validation stops at the first ring that is empty or has
    fewer than 4 points after closing.

    Args:
        geometry (dict): GeoJSON geometry object with 'type' and 'coordinates' properties

    Returns:
        bool: True if all rings are valid or were successfully closed,
            False if a ring is empty or too short, or if the geometry type is
            neither 'Polygon' nor 'MultiPolygon'

    Example:
        >>> geom = {
        ...     "type": "Polygon",
        ...     "coordinates": [[[0,0], [1,0], [1,1], [0,1]]]  # Not closed
        ... }
        >>> validate_polygon_coordinates(geom)  # Closes the ring automatically
        True
    """
    geometry_type = geometry.get('type')
    if geometry_type == 'Polygon':
        # Treat a Polygon as a MultiPolygon with a single member
        polygons = [geometry['coordinates']]
    elif geometry_type == 'MultiPolygon':
        polygons = geometry['coordinates']
    else:
        return False

    for polygon in polygons:
        for ring in polygon:
            if not _close_and_check_ring(ring):
                return False
    return True
642
+
643
def create_building_polygons(filtered_buildings):
    """
    Create building polygons with properties from filtered GeoJSON features.

    For each feature the first coordinate ring of its geometry is turned into
    a Shapely polygon, height information is resolved from its properties and
    the polygon's bounds are inserted into an rtree spatial index. The index
    key of a polygon is its position in the returned list. Features with an
    invalid polygon, or whose processing raises, are skipped with a warning.

    Height calculation rules:
        - Use explicit 'height' if it is present and positive
        - Otherwise 'levels' * floor_height, else 'num_floors' * floor_height
        - Use NaN if no height information is available
        - 'min_height' follows the same pattern with 'min_level' / 'min_floor'
          and defaults to 0
    Level and floor counts are converted with float() so numeric strings
    (e.g. "3") are accepted, consistently for both height and min_height.

    Features without an 'id' property receive new ids starting one above the
    highest existing id (or at 1 when no feature has an id).

    Args:
        filtered_buildings (list): List of GeoJSON building features with properties

    Returns:
        tuple: (
            list of tuples (polygon, height, min_height, is_inner, feature_id),
            rtree spatial index for the polygons
        )

    Example:
        >>> buildings = [
        ...     {
        ...         "type": "Feature",
        ...         "geometry": {"type": "Polygon", "coordinates": [...]},
        ...         "properties": {"height": 30, "levels": 10}
        ...     },
        ...     # ... more buildings ...
        ... ]
        >>> polygons, spatial_idx = create_building_polygons(buildings)
    """
    building_polygons = []
    idx = index.Index()
    valid_count = 0

    # Find highest existing ID to avoid duplicates
    existing_ids = [
        building['properties']['id']
        for building in filtered_buildings
        if building['properties'].get('id') is not None
    ]
    id_count = max(existing_ids) + 1 if existing_ids else 1

    for building in filtered_buildings:
        try:
            # Only the first ring of the geometry is used
            coords = building['geometry']['coordinates'][0]
            # Flatten coordinates if they're nested tuples
            if isinstance(coords[0], tuple):
                coords = [list(c) for c in coords]
            elif isinstance(coords[0][0], tuple):
                coords = [list(c[0]) for c in coords]

            polygon = Polygon(coords)

            # Skip invalid geometries
            if not polygon.is_valid:
                print("Warning: Skipping invalid polygon geometry")
                continue

            properties = building['properties']
            height = properties.get('height')
            levels = properties.get('levels')
            floors = properties.get('num_floors')
            min_height = properties.get('min_height')
            min_level = properties.get('min_level')
            min_floor = properties.get('min_floor')

            if (height is None) or (height <= 0):
                if levels is not None:
                    height = floor_height * float(levels)
                elif floors is not None:
                    height = floor_height * float(floors)
                else:
                    height = np.nan

            if (min_height is None) or (min_height <= 0):
                if min_level is not None:
                    min_height = floor_height * float(min_level)
                elif min_floor is not None:
                    min_height = floor_height * float(min_floor)
                else:
                    min_height = 0

            if properties.get('id') is not None:
                feature_id = properties['id']
            else:
                feature_id = id_count
                id_count += 1

            if properties.get('is_inner') is not None:
                is_inner = properties['is_inner']
            else:
                is_inner = False

            building_polygons.append((polygon, height, min_height, is_inner, feature_id))
            # Index key == position of this polygon in building_polygons
            idx.insert(valid_count, polygon.bounds)
            valid_count += 1

        except Exception as e:
            print(f"Warning: Skipping invalid building geometry: {e}")
            continue

    return building_polygons, idx
758
+
759
def get_country_name(lon, lat):
    """
    Resolve the country name for a coordinate pair.

    The ``reverse_geocoder`` package returns the nearest known place for the
    point; its country code is then mapped to a country via ``pycountry``.

    Args:
        lon (float): Longitude in decimal degrees
        lat (float): Latitude in decimal degrees

    Returns:
        str: Full country name, or None if the country code is unknown to pycountry

    Example:
        >>> country = get_country_name(139.6503, 35.6762)
        >>> print(f"Country: {country}")  # "Japan"
    """
    # reverse_geocoder expects (latitude, longitude); take the best match
    nearest_place = rg.search((lat, lon))[0]

    # Two-letter country code -> pycountry record (None if not found)
    country = pycountry.countries.get(alpha_2=nearest_place['cc'])
    return country.name if country else None