voxcity-0.6.15-py3-none-any.whl → voxcity-0.7.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. voxcity/__init__.py +14 -8
  2. voxcity/downloader/__init__.py +2 -1
  3. voxcity/downloader/citygml.py +32 -18
  4. voxcity/downloader/gba.py +210 -0
  5. voxcity/downloader/gee.py +5 -1
  6. voxcity/downloader/mbfp.py +1 -1
  7. voxcity/downloader/oemj.py +80 -8
  8. voxcity/downloader/osm.py +23 -7
  9. voxcity/downloader/overture.py +26 -1
  10. voxcity/downloader/utils.py +73 -73
  11. voxcity/errors.py +30 -0
  12. voxcity/exporter/__init__.py +13 -4
  13. voxcity/exporter/cityles.py +633 -535
  14. voxcity/exporter/envimet.py +728 -708
  15. voxcity/exporter/magicavoxel.py +334 -297
  16. voxcity/exporter/netcdf.py +238 -0
  17. voxcity/exporter/obj.py +1481 -655
  18. voxcity/generator/__init__.py +44 -0
  19. voxcity/generator/api.py +675 -0
  20. voxcity/generator/grids.py +379 -0
  21. voxcity/generator/io.py +94 -0
  22. voxcity/generator/pipeline.py +282 -0
  23. voxcity/generator/voxelizer.py +380 -0
  24. voxcity/geoprocessor/__init__.py +75 -6
  25. voxcity/geoprocessor/conversion.py +153 -0
  26. voxcity/geoprocessor/draw.py +62 -12
  27. voxcity/geoprocessor/heights.py +199 -0
  28. voxcity/geoprocessor/io.py +101 -0
  29. voxcity/geoprocessor/merge_utils.py +91 -0
  30. voxcity/geoprocessor/mesh.py +806 -790
  31. voxcity/geoprocessor/network.py +708 -679
  32. voxcity/geoprocessor/overlap.py +84 -0
  33. voxcity/geoprocessor/raster/__init__.py +82 -0
  34. voxcity/geoprocessor/raster/buildings.py +428 -0
  35. voxcity/geoprocessor/raster/canopy.py +258 -0
  36. voxcity/geoprocessor/raster/core.py +150 -0
  37. voxcity/geoprocessor/raster/export.py +93 -0
  38. voxcity/geoprocessor/raster/landcover.py +156 -0
  39. voxcity/geoprocessor/raster/raster.py +110 -0
  40. voxcity/geoprocessor/selection.py +85 -0
  41. voxcity/geoprocessor/utils.py +18 -14
  42. voxcity/models.py +113 -0
  43. voxcity/simulator/common/__init__.py +22 -0
  44. voxcity/simulator/common/geometry.py +98 -0
  45. voxcity/simulator/common/raytracing.py +450 -0
  46. voxcity/simulator/solar/__init__.py +43 -0
  47. voxcity/simulator/solar/integration.py +336 -0
  48. voxcity/simulator/solar/kernels.py +62 -0
  49. voxcity/simulator/solar/radiation.py +648 -0
  50. voxcity/simulator/solar/temporal.py +434 -0
  51. voxcity/simulator/view.py +36 -2286
  52. voxcity/simulator/visibility/__init__.py +29 -0
  53. voxcity/simulator/visibility/landmark.py +392 -0
  54. voxcity/simulator/visibility/view.py +508 -0
  55. voxcity/utils/logging.py +61 -0
  56. voxcity/utils/orientation.py +51 -0
  57. voxcity/utils/weather/__init__.py +26 -0
  58. voxcity/utils/weather/epw.py +146 -0
  59. voxcity/utils/weather/files.py +36 -0
  60. voxcity/utils/weather/onebuilding.py +486 -0
  61. voxcity/visualizer/__init__.py +24 -0
  62. voxcity/visualizer/builder.py +43 -0
  63. voxcity/visualizer/grids.py +141 -0
  64. voxcity/visualizer/maps.py +187 -0
  65. voxcity/visualizer/palette.py +228 -0
  66. voxcity/visualizer/renderer.py +928 -0
  67. {voxcity-0.6.15.dist-info → voxcity-0.7.0.dist-info}/METADATA +113 -36
  68. voxcity-0.7.0.dist-info/RECORD +77 -0
  69. {voxcity-0.6.15.dist-info → voxcity-0.7.0.dist-info}/WHEEL +1 -1
  70. voxcity/generator.py +0 -1137
  71. voxcity/geoprocessor/grid.py +0 -1568
  72. voxcity/geoprocessor/polygon.py +0 -1344
  73. voxcity/simulator/solar.py +0 -2329
  74. voxcity/utils/visualization.py +0 -2660
  75. voxcity/utils/weather.py +0 -817
  76. voxcity-0.6.15.dist-info/RECORD +0 -37
  77. {voxcity-0.6.15.dist-info → voxcity-0.7.0.dist-info/licenses}/AUTHORS.rst +0 -0
  78. {voxcity-0.6.15.dist-info → voxcity-0.7.0.dist-info/licenses}/LICENSE +0 -0
voxcity/utils/weather/epw.py (new file)
@@ -0,0 +1,146 @@
+ from pathlib import Path
+ from typing import Tuple, Union
+ import pandas as pd
+
+
+ def process_epw(epw_path: Union[str, Path]) -> Tuple[pd.DataFrame, dict]:
+     """
+     Process an EPW file into a pandas DataFrame and header metadata.
+     """
+     columns = [
+         'Year', 'Month', 'Day', 'Hour', 'Minute',
+         'Data Source and Uncertainty Flags',
+         'Dry Bulb Temperature', 'Dew Point Temperature',
+         'Relative Humidity', 'Atmospheric Station Pressure',
+         'Extraterrestrial Horizontal Radiation',
+         'Extraterrestrial Direct Normal Radiation',
+         'Horizontal Infrared Radiation Intensity',
+         'Global Horizontal Radiation',
+         'Direct Normal Radiation', 'Diffuse Horizontal Radiation',
+         'Global Horizontal Illuminance',
+         'Direct Normal Illuminance', 'Diffuse Horizontal Illuminance',
+         'Zenith Luminance', 'Wind Direction', 'Wind Speed',
+         'Total Sky Cover', 'Opaque Sky Cover', 'Visibility',
+         'Ceiling Height', 'Present Weather Observation',
+         'Present Weather Codes', 'Precipitable Water',
+         'Aerosol Optical Depth', 'Snow Depth',
+         'Days Since Last Snowfall', 'Albedo',
+         'Liquid Precipitation Depth', 'Liquid Precipitation Quantity'
+     ]
+
+     with open(epw_path, 'r') as f:
+         lines = f.readlines()
+
+     headers = {
+         'LOCATION': lines[0].strip(),
+         'DESIGN_CONDITIONS': lines[1].strip(),
+         'TYPICAL_EXTREME_PERIODS': lines[2].strip(),
+         'GROUND_TEMPERATURES': lines[3].strip(),
+         'HOLIDAYS_DAYLIGHT_SAVINGS': lines[4].strip(),
+         'COMMENTS_1': lines[5].strip(),
+         'COMMENTS_2': lines[6].strip(),
+         'DATA_PERIODS': lines[7].strip()
+     }
+
+     location = headers['LOCATION'].split(',')
+     if len(location) >= 10:
+         headers['LOCATION'] = {
+             'City': location[1].strip(),
+             'State': location[2].strip(),
+             'Country': location[3].strip(),
+             'Data Source': location[4].strip(),
+             'WMO': location[5].strip(),
+             'Latitude': float(location[6]),
+             'Longitude': float(location[7]),
+             'Time Zone': float(location[8]),
+             'Elevation': float(location[9])
+         }
+
+     data = [line.strip().split(',') for line in lines[8:]]
+     df = pd.DataFrame(data, columns=columns)
+
+     numeric_columns = [
+         'Year', 'Month', 'Day', 'Hour', 'Minute',
+         'Dry Bulb Temperature', 'Dew Point Temperature',
+         'Relative Humidity', 'Atmospheric Station Pressure',
+         'Extraterrestrial Horizontal Radiation',
+         'Extraterrestrial Direct Normal Radiation',
+         'Horizontal Infrared Radiation Intensity',
+         'Global Horizontal Radiation',
+         'Direct Normal Radiation', 'Diffuse Horizontal Radiation',
+         'Global Horizontal Illuminance',
+         'Direct Normal Illuminance', 'Diffuse Horizontal Illuminance',
+         'Zenith Luminance', 'Wind Direction', 'Wind Speed',
+         'Total Sky Cover', 'Opaque Sky Cover', 'Visibility',
+         'Ceiling Height', 'Precipitable Water',
+         'Aerosol Optical Depth', 'Snow Depth',
+         'Days Since Last Snowfall', 'Albedo',
+         'Liquid Precipitation Depth', 'Liquid Precipitation Quantity'
+     ]
+     for col in numeric_columns:
+         df[col] = pd.to_numeric(df[col], errors='coerce')
+
+     df['datetime'] = pd.to_datetime({
+         'year': df['Year'],
+         'month': df['Month'],
+         'day': df['Day'],
+         'hour': df['Hour'] - 1,
+         'minute': df['Minute']
+     })
+     df.set_index('datetime', inplace=True)
+     return df, headers
+
+
+ def read_epw_for_solar_simulation(epw_file_path):
+     """
+     Read EPW file specifically for solar simulation purposes.
+     Returns (df[DNI,DHI], lon, lat, tz, elevation_m).
+     """
+     epw_path_obj = Path(epw_file_path)
+     if not epw_path_obj.exists() or not epw_path_obj.is_file():
+         raise FileNotFoundError(f"EPW file not found: {epw_file_path}")
+
+     with open(epw_path_obj, 'r', encoding='utf-8') as f:
+         lines = f.readlines()
+
+     location_line = None
+     for line in lines:
+         if line.startswith("LOCATION"):
+             location_line = line.strip().split(',')
+             break
+     if location_line is None:
+         raise ValueError("Could not find LOCATION line in EPW file.")
+
+     lat = float(location_line[6])
+     lon = float(location_line[7])
+     tz = float(location_line[8])
+     elevation_m = float(location_line[9])
+
+     data_start_index = None
+     for i, line in enumerate(lines):
+         vals = line.strip().split(',')
+         if i >= 8 and len(vals) > 30:
+             data_start_index = i
+             break
+     if data_start_index is None:
+         raise ValueError("Could not find start of weather data lines in EPW file.")
+
+     data = []
+     for l in lines[data_start_index:]:
+         vals = l.strip().split(',')
+         if len(vals) < 15:
+             continue
+         year = int(vals[0])
+         month = int(vals[1])
+         day = int(vals[2])
+         hour = int(vals[3]) - 1
+         dni = float(vals[14])
+         dhi = float(vals[15])
+         timestamp = pd.Timestamp(year, month, day, hour)
+         data.append([timestamp, dni, dhi])
+
+     df = pd.DataFrame(data, columns=['time', 'DNI', 'DHI']).set_index('time')
+     df = df.sort_index()
+     return df, lon, lat, tz, elevation_m
+
+
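For orientation, a minimal usage sketch of the two readers added above (not part of the diff; the file name tokyo.epw and the printed fields are illustrative assumptions):

    from voxcity.utils.weather.epw import process_epw, read_epw_for_solar_simulation

    # Full hourly table plus parsed header metadata
    df, headers = process_epw("tokyo.epw")  # hypothetical local EPW file
    print(headers["LOCATION"])              # becomes a dict when the LOCATION line has >= 10 fields

    # DNI/DHI only, plus site metadata, for solar simulation
    solar_df, lon, lat, tz, elevation_m = read_epw_for_solar_simulation("tokyo.epw")
    print(solar_df.head())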
voxcity/utils/weather/files.py (new file)
@@ -0,0 +1,36 @@
+ from pathlib import Path
+ import os
+ import zipfile
+
+
+ def safe_rename(src: Path, dst: Path) -> Path:
+     """
+     Safely rename a file, handling existing files by adding a number suffix.
+     """
+     if not dst.exists():
+         src.rename(dst)
+         return dst
+     base = dst.stem
+     ext = dst.suffix
+     counter = 1
+     while True:
+         new_dst = dst.with_name(f"{base}_{counter}{ext}")
+         if not new_dst.exists():
+             src.rename(new_dst)
+             return new_dst
+         counter += 1
+
+
+ def safe_extract(zip_ref: zipfile.ZipFile, filename: str, extract_dir: Path) -> Path:
+     """
+     Safely extract a file from zip, handling existing files.
+     """
+     try:
+         zip_ref.extract(filename, extract_dir)
+         return extract_dir / filename
+     except FileExistsError:
+         temp_name = f"temp_{os.urandom(4).hex()}_{filename}"
+         zip_ref.extract(filename, extract_dir, temp_name)
+         return extract_dir / temp_name
+
+
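A brief sketch of how these helpers are meant to be called (illustrative paths only, not part of the diff):

    from pathlib import Path
    import zipfile
    from voxcity.utils.weather.files import safe_rename, safe_extract

    # If station.epw already exists, the file is renamed to station_1.epw,
    # station_2.epw, ... and the path actually used is returned.
    final = safe_rename(Path("download.epw"), Path("station.epw"))  # hypothetical files

    # Extract one member of an archive into a target directory and get its path back.
    with zipfile.ZipFile("weather_data.zip") as zf:                 # hypothetical archive
        epw_path = safe_extract(zf, zf.namelist()[0], Path("./out"))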
voxcity/utils/weather/onebuilding.py (new file)
@@ -0,0 +1,486 @@
+ from typing import Optional, Dict, List, Tuple, Union
+ from pathlib import Path
+ import requests
+ import re
+ import xml.etree.ElementTree as ET
+ import json
+ import zipfile
+
+ from .files import safe_extract, safe_rename
+ from .epw import process_epw
+
+
+ def get_nearest_epw_from_climate_onebuilding(longitude: float, latitude: float, output_dir: str = "./", max_distance: Optional[float] = None,
+         extract_zip: bool = True, load_data: bool = True, region: Optional[Union[str, List[str]]] = None,
+         allow_insecure_ssl: bool = False, allow_http_fallback: bool = False,
+         ssl_verify: Union[bool, str] = True) -> Tuple[Optional[str], Optional["pd.DataFrame"], Optional[Dict]]:
+     """
+     Download and process EPW weather file from Climate.OneBuilding.Org based on coordinates.
+     """
+     import numpy as np
+     import pandas as pd
+
+     # --- KML sources and region helpers (unchanged from monolith) ---
+     KML_SOURCES = {
+         "Africa": "https://climate.onebuilding.org/sources/Region1_Africa_TMYx_EPW_Processing_locations.kml",
+         "Asia": "https://climate.onebuilding.org/sources/Region2_Asia_TMYx_EPW_Processing_locations.kml",
+         "Japan": "https://climate.onebuilding.org/sources/JGMY_EPW_Processing_locations.kml",
+         "India": "https://climate.onebuilding.org/sources/ITMY_EPW_Processing_locations.kml",
+         "CSWD": "https://climate.onebuilding.org/sources/CSWD_EPW_Processing_locations.kml",
+         "CityUHK": "https://climate.onebuilding.org/sources/CityUHK_EPW_Processing_locations.kml",
+         "PHIKO": "https://climate.onebuilding.org/sources/PHIKO_EPW_Processing_locations.kml",
+         "South_America": "https://climate.onebuilding.org/sources/Region3_South_America_TMYx_EPW_Processing_locations.kml",
+         "Argentina": "https://climate.onebuilding.org/sources/ArgTMY_EPW_Processing_locations.kml",
+         "INMET_TRY": "https://climate.onebuilding.org/sources/INMET_TRY_EPW_Processing_locations.kml",
+         "AMTUes": "https://climate.onebuilding.org/sources/AMTUes_EPW_Processing_locations.kml",
+         "BrazFuture": "https://climate.onebuilding.org/sources/BrazFuture_EPW_Processing_locations.kml",
+         "Canada": "https://climate.onebuilding.org/sources/Region4_Canada_TMYx_EPW_Processing_locations.kml",
+         "USA": "https://climate.onebuilding.org/sources/Region4_USA_TMYx_EPW_Processing_locations.kml",
+         "Caribbean": "https://climate.onebuilding.org/sources/Region4_NA_CA_Caribbean_TMYx_EPW_Processing_locations.kml",
+         "Southwest_Pacific": "https://climate.onebuilding.org/sources/Region5_Southwest_Pacific_TMYx_EPW_Processing_locations.kml",
+         "Europe": "https://climate.onebuilding.org/sources/Region6_Europe_TMYx_EPW_Processing_locations.kml",
+         "Antarctica": "https://climate.onebuilding.org/sources/Region7_Antarctica_TMYx_EPW_Processing_locations.kml",
+     }
+
+     REGION_DATASET_GROUPS = {
+         "Africa": ["Africa"],
+         "Asia": ["Asia", "Japan", "India", "CSWD", "CityUHK", "PHIKO"],
+         "South_America": ["South_America", "Argentina", "INMET_TRY", "AMTUes", "BrazFuture"],
+         "North_and_Central_America": ["North_and_Central_America", "Canada", "USA", "Caribbean"],
+         "Southwest_Pacific": ["Southwest_Pacific"],
+         "Europe": ["Europe"],
+         "Antarctica": ["Antarctica"],
+     }
+
+     REGION_BOUNDS = {
+         "Africa": {"lon_min": -25, "lon_max": 80, "lat_min": -55, "lat_max": 45},
+         "Asia": {"lon_min": 20, "lon_max": 180, "lat_min": -10, "lat_max": 80},
+         "Japan": {"lon_min": 127, "lon_max": 146, "lat_min": 24, "lat_max": 46},
+         "India": {"lon_min": 68, "lon_max": 97, "lat_min": 6, "lat_max": 36},
+         "South_America": {"lon_min": -92, "lon_max": -20, "lat_min": -60, "lat_max": 15},
+         "Argentina": {"lon_min": -75, "lon_max": -53, "lat_min": -55, "lat_max": -22},
+         "North_and_Central_America": {"lon_min": -180, "lon_max": 20, "lat_min": -10, "lat_max": 85},
+         "Canada": {"lon_min": -141, "lon_max": -52, "lat_min": 42, "lat_max": 83},
+         "USA": {"lon_min": -170, "lon_max": -65, "lat_min": 20, "lat_max": 72},
+         "Caribbean": {"lon_min": -90, "lon_max": -59, "lat_min": 10, "lat_max": 27},
+         "Southwest_Pacific": {"boxes": [
+             {"lon_min": 90, "lon_max": 180, "lat_min": -50, "lat_max": 25},
+             {"lon_min": -180, "lon_max": -140, "lat_min": -50, "lat_max": 25},
+         ]},
+         "Europe": {"lon_min": -75, "lon_max": 60, "lat_min": 25, "lat_max": 85},
+         "Antarctica": {"lon_min": -180, "lon_max": 180, "lat_min": -90, "lat_max": -60}
+     }
+
+     def detect_regions(lon: float, lat: float) -> List[str]:
+         matching_regions = []
+
+         lon_adjusted = lon
+         if lon < -180:
+             lon_adjusted = lon + 360
+         elif lon > 180:
+             lon_adjusted = lon - 360
+
+         def _in_box(bx: Dict[str, float], lon_v: float, lat_v: float) -> bool:
+             return (bx["lon_min"] <= lon_v <= bx["lon_max"] and bx["lat_min"] <= lat_v <= bx["lat_max"])
+
+         for region_name, bounds in REGION_BOUNDS.items():
+             if "boxes" in bounds:
+                 for bx in bounds["boxes"]:
+                     if _in_box(bx, lon_adjusted, lat):
+                         matching_regions.append(region_name)
+                         break
+             else:
+                 if _in_box(bounds, lon_adjusted, lat):
+                     matching_regions.append(region_name)
+
+         if not matching_regions:
+             region_distances = []
+             def _box_distance(bx: Dict[str, float]) -> float:
+                 lon_dist = 0
+                 if lon_adjusted < bx["lon_min"]:
+                     lon_dist = bx["lon_min"] - lon_adjusted
+                 elif lon_adjusted > bx["lon_max"]:
+                     lon_dist = lon_adjusted - bx["lon_max"]
+                 lat_dist = 0
+                 if lat < bx["lat_min"]:
+                     lat_dist = bx["lat_min"] - lat
+                 elif lat > bx["lat_max"]:
+                     lat_dist = lat - bx["lat_max"]
+                 return (lon_dist**2 + lat_dist**2)**0.5
+             for region_name, bounds in REGION_BOUNDS.items():
+                 if "boxes" in bounds:
+                     d = min(_box_distance(bx) for bx in bounds["boxes"])
+                 else:
+                     d = _box_distance(bounds)
+                 region_distances.append((region_name, d))
+             closest_regions = sorted(region_distances, key=lambda x: x[1])[:3]
+             matching_regions = [r[0] for r in closest_regions]
+         return matching_regions
+
+     def try_decode(content: bytes) -> str:
+         encodings = ['utf-8', 'latin1', 'iso-8859-1', 'cp1252']
+         for encoding in encodings:
+             try:
+                 return content.decode(encoding)
+             except UnicodeDecodeError:
+                 continue
+         return content.decode('utf-8', errors='replace')
+
+     def clean_xml(content: str) -> str:
+         content = content.replace('&ntilde;', 'n').replace('&Ntilde;', 'N').replace('ñ', 'n').replace('Ñ', 'N')
+         content = re.sub(r'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\xFF]', '', content)
+         return content
+
+     def parse_coordinates(point_text: str) -> Tuple[float, float, float]:
+         try:
+             coords = point_text.strip().split(',')
+             if len(coords) >= 2:
+                 lon, lat = map(float, coords[:2])
+                 elevation = float(coords[2]) if len(coords) > 2 else 0
+                 return lat, lon, elevation
+         except (ValueError, IndexError):
+             pass
+         return None
+
+     def parse_station_from_description(desc: str, point_coords: Optional[Tuple[float, float, float]] = None) -> Dict:
+         if not desc:
+             return None
+         url_match = re.search(r'URL (https://.*?\.zip)', desc)
+         if not url_match:
+             return None
+         url = url_match.group(1)
+         coord_match = re.search(r'([NS]) (\d+)&deg;\s*(\d+\.\d+)'.encode('utf-8').decode('utf-8') + r"'.*?([EW]) (\d+)&deg;\s*(\d+\.\d+)'", desc)
+         if coord_match:
+             ns, lat_deg, lat_min, ew, lon_deg, lon_min = coord_match.groups()
+             lat = float(lat_deg) + float(lat_min)/60
+             if ns == 'S':
+                 lat = -lat
+             lon = float(lon_deg) + float(lon_min)/60
+             if ew == 'W':
+                 lon = -lon
+         elif point_coords:
+             lat, lon, _ = point_coords
+         else:
+             return None
+         def extract_value(pattern: str, default: str = None) -> str:
+             match = re.search(pattern, desc)
+             return match.group(1) if match else default
+         metadata = {
+             'url': url,
+             'longitude': lon,
+             'latitude': lat,
+             'elevation': int(extract_value(r'Elevation <b>(-?\d+)</b>', '0')),
+             'name': extract_value(r'<b>(.*?)</b>'),
+             'wmo': extract_value(r'WMO <b>(\d+)</b>'),
+             'climate_zone': extract_value(r'Climate Zone <b>(.*?)</b>'),
+             'period': extract_value(r'Period of Record=(\d{4}-\d{4})'),
+             'heating_db': extract_value(r'99% Heating DB <b>(.*?)</b>'),
+             'cooling_db': extract_value(r'1% Cooling DB <b>(.*?)</b>'),
+             'hdd18': extract_value(r'HDD18 <b>(\d+)</b>'),
+             'cdd10': extract_value(r'CDD10 <b>(\d+)</b>'),
+             'time_zone': extract_value(r'Time Zone {GMT <b>([-+]?\d+\.\d+)</b>')
+         }
+         return metadata
+
+     def haversine_distance(lon1: float, lat1: float, lon2: float, lat2: float) -> float:
+         from math import radians, sin, cos, sqrt, atan2
+         R = 6371
+         lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])
+         dlat = lat2 - lat1
+         dlon = lon2 - lon1
+         a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
+         c = 2 * atan2(sqrt(a), sqrt(1-a))
+         return R * c
+
+     def try_download_station_zip(original_url: str, timeout_s: int = 30) -> Optional[bytes]:
+         def candidate_urls(url: str) -> List[str]:
+             urls = [url]
+             if "/TUR_Turkey/" in url:
+                 urls.append(url.replace("/TUR_Turkey/", "/TUR_Turkiye/"))
+             if "/TUR_Turkiye/" in url:
+                 urls.append(url.replace("/TUR_Turkiye/", "/TUR_Turkey/"))
+             m = re.search(r"(.*_TMYx)(?:\.(\d{4}-\d{4}))?\.zip$", url)
+             if m:
+                 base = m.group(1)
+                 variants = [
+                     f"{base}.2009-2023.zip",
+                     f"{base}.2007-2021.zip",
+                     f"{base}.zip",
+                     f"{base}.2004-2018.zip",
+                 ]
+                 for v in variants:
+                     if v not in urls:
+                         urls.append(v)
+                 extra = []
+                 for v in variants:
+                     if "/TUR_Turkey/" in url:
+                         extra.append(v.replace("/TUR_Turkey/", "/TUR_Turkiye/"))
+                     if "/TUR_Turkiye/" in url:
+                         extra.append(v.replace("/TUR_Turkiye/", "/TUR_Turkey/"))
+                 for v in extra:
+                     if v not in urls:
+                         urls.append(v)
+             return urls
+
+         tried = set()
+         for u in candidate_urls(original_url):
+             if u in tried:
+                 continue
+             tried.add(u)
+             try:
+                 resp = requests.get(u, timeout=timeout_s, verify=ssl_verify)
+                 resp.raise_for_status()
+                 return resp.content
+             except requests.exceptions.SSLError:
+                 if allow_insecure_ssl:
+                     try:
+                         resp = requests.get(u, timeout=timeout_s, verify=False)
+                         resp.raise_for_status()
+                         return resp.content
+                     except requests.exceptions.RequestException:
+                         if allow_http_fallback and u.lower().startswith("https://"):
+                             insecure_url = "http://" + u.split("://", 1)[1]
+                             try:
+                                 resp = requests.get(insecure_url, timeout=timeout_s)
+                                 resp.raise_for_status()
+                                 return resp.content
+                             except requests.exceptions.RequestException:
+                                 pass
+                         continue
+                 else:
+                     if allow_http_fallback and u.lower().startswith("https://"):
+                         insecure_url = "http://" + u.split("://", 1)[1]
+                         try:
+                             resp = requests.get(insecure_url, timeout=timeout_s)
+                             resp.raise_for_status()
+                             return resp.content
+                         except requests.exceptions.RequestException:
+                             pass
+                     continue
+             except requests.exceptions.HTTPError as he:
+                 if getattr(he.response, "status_code", None) == 404:
+                     continue
+                 else:
+                     raise
+             except requests.exceptions.RequestException:
+                 continue
+         return None
+
+     try:
+         Path(output_dir).mkdir(parents=True, exist_ok=True)
+
+         regions_to_scan = {}
+         def _add_selection(selection_name: str, mapping: Dict[str, str], out: Dict[str, str]):
+             if selection_name in REGION_DATASET_GROUPS:
+                 for key in REGION_DATASET_GROUPS[selection_name]:
+                     if key in KML_SOURCES:
+                         out[key] = KML_SOURCES[key]
+             elif selection_name in KML_SOURCES:
+                 out[selection_name] = KML_SOURCES[selection_name]
+             else:
+                 valid = sorted(list(REGION_DATASET_GROUPS.keys()) + list(KML_SOURCES.keys()))
+                 raise ValueError(f"Invalid region/dataset: '{selection_name}'. Valid options include: {', '.join(valid)}")
+
+         if region is None:
+             detected_regions = detect_regions(longitude, latitude)
+             if detected_regions:
+                 print(f"Auto-detected regions: {', '.join(detected_regions)}")
+                 for r in detected_regions:
+                     _add_selection(r, KML_SOURCES, regions_to_scan)
+             else:
+                 print("Could not determine region from coordinates. Scanning all regions.")
+                 regions_to_scan = dict(KML_SOURCES)
+         elif isinstance(region, str):
+             if region.lower() == "all":
+                 regions_to_scan = dict(KML_SOURCES)
+             else:
+                 _add_selection(region, KML_SOURCES, regions_to_scan)
+         else:
+             for r in region:
+                 _add_selection(r, KML_SOURCES, regions_to_scan)
+
+         print("Fetching weather station data from Climate.OneBuilding.Org...")
+         all_stations = []
+         scanned_urls = set()
+         for region_name, url in regions_to_scan.items():
+             if url in scanned_urls:
+                 continue
+             scanned_urls.add(url)
+             print(f"Scanning {region_name}...")
+             stations = []
+             try:
+                 try:
+                     response = requests.get(url, timeout=30, verify=ssl_verify)
+                     response.raise_for_status()
+                 except requests.exceptions.SSLError:
+                     if allow_insecure_ssl:
+                         try:
+                             response = requests.get(url, timeout=30, verify=False)
+                             response.raise_for_status()
+                         except requests.exceptions.RequestException:
+                             if allow_http_fallback and url.lower().startswith("https://"):
+                                 insecure_url = "http://" + url.split("://", 1)[1]
+                                 response = requests.get(insecure_url, timeout=30)
+                                 response.raise_for_status()
+                             else:
+                                 raise
+                     else:
+                         if allow_http_fallback and url.lower().startswith("https://"):
+                             insecure_url = "http://" + url.split("://", 1)[1]
+                             response = requests.get(insecure_url, timeout=30)
+                             response.raise_for_status()
+                         else:
+                             raise
+                 content = try_decode(response.content)
+                 content = clean_xml(content)
+                 try:
+                     root = ET.fromstring(content.encode('utf-8'))
+                 except ET.ParseError as e:
+                     print(f"Error parsing KML file {url}: {e}")
+                     root = None
+                 if root is not None:
+                     ns = {'kml': 'http://earth.google.com/kml/2.1'}
+                     for placemark in root.findall('.//kml:Placemark', ns):
+                         name = placemark.find('kml:name', ns)
+                         desc = placemark.find('kml:description', ns)
+                         point = placemark.find('.//kml:Point/kml:coordinates', ns)
+                         if desc is None or not desc.text or "Data Source" not in desc.text:
+                             continue
+                         point_coords = None
+                         if point is not None and point.text:
+                             point_coords = parse_coordinates(point.text)
+                         station_data = parse_station_from_description(desc.text, point_coords)
+                         if station_data:
+                             station_data['name'] = name.text if name is not None else "Unknown"
+                             station_data['kml_source'] = url
+                             stations.append(station_data)
+             except requests.exceptions.RequestException as e:
+                 print(f"Error accessing KML file {url}: {e}")
+             except Exception as e:
+                 print(f"Error processing KML file {url}: {e}")
+
+             all_stations.extend(stations)
+             print(f"Found {len(stations)} stations in {region_name}")
+
+         print(f"\nTotal stations found: {len(all_stations)}")
+         if not all_stations:
+             if not (isinstance(region, str) and region.lower() == "all"):
+                 print("No stations found from detected/selected regions. Falling back to global scan...")
+                 regions_to_scan = dict(KML_SOURCES)
+                 all_stations = []
+                 scanned_urls = set()
+                 for region_name, url in regions_to_scan.items():
+                     if url in scanned_urls:
+                         continue
+                     scanned_urls.add(url)
+                     print(f"Scanning {region_name}...")
+                     # re-use logic above
+                     try:
+                         response = requests.get(url, timeout=30, verify=ssl_verify)
+                         response.raise_for_status()
+                         content = try_decode(response.content)
+                         content = clean_xml(content)
+                         root = ET.fromstring(content.encode('utf-8'))
+                         ns = {'kml': 'http://earth.google.com/kml/2.1'}
+                         for placemark in root.findall('.//kml:Placemark', ns):
+                             name = placemark.find('kml:name', ns)
+                             desc = placemark.find('kml:description', ns)
+                             point = placemark.find('.//kml:Point/kml:coordinates', ns)
+                             if desc is None or not desc.text or "Data Source" not in desc.text:
+                                 continue
+                             point_coords = None
+                             if point is not None and point.text:
+                                 point_coords = parse_coordinates(point.text)
+                             station_data = parse_station_from_description(desc.text, point_coords)
+                             if station_data:
+                                 station_data['name'] = name.text if name is not None else "Unknown"
+                                 station_data['kml_source'] = url
+                                 all_stations.append(station_data)
+                         print(f"Found {len(all_stations)} stations in {region_name}")
+                     except Exception:
+                         pass
+                 print(f"\nTotal stations found after global scan: {len(all_stations)}")
+             if not all_stations:
+                 raise ValueError("No weather stations found")
+
+         stations_with_distances = [
+             (station, haversine_distance(longitude, latitude, station['longitude'], station['latitude']))
+             for station in all_stations
+         ]
+         if max_distance is not None:
+             close_stations = [
+                 (station, distance) for station, distance in stations_with_distances if distance <= max_distance
+             ]
+             if not close_stations:
+                 closest_station, min_distance = min(stations_with_distances, key=lambda x: x[1])
+                 print(f"\nNo stations found within {max_distance} km. Closest station is {min_distance:.1f} km away.")
+                 print("Using closest available station.")
+                 stations_with_distances = [(closest_station, min_distance)]
+             else:
+                 stations_with_distances = close_stations
+
+         nearest_station, distance = min(stations_with_distances, key=lambda x: x[1])
+         print(f"\nDownloading EPW file for {nearest_station['name']}...")
+         archive_bytes = try_download_station_zip(nearest_station['url'], timeout_s=30)
+         if archive_bytes is None:
+             raise ValueError(f"Failed to download EPW archive from station URL and fallbacks: {nearest_station['url']}")
+
+         temp_dir = Path(output_dir) / "temp"
+         temp_dir.mkdir(parents=True, exist_ok=True)
+         zip_file = temp_dir / "weather_data.zip"
+         with open(zip_file, 'wb') as f:
+             f.write(archive_bytes)
+
+         final_epw = None
+         try:
+             if extract_zip:
+                 with zipfile.ZipFile(zip_file, 'r') as zip_ref:
+                     epw_files = [f for f in zip_ref.namelist() if f.lower().endswith('.epw')]
+                     if not epw_files:
+                         raise ValueError("No EPW file found in the downloaded archive")
+                     epw_filename = epw_files[0]
+                     extracted_epw = safe_extract(zip_ref, epw_filename, temp_dir)
+                     final_epw = Path(output_dir) / f"{nearest_station['name'].replace(' ', '_').replace(',', '').lower()}.epw"
+                     final_epw = safe_rename(extracted_epw, final_epw)
+         finally:
+             try:
+                 if zip_file.exists():
+                     zip_file.unlink()
+                 if temp_dir.exists() and not any(temp_dir.iterdir()):
+                     temp_dir.rmdir()
+             except Exception as e:
+                 print(f"Warning: Could not clean up temporary files: {e}")
+
+         if final_epw is None:
+             raise ValueError("Failed to extract EPW file")
+
+         metadata_file = final_epw.with_suffix('.json')
+         with open(metadata_file, 'w') as f:
+             json.dump(nearest_station, f, indent=2)
+
+         print(f"\nDownloaded EPW file for {nearest_station['name']}")
+         print(f"Distance: {distance:.2f} km")
+         print(f"Station coordinates: {nearest_station['longitude']}, {nearest_station['latitude']}")
+         if nearest_station.get('wmo'):
+             print(f"WMO: {nearest_station['wmo']}")
+         if nearest_station.get('climate_zone'):
+             print(f"Climate zone: {nearest_station['climate_zone']}")
+         if nearest_station.get('period'):
+             print(f"Data period: {nearest_station['period']}")
+         print(f"Files saved:")
+         print(f"- EPW: {final_epw}")
+         print(f"- Metadata: {metadata_file}")
+
+         df = None
+         headers = None
+         if load_data:
+             print("\nLoading EPW data...")
+             df, headers = process_epw(final_epw)
+             print(f"Loaded {len(df)} hourly records")
+
+         return str(final_epw), df, headers
+     except Exception as e:
+         print(f"Error processing data: {e}")
+         return None, None, None
+
+
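An end-to-end sketch of the new downloader (the coordinates, output directory, and distance cutoff are illustrative assumptions, not part of the diff; network access to Climate.OneBuilding.Org is required):

    from voxcity.utils.weather.onebuilding import get_nearest_epw_from_climate_onebuilding

    # Example coordinates near Tokyo; with region=None the function auto-detects
    # candidate regions (here Asia/Japan) before scanning the KML station indexes.
    epw_path, df, headers = get_nearest_epw_from_climate_onebuilding(
        longitude=139.76, latitude=35.68,
        output_dir="./weather",   # the EPW and a .json metadata file are written here
        max_distance=100,         # km; falls back to the closest station if none qualify
        region=None,
    )
    if epw_path is not None:
        print(epw_path, len(df))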