voxcity 0.6.15__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- voxcity/__init__.py +14 -8
- voxcity/downloader/__init__.py +2 -1
- voxcity/downloader/citygml.py +32 -18
- voxcity/downloader/gba.py +210 -0
- voxcity/downloader/gee.py +5 -1
- voxcity/downloader/mbfp.py +1 -1
- voxcity/downloader/oemj.py +80 -8
- voxcity/downloader/osm.py +23 -7
- voxcity/downloader/overture.py +26 -1
- voxcity/downloader/utils.py +73 -73
- voxcity/errors.py +30 -0
- voxcity/exporter/__init__.py +13 -4
- voxcity/exporter/cityles.py +633 -535
- voxcity/exporter/envimet.py +728 -708
- voxcity/exporter/magicavoxel.py +334 -297
- voxcity/exporter/netcdf.py +238 -0
- voxcity/exporter/obj.py +1481 -655
- voxcity/generator/__init__.py +44 -0
- voxcity/generator/api.py +675 -0
- voxcity/generator/grids.py +379 -0
- voxcity/generator/io.py +94 -0
- voxcity/generator/pipeline.py +282 -0
- voxcity/generator/voxelizer.py +380 -0
- voxcity/geoprocessor/__init__.py +75 -6
- voxcity/geoprocessor/conversion.py +153 -0
- voxcity/geoprocessor/draw.py +62 -12
- voxcity/geoprocessor/heights.py +199 -0
- voxcity/geoprocessor/io.py +101 -0
- voxcity/geoprocessor/merge_utils.py +91 -0
- voxcity/geoprocessor/mesh.py +806 -790
- voxcity/geoprocessor/network.py +708 -679
- voxcity/geoprocessor/overlap.py +84 -0
- voxcity/geoprocessor/raster/__init__.py +82 -0
- voxcity/geoprocessor/raster/buildings.py +428 -0
- voxcity/geoprocessor/raster/canopy.py +258 -0
- voxcity/geoprocessor/raster/core.py +150 -0
- voxcity/geoprocessor/raster/export.py +93 -0
- voxcity/geoprocessor/raster/landcover.py +156 -0
- voxcity/geoprocessor/raster/raster.py +110 -0
- voxcity/geoprocessor/selection.py +85 -0
- voxcity/geoprocessor/utils.py +18 -14
- voxcity/models.py +113 -0
- voxcity/simulator/common/__init__.py +22 -0
- voxcity/simulator/common/geometry.py +98 -0
- voxcity/simulator/common/raytracing.py +450 -0
- voxcity/simulator/solar/__init__.py +43 -0
- voxcity/simulator/solar/integration.py +336 -0
- voxcity/simulator/solar/kernels.py +62 -0
- voxcity/simulator/solar/radiation.py +648 -0
- voxcity/simulator/solar/temporal.py +434 -0
- voxcity/simulator/view.py +36 -2286
- voxcity/simulator/visibility/__init__.py +29 -0
- voxcity/simulator/visibility/landmark.py +392 -0
- voxcity/simulator/visibility/view.py +508 -0
- voxcity/utils/logging.py +61 -0
- voxcity/utils/orientation.py +51 -0
- voxcity/utils/weather/__init__.py +26 -0
- voxcity/utils/weather/epw.py +146 -0
- voxcity/utils/weather/files.py +36 -0
- voxcity/utils/weather/onebuilding.py +486 -0
- voxcity/visualizer/__init__.py +24 -0
- voxcity/visualizer/builder.py +43 -0
- voxcity/visualizer/grids.py +141 -0
- voxcity/visualizer/maps.py +187 -0
- voxcity/visualizer/palette.py +228 -0
- voxcity/visualizer/renderer.py +928 -0
- {voxcity-0.6.15.dist-info → voxcity-0.7.0.dist-info}/METADATA +113 -36
- voxcity-0.7.0.dist-info/RECORD +77 -0
- {voxcity-0.6.15.dist-info → voxcity-0.7.0.dist-info}/WHEEL +1 -1
- voxcity/generator.py +0 -1137
- voxcity/geoprocessor/grid.py +0 -1568
- voxcity/geoprocessor/polygon.py +0 -1344
- voxcity/simulator/solar.py +0 -2329
- voxcity/utils/visualization.py +0 -2660
- voxcity/utils/weather.py +0 -817
- voxcity-0.6.15.dist-info/RECORD +0 -37
- {voxcity-0.6.15.dist-info → voxcity-0.7.0.dist-info/licenses}/AUTHORS.rst +0 -0
- {voxcity-0.6.15.dist-info → voxcity-0.7.0.dist-info/licenses}/LICENSE +0 -0
voxcity/utils/weather/epw.py
@@ -0,0 +1,146 @@
+from pathlib import Path
+from typing import Tuple, Union
+import pandas as pd
+
+
+def process_epw(epw_path: Union[str, Path]) -> Tuple[pd.DataFrame, dict]:
+    """
+    Process an EPW file into a pandas DataFrame and header metadata.
+    """
+    columns = [
+        'Year', 'Month', 'Day', 'Hour', 'Minute',
+        'Data Source and Uncertainty Flags',
+        'Dry Bulb Temperature', 'Dew Point Temperature',
+        'Relative Humidity', 'Atmospheric Station Pressure',
+        'Extraterrestrial Horizontal Radiation',
+        'Extraterrestrial Direct Normal Radiation',
+        'Horizontal Infrared Radiation Intensity',
+        'Global Horizontal Radiation',
+        'Direct Normal Radiation', 'Diffuse Horizontal Radiation',
+        'Global Horizontal Illuminance',
+        'Direct Normal Illuminance', 'Diffuse Horizontal Illuminance',
+        'Zenith Luminance', 'Wind Direction', 'Wind Speed',
+        'Total Sky Cover', 'Opaque Sky Cover', 'Visibility',
+        'Ceiling Height', 'Present Weather Observation',
+        'Present Weather Codes', 'Precipitable Water',
+        'Aerosol Optical Depth', 'Snow Depth',
+        'Days Since Last Snowfall', 'Albedo',
+        'Liquid Precipitation Depth', 'Liquid Precipitation Quantity'
+    ]
+
+    with open(epw_path, 'r') as f:
+        lines = f.readlines()
+
+    headers = {
+        'LOCATION': lines[0].strip(),
+        'DESIGN_CONDITIONS': lines[1].strip(),
+        'TYPICAL_EXTREME_PERIODS': lines[2].strip(),
+        'GROUND_TEMPERATURES': lines[3].strip(),
+        'HOLIDAYS_DAYLIGHT_SAVINGS': lines[4].strip(),
+        'COMMENTS_1': lines[5].strip(),
+        'COMMENTS_2': lines[6].strip(),
+        'DATA_PERIODS': lines[7].strip()
+    }
+
+    location = headers['LOCATION'].split(',')
+    if len(location) >= 10:
+        headers['LOCATION'] = {
+            'City': location[1].strip(),
+            'State': location[2].strip(),
+            'Country': location[3].strip(),
+            'Data Source': location[4].strip(),
+            'WMO': location[5].strip(),
+            'Latitude': float(location[6]),
+            'Longitude': float(location[7]),
+            'Time Zone': float(location[8]),
+            'Elevation': float(location[9])
+        }
+
+    data = [line.strip().split(',') for line in lines[8:]]
+    df = pd.DataFrame(data, columns=columns)
+
+    numeric_columns = [
+        'Year', 'Month', 'Day', 'Hour', 'Minute',
+        'Dry Bulb Temperature', 'Dew Point Temperature',
+        'Relative Humidity', 'Atmospheric Station Pressure',
+        'Extraterrestrial Horizontal Radiation',
+        'Extraterrestrial Direct Normal Radiation',
+        'Horizontal Infrared Radiation Intensity',
+        'Global Horizontal Radiation',
+        'Direct Normal Radiation', 'Diffuse Horizontal Radiation',
+        'Global Horizontal Illuminance',
+        'Direct Normal Illuminance', 'Diffuse Horizontal Illuminance',
+        'Zenith Luminance', 'Wind Direction', 'Wind Speed',
+        'Total Sky Cover', 'Opaque Sky Cover', 'Visibility',
+        'Ceiling Height', 'Precipitable Water',
+        'Aerosol Optical Depth', 'Snow Depth',
+        'Days Since Last Snowfall', 'Albedo',
+        'Liquid Precipitation Depth', 'Liquid Precipitation Quantity'
+    ]
+    for col in numeric_columns:
+        df[col] = pd.to_numeric(df[col], errors='coerce')
+
+    df['datetime'] = pd.to_datetime({
+        'year': df['Year'],
+        'month': df['Month'],
+        'day': df['Day'],
+        'hour': df['Hour'] - 1,
+        'minute': df['Minute']
+    })
+    df.set_index('datetime', inplace=True)
+    return df, headers
+
+
+def read_epw_for_solar_simulation(epw_file_path):
+    """
+    Read EPW file specifically for solar simulation purposes.
+    Returns (df[DNI,DHI], lon, lat, tz, elevation_m).
+    """
+    epw_path_obj = Path(epw_file_path)
+    if not epw_path_obj.exists() or not epw_path_obj.is_file():
+        raise FileNotFoundError(f"EPW file not found: {epw_file_path}")
+
+    with open(epw_path_obj, 'r', encoding='utf-8') as f:
+        lines = f.readlines()
+
+    location_line = None
+    for line in lines:
+        if line.startswith("LOCATION"):
+            location_line = line.strip().split(',')
+            break
+    if location_line is None:
+        raise ValueError("Could not find LOCATION line in EPW file.")
+
+    lat = float(location_line[6])
+    lon = float(location_line[7])
+    tz = float(location_line[8])
+    elevation_m = float(location_line[9])
+
+    data_start_index = None
+    for i, line in enumerate(lines):
+        vals = line.strip().split(',')
+        if i >= 8 and len(vals) > 30:
+            data_start_index = i
+            break
+    if data_start_index is None:
+        raise ValueError("Could not find start of weather data lines in EPW file.")
+
+    data = []
+    for l in lines[data_start_index:]:
+        vals = l.strip().split(',')
+        if len(vals) < 15:
+            continue
+        year = int(vals[0])
+        month = int(vals[1])
+        day = int(vals[2])
+        hour = int(vals[3]) - 1
+        dni = float(vals[14])
+        dhi = float(vals[15])
+        timestamp = pd.Timestamp(year, month, day, hour)
+        data.append([timestamp, dni, dhi])
+
+    df = pd.DataFrame(data, columns=['time', 'DNI', 'DHI']).set_index('time')
+    df = df.sort_index()
+    return df, lon, lat, tz, elevation_m
+
+
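A minimal usage sketch for the EPW helpers added above in voxcity/utils/weather/epw.py; the file path below is illustrative and not part of the package:

from voxcity.utils.weather.epw import process_epw, read_epw_for_solar_simulation

# Full hourly table (indexed by datetime) plus the parsed EPW header records
df, headers = process_epw("tokyo.epw")  # illustrative path
print(headers["LOCATION"], len(df))

# Reduced variant for solar work: DNI/DHI series plus the site longitude,
# latitude, time zone, and elevation taken from the LOCATION header
solar_df, lon, lat, tz, elevation_m = read_epw_for_solar_simulation("tokyo.epw")
print(solar_df[["DNI", "DHI"]].head())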
voxcity/utils/weather/files.py
@@ -0,0 +1,36 @@
+from pathlib import Path
+import os
+import zipfile
+
+
+def safe_rename(src: Path, dst: Path) -> Path:
+    """
+    Safely rename a file, handling existing files by adding a number suffix.
+    """
+    if not dst.exists():
+        src.rename(dst)
+        return dst
+    base = dst.stem
+    ext = dst.suffix
+    counter = 1
+    while True:
+        new_dst = dst.with_name(f"{base}_{counter}{ext}")
+        if not new_dst.exists():
+            src.rename(new_dst)
+            return new_dst
+        counter += 1
+
+
+def safe_extract(zip_ref: zipfile.ZipFile, filename: str, extract_dir: Path) -> Path:
+    """
+    Safely extract a file from zip, handling existing files.
+    """
+    try:
+        zip_ref.extract(filename, extract_dir)
+        return extract_dir / filename
+    except FileExistsError:
+        temp_name = f"temp_{os.urandom(4).hex()}_{filename}"
+        zip_ref.extract(filename, extract_dir, temp_name)
+        return extract_dir / temp_name
+
+
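A minimal usage sketch for the collision-safe archive helpers added above in voxcity/utils/weather/files.py; the archive name and output directory are illustrative:

from pathlib import Path
import zipfile

from voxcity.utils.weather.files import safe_extract, safe_rename

out_dir = Path("./weather")
out_dir.mkdir(parents=True, exist_ok=True)
with zipfile.ZipFile("weather_data.zip") as zf:  # illustrative archive name
    member = next(n for n in zf.namelist() if n.lower().endswith(".epw"))
    extracted = safe_extract(zf, member, out_dir)            # falls back to a temp name on FileExistsError
    final = safe_rename(extracted, out_dir / "station.epw")  # appends _1, _2, ... if the target exists
print(final)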
voxcity/utils/weather/onebuilding.py
@@ -0,0 +1,486 @@
+from typing import Optional, Dict, List, Tuple, Union
+from pathlib import Path
+import requests
+import re
+import xml.etree.ElementTree as ET
+import json
+import zipfile
+
+from .files import safe_extract, safe_rename
+from .epw import process_epw
+
+
+def get_nearest_epw_from_climate_onebuilding(longitude: float, latitude: float, output_dir: str = "./", max_distance: Optional[float] = None,
+                                             extract_zip: bool = True, load_data: bool = True, region: Optional[Union[str, List[str]]] = None,
+                                             allow_insecure_ssl: bool = False, allow_http_fallback: bool = False,
+                                             ssl_verify: Union[bool, str] = True) -> Tuple[Optional[str], Optional["pd.DataFrame"], Optional[Dict]]:
+    """
+    Download and process EPW weather file from Climate.OneBuilding.Org based on coordinates.
+    """
+    import numpy as np
+    import pandas as pd
+
+    # --- KML sources and region helpers (unchanged from monolith) ---
+    KML_SOURCES = {
+        "Africa": "https://climate.onebuilding.org/sources/Region1_Africa_TMYx_EPW_Processing_locations.kml",
+        "Asia": "https://climate.onebuilding.org/sources/Region2_Asia_TMYx_EPW_Processing_locations.kml",
+        "Japan": "https://climate.onebuilding.org/sources/JGMY_EPW_Processing_locations.kml",
+        "India": "https://climate.onebuilding.org/sources/ITMY_EPW_Processing_locations.kml",
+        "CSWD": "https://climate.onebuilding.org/sources/CSWD_EPW_Processing_locations.kml",
+        "CityUHK": "https://climate.onebuilding.org/sources/CityUHK_EPW_Processing_locations.kml",
+        "PHIKO": "https://climate.onebuilding.org/sources/PHIKO_EPW_Processing_locations.kml",
+        "South_America": "https://climate.onebuilding.org/sources/Region3_South_America_TMYx_EPW_Processing_locations.kml",
+        "Argentina": "https://climate.onebuilding.org/sources/ArgTMY_EPW_Processing_locations.kml",
+        "INMET_TRY": "https://climate.onebuilding.org/sources/INMET_TRY_EPW_Processing_locations.kml",
+        "AMTUes": "https://climate.onebuilding.org/sources/AMTUes_EPW_Processing_locations.kml",
+        "BrazFuture": "https://climate.onebuilding.org/sources/BrazFuture_EPW_Processing_locations.kml",
+        "Canada": "https://climate.onebuilding.org/sources/Region4_Canada_TMYx_EPW_Processing_locations.kml",
+        "USA": "https://climate.onebuilding.org/sources/Region4_USA_TMYx_EPW_Processing_locations.kml",
+        "Caribbean": "https://climate.onebuilding.org/sources/Region4_NA_CA_Caribbean_TMYx_EPW_Processing_locations.kml",
+        "Southwest_Pacific": "https://climate.onebuilding.org/sources/Region5_Southwest_Pacific_TMYx_EPW_Processing_locations.kml",
+        "Europe": "https://climate.onebuilding.org/sources/Region6_Europe_TMYx_EPW_Processing_locations.kml",
+        "Antarctica": "https://climate.onebuilding.org/sources/Region7_Antarctica_TMYx_EPW_Processing_locations.kml",
+    }
+
+    REGION_DATASET_GROUPS = {
+        "Africa": ["Africa"],
+        "Asia": ["Asia", "Japan", "India", "CSWD", "CityUHK", "PHIKO"],
+        "South_America": ["South_America", "Argentina", "INMET_TRY", "AMTUes", "BrazFuture"],
+        "North_and_Central_America": ["North_and_Central_America", "Canada", "USA", "Caribbean"],
+        "Southwest_Pacific": ["Southwest_Pacific"],
+        "Europe": ["Europe"],
+        "Antarctica": ["Antarctica"],
+    }
+
+    REGION_BOUNDS = {
+        "Africa": {"lon_min": -25, "lon_max": 80, "lat_min": -55, "lat_max": 45},
+        "Asia": {"lon_min": 20, "lon_max": 180, "lat_min": -10, "lat_max": 80},
+        "Japan": {"lon_min": 127, "lon_max": 146, "lat_min": 24, "lat_max": 46},
+        "India": {"lon_min": 68, "lon_max": 97, "lat_min": 6, "lat_max": 36},
+        "South_America": {"lon_min": -92, "lon_max": -20, "lat_min": -60, "lat_max": 15},
+        "Argentina": {"lon_min": -75, "lon_max": -53, "lat_min": -55, "lat_max": -22},
+        "North_and_Central_America": {"lon_min": -180, "lon_max": 20, "lat_min": -10, "lat_max": 85},
+        "Canada": {"lon_min": -141, "lon_max": -52, "lat_min": 42, "lat_max": 83},
+        "USA": {"lon_min": -170, "lon_max": -65, "lat_min": 20, "lat_max": 72},
+        "Caribbean": {"lon_min": -90, "lon_max": -59, "lat_min": 10, "lat_max": 27},
+        "Southwest_Pacific": {"boxes": [
+            {"lon_min": 90, "lon_max": 180, "lat_min": -50, "lat_max": 25},
+            {"lon_min": -180, "lon_max": -140, "lat_min": -50, "lat_max": 25},
+        ]},
+        "Europe": {"lon_min": -75, "lon_max": 60, "lat_min": 25, "lat_max": 85},
+        "Antarctica": {"lon_min": -180, "lon_max": 180, "lat_min": -90, "lat_max": -60}
+    }
+
+    def detect_regions(lon: float, lat: float) -> List[str]:
+        matching_regions = []
+
+        lon_adjusted = lon
+        if lon < -180:
+            lon_adjusted = lon + 360
+        elif lon > 180:
+            lon_adjusted = lon - 360
+
+        def _in_box(bx: Dict[str, float], lon_v: float, lat_v: float) -> bool:
+            return (bx["lon_min"] <= lon_v <= bx["lon_max"] and bx["lat_min"] <= lat_v <= bx["lat_max"])
+
+        for region_name, bounds in REGION_BOUNDS.items():
+            if "boxes" in bounds:
+                for bx in bounds["boxes"]:
+                    if _in_box(bx, lon_adjusted, lat):
+                        matching_regions.append(region_name)
+                        break
+            else:
+                if _in_box(bounds, lon_adjusted, lat):
+                    matching_regions.append(region_name)
+
+        if not matching_regions:
+            region_distances = []
+            def _box_distance(bx: Dict[str, float]) -> float:
+                lon_dist = 0
+                if lon_adjusted < bx["lon_min"]:
+                    lon_dist = bx["lon_min"] - lon_adjusted
+                elif lon_adjusted > bx["lon_max"]:
+                    lon_dist = lon_adjusted - bx["lon_max"]
+                lat_dist = 0
+                if lat < bx["lat_min"]:
+                    lat_dist = bx["lat_min"] - lat
+                elif lat > bx["lat_max"]:
+                    lat_dist = lat - bx["lat_max"]
+                return (lon_dist**2 + lat_dist**2)**0.5
+            for region_name, bounds in REGION_BOUNDS.items():
+                if "boxes" in bounds:
+                    d = min(_box_distance(bx) for bx in bounds["boxes"])
+                else:
+                    d = _box_distance(bounds)
+                region_distances.append((region_name, d))
+            closest_regions = sorted(region_distances, key=lambda x: x[1])[:3]
+            matching_regions = [r[0] for r in closest_regions]
+        return matching_regions
+
+    def try_decode(content: bytes) -> str:
+        encodings = ['utf-8', 'latin1', 'iso-8859-1', 'cp1252']
+        for encoding in encodings:
+            try:
+                return content.decode(encoding)
+            except UnicodeDecodeError:
+                continue
+        return content.decode('utf-8', errors='replace')
+
+    def clean_xml(content: str) -> str:
+        content = content.replace('ñ', 'n').replace('Ñ', 'N').replace('ñ', 'n').replace('Ñ', 'N')
+        content = re.sub(r'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\xFF]', '', content)
+        return content
+
+    def parse_coordinates(point_text: str) -> Tuple[float, float, float]:
+        try:
+            coords = point_text.strip().split(',')
+            if len(coords) >= 2:
+                lon, lat = map(float, coords[:2])
+                elevation = float(coords[2]) if len(coords) > 2 else 0
+                return lat, lon, elevation
+        except (ValueError, IndexError):
+            pass
+        return None
+
+    def parse_station_from_description(desc: str, point_coords: Optional[Tuple[float, float, float]] = None) -> Dict:
+        if not desc:
+            return None
+        url_match = re.search(r'URL (https://.*?\.zip)', desc)
+        if not url_match:
+            return None
+        url = url_match.group(1)
+        coord_match = re.search(r'([NS]) (\d+)°\s*(\d+\.\d+)'.encode('utf-8').decode('utf-8') + r"'.*?([EW]) (\d+)°\s*(\d+\.\d+)'", desc)
+        if coord_match:
+            ns, lat_deg, lat_min, ew, lon_deg, lon_min = coord_match.groups()
+            lat = float(lat_deg) + float(lat_min)/60
+            if ns == 'S':
+                lat = -lat
+            lon = float(lon_deg) + float(lon_min)/60
+            if ew == 'W':
+                lon = -lon
+        elif point_coords:
+            lat, lon, _ = point_coords
+        else:
+            return None
+        def extract_value(pattern: str, default: str = None) -> str:
+            match = re.search(pattern, desc)
+            return match.group(1) if match else default
+        metadata = {
+            'url': url,
+            'longitude': lon,
+            'latitude': lat,
+            'elevation': int(extract_value(r'Elevation <b>(-?\d+)</b>', '0')),
+            'name': extract_value(r'<b>(.*?)</b>'),
+            'wmo': extract_value(r'WMO <b>(\d+)</b>'),
+            'climate_zone': extract_value(r'Climate Zone <b>(.*?)</b>'),
+            'period': extract_value(r'Period of Record=(\d{4}-\d{4})'),
+            'heating_db': extract_value(r'99% Heating DB <b>(.*?)</b>'),
+            'cooling_db': extract_value(r'1% Cooling DB <b>(.*?)</b>'),
+            'hdd18': extract_value(r'HDD18 <b>(\d+)</b>'),
+            'cdd10': extract_value(r'CDD10 <b>(\d+)</b>'),
+            'time_zone': extract_value(r'Time Zone {GMT <b>([-+]?\d+\.\d+)</b>')
+        }
+        return metadata
+
+    def haversine_distance(lon1: float, lat1: float, lon2: float, lat2: float) -> float:
+        from math import radians, sin, cos, sqrt, atan2
+        R = 6371
+        lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])
+        dlat = lat2 - lat1
+        dlon = lon2 - lon1
+        a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
+        c = 2 * atan2(sqrt(a), sqrt(1-a))
+        return R * c
+
+    def try_download_station_zip(original_url: str, timeout_s: int = 30) -> Optional[bytes]:
+        def candidate_urls(url: str) -> List[str]:
+            urls = [url]
+            if "/TUR_Turkey/" in url:
+                urls.append(url.replace("/TUR_Turkey/", "/TUR_Turkiye/"))
+            if "/TUR_Turkiye/" in url:
+                urls.append(url.replace("/TUR_Turkiye/", "/TUR_Turkey/"))
+            m = re.search(r"(.*_TMYx)(?:\.(\d{4}-\d{4}))?\.zip$", url)
+            if m:
+                base = m.group(1)
+                variants = [
+                    f"{base}.2009-2023.zip",
+                    f"{base}.2007-2021.zip",
+                    f"{base}.zip",
+                    f"{base}.2004-2018.zip",
+                ]
+                for v in variants:
+                    if v not in urls:
+                        urls.append(v)
+                extra = []
+                for v in variants:
+                    if "/TUR_Turkey/" in url:
+                        extra.append(v.replace("/TUR_Turkey/", "/TUR_Turkiye/"))
+                    if "/TUR_Turkiye/" in url:
+                        extra.append(v.replace("/TUR_Turkiye/", "/TUR_Turkey/"))
+                for v in extra:
+                    if v not in urls:
+                        urls.append(v)
+            return urls
+
+        tried = set()
+        for u in candidate_urls(original_url):
+            if u in tried:
+                continue
+            tried.add(u)
+            try:
+                resp = requests.get(u, timeout=timeout_s, verify=ssl_verify)
+                resp.raise_for_status()
+                return resp.content
+            except requests.exceptions.SSLError:
+                if allow_insecure_ssl:
+                    try:
+                        resp = requests.get(u, timeout=timeout_s, verify=False)
+                        resp.raise_for_status()
+                        return resp.content
+                    except requests.exceptions.RequestException:
+                        if allow_http_fallback and u.lower().startswith("https://"):
+                            insecure_url = "http://" + u.split("://", 1)[1]
+                            try:
+                                resp = requests.get(insecure_url, timeout=timeout_s)
+                                resp.raise_for_status()
+                                return resp.content
+                            except requests.exceptions.RequestException:
+                                pass
+                        continue
+                else:
+                    if allow_http_fallback and u.lower().startswith("https://"):
+                        insecure_url = "http://" + u.split("://", 1)[1]
+                        try:
+                            resp = requests.get(insecure_url, timeout=timeout_s)
+                            resp.raise_for_status()
+                            return resp.content
+                        except requests.exceptions.RequestException:
+                            pass
+                    continue
+            except requests.exceptions.HTTPError as he:
+                if getattr(he.response, "status_code", None) == 404:
+                    continue
+                else:
+                    raise
+            except requests.exceptions.RequestException:
+                continue
+        return None
+
+    try:
+        Path(output_dir).mkdir(parents=True, exist_ok=True)
+
+        regions_to_scan = {}
+        def _add_selection(selection_name: str, mapping: Dict[str, str], out: Dict[str, str]):
+            if selection_name in REGION_DATASET_GROUPS:
+                for key in REGION_DATASET_GROUPS[selection_name]:
+                    if key in KML_SOURCES:
+                        out[key] = KML_SOURCES[key]
+            elif selection_name in KML_SOURCES:
+                out[selection_name] = KML_SOURCES[selection_name]
+            else:
+                valid = sorted(list(REGION_DATASET_GROUPS.keys()) + list(KML_SOURCES.keys()))
+                raise ValueError(f"Invalid region/dataset: '{selection_name}'. Valid options include: {', '.join(valid)}")
+
+        if region is None:
+            detected_regions = detect_regions(longitude, latitude)
+            if detected_regions:
+                print(f"Auto-detected regions: {', '.join(detected_regions)}")
+                for r in detected_regions:
+                    _add_selection(r, KML_SOURCES, regions_to_scan)
+            else:
+                print("Could not determine region from coordinates. Scanning all regions.")
+                regions_to_scan = dict(KML_SOURCES)
+        elif isinstance(region, str):
+            if region.lower() == "all":
+                regions_to_scan = dict(KML_SOURCES)
+            else:
+                _add_selection(region, KML_SOURCES, regions_to_scan)
+        else:
+            for r in region:
+                _add_selection(r, KML_SOURCES, regions_to_scan)
+
+        print("Fetching weather station data from Climate.OneBuilding.Org...")
+        all_stations = []
+        scanned_urls = set()
+        for region_name, url in regions_to_scan.items():
+            if url in scanned_urls:
+                continue
+            scanned_urls.add(url)
+            print(f"Scanning {region_name}...")
+            stations = []
+            try:
+                try:
+                    response = requests.get(url, timeout=30, verify=ssl_verify)
+                    response.raise_for_status()
+                except requests.exceptions.SSLError:
+                    if allow_insecure_ssl:
+                        try:
+                            response = requests.get(url, timeout=30, verify=False)
+                            response.raise_for_status()
+                        except requests.exceptions.RequestException:
+                            if allow_http_fallback and url.lower().startswith("https://"):
+                                insecure_url = "http://" + url.split("://", 1)[1]
+                                response = requests.get(insecure_url, timeout=30)
+                                response.raise_for_status()
+                            else:
+                                raise
+                    else:
+                        if allow_http_fallback and url.lower().startswith("https://"):
+                            insecure_url = "http://" + url.split("://", 1)[1]
+                            response = requests.get(insecure_url, timeout=30)
+                            response.raise_for_status()
+                        else:
+                            raise
+                content = try_decode(response.content)
+                content = clean_xml(content)
+                try:
+                    root = ET.fromstring(content.encode('utf-8'))
+                except ET.ParseError as e:
+                    print(f"Error parsing KML file {url}: {e}")
+                    root = None
+                if root is not None:
+                    ns = {'kml': 'http://earth.google.com/kml/2.1'}
+                    for placemark in root.findall('.//kml:Placemark', ns):
+                        name = placemark.find('kml:name', ns)
+                        desc = placemark.find('kml:description', ns)
+                        point = placemark.find('.//kml:Point/kml:coordinates', ns)
+                        if desc is None or not desc.text or "Data Source" not in desc.text:
+                            continue
+                        point_coords = None
+                        if point is not None and point.text:
+                            point_coords = parse_coordinates(point.text)
+                        station_data = parse_station_from_description(desc.text, point_coords)
+                        if station_data:
+                            station_data['name'] = name.text if name is not None else "Unknown"
+                            station_data['kml_source'] = url
+                            stations.append(station_data)
+            except requests.exceptions.RequestException as e:
+                print(f"Error accessing KML file {url}: {e}")
+            except Exception as e:
+                print(f"Error processing KML file {url}: {e}")
+
+            all_stations.extend(stations)
+            print(f"Found {len(stations)} stations in {region_name}")
+
+        print(f"\nTotal stations found: {len(all_stations)}")
+        if not all_stations:
+            if not (isinstance(region, str) and region.lower() == "all"):
+                print("No stations found from detected/selected regions. Falling back to global scan...")
+                regions_to_scan = dict(KML_SOURCES)
+                all_stations = []
+                scanned_urls = set()
+                for region_name, url in regions_to_scan.items():
+                    if url in scanned_urls:
+                        continue
+                    scanned_urls.add(url)
+                    print(f"Scanning {region_name}...")
+                    # re-use logic above
+                    try:
+                        response = requests.get(url, timeout=30, verify=ssl_verify)
+                        response.raise_for_status()
+                        content = try_decode(response.content)
+                        content = clean_xml(content)
+                        root = ET.fromstring(content.encode('utf-8'))
+                        ns = {'kml': 'http://earth.google.com/kml/2.1'}
+                        for placemark in root.findall('.//kml:Placemark', ns):
+                            name = placemark.find('kml:name', ns)
+                            desc = placemark.find('kml:description', ns)
+                            point = placemark.find('.//kml:Point/kml:coordinates', ns)
+                            if desc is None or not desc.text or "Data Source" not in desc.text:
+                                continue
+                            point_coords = None
+                            if point is not None and point.text:
+                                point_coords = parse_coordinates(point.text)
+                            station_data = parse_station_from_description(desc.text, point_coords)
+                            if station_data:
+                                station_data['name'] = name.text if name is not None else "Unknown"
+                                station_data['kml_source'] = url
+                                all_stations.append(station_data)
+                        print(f"Found {len(all_stations)} stations in {region_name}")
+                    except Exception:
+                        pass
+                print(f"\nTotal stations found after global scan: {len(all_stations)}")
+        if not all_stations:
+            raise ValueError("No weather stations found")
+
+        stations_with_distances = [
+            (station, haversine_distance(longitude, latitude, station['longitude'], station['latitude']))
+            for station in all_stations
+        ]
+        if max_distance is not None:
+            close_stations = [
+                (station, distance) for station, distance in stations_with_distances if distance <= max_distance
+            ]
+            if not close_stations:
+                closest_station, min_distance = min(stations_with_distances, key=lambda x: x[1])
+                print(f"\nNo stations found within {max_distance} km. Closest station is {min_distance:.1f} km away.")
+                print("Using closest available station.")
+                stations_with_distances = [(closest_station, min_distance)]
+            else:
+                stations_with_distances = close_stations
+
+        nearest_station, distance = min(stations_with_distances, key=lambda x: x[1])
+        print(f"\nDownloading EPW file for {nearest_station['name']}...")
+        archive_bytes = try_download_station_zip(nearest_station['url'], timeout_s=30)
+        if archive_bytes is None:
+            raise ValueError(f"Failed to download EPW archive from station URL and fallbacks: {nearest_station['url']}")
+
+        temp_dir = Path(output_dir) / "temp"
+        temp_dir.mkdir(parents=True, exist_ok=True)
+        zip_file = temp_dir / "weather_data.zip"
+        with open(zip_file, 'wb') as f:
+            f.write(archive_bytes)
+
+        final_epw = None
+        try:
+            if extract_zip:
+                with zipfile.ZipFile(zip_file, 'r') as zip_ref:
+                    epw_files = [f for f in zip_ref.namelist() if f.lower().endswith('.epw')]
+                    if not epw_files:
+                        raise ValueError("No EPW file found in the downloaded archive")
+                    epw_filename = epw_files[0]
+                    extracted_epw = safe_extract(zip_ref, epw_filename, temp_dir)
+                    final_epw = Path(output_dir) / f"{nearest_station['name'].replace(' ', '_').replace(',', '').lower()}.epw"
+                    final_epw = safe_rename(extracted_epw, final_epw)
+        finally:
+            try:
+                if zip_file.exists():
+                    zip_file.unlink()
+                if temp_dir.exists() and not any(temp_dir.iterdir()):
+                    temp_dir.rmdir()
+            except Exception as e:
+                print(f"Warning: Could not clean up temporary files: {e}")
+
+        if final_epw is None:
+            raise ValueError("Failed to extract EPW file")
+
+        metadata_file = final_epw.with_suffix('.json')
+        with open(metadata_file, 'w') as f:
+            json.dump(nearest_station, f, indent=2)
+
+        print(f"\nDownloaded EPW file for {nearest_station['name']}")
+        print(f"Distance: {distance:.2f} km")
+        print(f"Station coordinates: {nearest_station['longitude']}, {nearest_station['latitude']}")
+        if nearest_station.get('wmo'):
+            print(f"WMO: {nearest_station['wmo']}")
+        if nearest_station.get('climate_zone'):
+            print(f"Climate zone: {nearest_station['climate_zone']}")
+        if nearest_station.get('period'):
+            print(f"Data period: {nearest_station['period']}")
+        print(f"Files saved:")
+        print(f"- EPW: {final_epw}")
+        print(f"- Metadata: {metadata_file}")
+
+        df = None
+        headers = None
+        if load_data:
+            print("\nLoading EPW data...")
+            df, headers = process_epw(final_epw)
+            print(f"Loaded {len(df)} hourly records")
+
+        return str(final_epw), df, headers
+    except Exception as e:
+        print(f"Error processing data: {e}")
+        return None, None, None
+
+
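A minimal usage sketch for the Climate.OneBuilding.Org downloader added above in voxcity/utils/weather/onebuilding.py; coordinates, region, and output directory are illustrative, and network access is required:

from voxcity.utils.weather.onebuilding import get_nearest_epw_from_climate_onebuilding

epw_path, df, headers = get_nearest_epw_from_climate_onebuilding(
    longitude=139.76, latitude=35.68,  # illustrative coordinates (Tokyo)
    output_dir="./weather",
    max_distance=100,                  # km; the closest station is used if none falls within range
    region="Asia",                     # optional; auto-detected from the coordinates when omitted
)
if epw_path is not None:
    print(epw_path, len(df), "hourly records")  # df is the hourly DataFrame from process_epw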