voxcity 0.6.26__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. voxcity/__init__.py +10 -4
  2. voxcity/downloader/__init__.py +2 -1
  3. voxcity/downloader/gba.py +210 -0
  4. voxcity/downloader/gee.py +5 -1
  5. voxcity/downloader/mbfp.py +1 -1
  6. voxcity/downloader/oemj.py +80 -8
  7. voxcity/downloader/utils.py +73 -73
  8. voxcity/errors.py +30 -0
  9. voxcity/exporter/__init__.py +9 -1
  10. voxcity/exporter/cityles.py +129 -34
  11. voxcity/exporter/envimet.py +51 -26
  12. voxcity/exporter/magicavoxel.py +42 -5
  13. voxcity/exporter/netcdf.py +27 -0
  14. voxcity/exporter/obj.py +103 -28
  15. voxcity/generator/__init__.py +47 -0
  16. voxcity/generator/api.py +721 -0
  17. voxcity/generator/grids.py +381 -0
  18. voxcity/generator/io.py +94 -0
  19. voxcity/generator/pipeline.py +282 -0
  20. voxcity/generator/update.py +429 -0
  21. voxcity/generator/voxelizer.py +392 -0
  22. voxcity/geoprocessor/__init__.py +75 -6
  23. voxcity/geoprocessor/conversion.py +153 -0
  24. voxcity/geoprocessor/draw.py +1488 -1169
  25. voxcity/geoprocessor/heights.py +199 -0
  26. voxcity/geoprocessor/io.py +101 -0
  27. voxcity/geoprocessor/merge_utils.py +91 -0
  28. voxcity/geoprocessor/mesh.py +26 -10
  29. voxcity/geoprocessor/network.py +35 -6
  30. voxcity/geoprocessor/overlap.py +84 -0
  31. voxcity/geoprocessor/raster/__init__.py +82 -0
  32. voxcity/geoprocessor/raster/buildings.py +435 -0
  33. voxcity/geoprocessor/raster/canopy.py +258 -0
  34. voxcity/geoprocessor/raster/core.py +150 -0
  35. voxcity/geoprocessor/raster/export.py +93 -0
  36. voxcity/geoprocessor/raster/landcover.py +159 -0
  37. voxcity/geoprocessor/raster/raster.py +110 -0
  38. voxcity/geoprocessor/selection.py +85 -0
  39. voxcity/geoprocessor/utils.py +824 -820
  40. voxcity/models.py +113 -0
  41. voxcity/simulator/common/__init__.py +22 -0
  42. voxcity/simulator/common/geometry.py +98 -0
  43. voxcity/simulator/common/raytracing.py +450 -0
  44. voxcity/simulator/solar/__init__.py +66 -0
  45. voxcity/simulator/solar/integration.py +336 -0
  46. voxcity/simulator/solar/kernels.py +62 -0
  47. voxcity/simulator/solar/radiation.py +648 -0
  48. voxcity/simulator/solar/sky.py +668 -0
  49. voxcity/simulator/solar/temporal.py +792 -0
  50. voxcity/simulator/view.py +36 -2286
  51. voxcity/simulator/visibility/__init__.py +29 -0
  52. voxcity/simulator/visibility/landmark.py +392 -0
  53. voxcity/simulator/visibility/view.py +508 -0
  54. voxcity/utils/__init__.py +11 -0
  55. voxcity/utils/classes.py +194 -0
  56. voxcity/utils/lc.py +80 -39
  57. voxcity/utils/logging.py +61 -0
  58. voxcity/utils/orientation.py +51 -0
  59. voxcity/utils/shape.py +230 -0
  60. voxcity/utils/weather/__init__.py +26 -0
  61. voxcity/utils/weather/epw.py +146 -0
  62. voxcity/utils/weather/files.py +36 -0
  63. voxcity/utils/weather/onebuilding.py +486 -0
  64. voxcity/visualizer/__init__.py +24 -0
  65. voxcity/visualizer/builder.py +43 -0
  66. voxcity/visualizer/grids.py +141 -0
  67. voxcity/visualizer/maps.py +187 -0
  68. voxcity/visualizer/palette.py +228 -0
  69. voxcity/visualizer/renderer.py +1145 -0
  70. {voxcity-0.6.26.dist-info → voxcity-1.0.2.dist-info}/METADATA +162 -48
  71. voxcity-1.0.2.dist-info/RECORD +81 -0
  72. voxcity/generator.py +0 -1302
  73. voxcity/geoprocessor/grid.py +0 -1739
  74. voxcity/geoprocessor/polygon.py +0 -1344
  75. voxcity/simulator/solar.py +0 -2339
  76. voxcity/utils/visualization.py +0 -2849
  77. voxcity/utils/weather.py +0 -1038
  78. voxcity-0.6.26.dist-info/RECORD +0 -38
  79. {voxcity-0.6.26.dist-info → voxcity-1.0.2.dist-info}/WHEEL +0 -0
  80. {voxcity-0.6.26.dist-info → voxcity-1.0.2.dist-info}/licenses/AUTHORS.rst +0 -0
  81. {voxcity-0.6.26.dist-info → voxcity-1.0.2.dist-info}/licenses/LICENSE +0 -0
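The file list shows the shape of the 1.0.2 refactor: the monolithic 0.6.26 modules are removed (voxcity/generator.py, voxcity/geoprocessor/grid.py, voxcity/geoprocessor/polygon.py, voxcity/simulator/solar.py, voxcity/utils/visualization.py, voxcity/utils/weather.py) and replaced by the voxcity/generator/, voxcity/geoprocessor/raster/, voxcity/simulator/solar/, voxcity/simulator/visibility/, voxcity/visualizer/, and voxcity/utils/weather/ packages. The diff does not show what the new __init__.py files re-export, so whether the old import paths still resolve in 1.0.2 is an assumption; a minimal migration sketch for the weather utilities, with the fallback path inferred only from the filenames above:

    # Hypothetical migration sketch. Assumes voxcity/utils/weather/__init__.py
    # re-exports the old entry points; this diff does not confirm that.
    try:
        # 0.6.26 import path; still works in 1.0.2 if re-exported by the package.
        from voxcity.utils.weather import get_nearest_epw_from_climate_onebuilding
    except ImportError:
        # Likely new home of the Climate.OneBuilding.Org logic, inferred only
        # from the filename voxcity/utils/weather/onebuilding.py listed above.
        from voxcity.utils.weather.onebuilding import get_nearest_epw_from_climate_onebuilding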
voxcity/utils/weather.py DELETED
@@ -1,1038 +0,0 @@
- """
- Weather data utilities for VoxelCity.
- 
- This module provides functionality to download and process EnergyPlus Weather (EPW) files
- from Climate.OneBuilding.Org based on geographical coordinates. It includes utilities for:
- 
- - Automatically finding the nearest weather station to given coordinates
- - Downloading EPW files from various global regions
- - Processing EPW files into pandas DataFrames for analysis
- - Extracting solar radiation data for solar simulations
- 
- The main function get_nearest_epw_from_climate_onebuilding() provides a comprehensive
- solution for obtaining weather data for any global location by automatically detecting
- the appropriate region and finding the closest available weather station.
- """
- 
- import requests
- import xml.etree.ElementTree as ET
- import re
- from math import radians, sin, cos, sqrt, atan2
- from pathlib import Path
- from typing import Optional, Dict, List, Tuple, Union
- import json
- import zipfile
- import pandas as pd
- import io
- import os
- import numpy as np
- from datetime import datetime
- 
- # =============================================================================
- # FILE HANDLING UTILITIES
- # =============================================================================
- 
- def safe_rename(src: Path, dst: Path) -> Path:
-     """
-     Safely rename a file, handling existing files by adding a number suffix.
- 
-     This function prevents file conflicts by automatically generating unique filenames
-     when the target destination already exists. It appends incremental numbers to
-     the base filename until a unique name is found.
- 
-     Args:
-         src: Source file path
-         dst: Destination file path
- 
-     Returns:
-         Path: Final destination path used
-     """
-     # If destination doesn't exist, simple rename
-     if not dst.exists():
-         src.rename(dst)
-         return dst
- 
-     # If file exists, add number suffix
-     base = dst.stem
-     ext = dst.suffix
-     counter = 1
-     # Keep incrementing counter until we find a name that doesn't exist
-     while True:
-         new_dst = dst.with_name(f"{base}_{counter}{ext}")
-         if not new_dst.exists():
-             src.rename(new_dst)
-             return new_dst
-         counter += 1
- 
- def safe_extract(zip_ref: zipfile.ZipFile, filename: str, extract_dir: Path) -> Path:
-     """
-     Safely extract a file from zip, handling existing files.
- 
-     This function handles the case where a file with the same name already exists
-     in the extraction directory by using a temporary filename with random suffix.
- 
-     Args:
-         zip_ref: Open ZipFile reference
-         filename: Name of file to extract
-         extract_dir: Directory to extract to
- 
-     Returns:
-         Path: Path to extracted file
-     """
-     try:
-         zip_ref.extract(filename, extract_dir)
-         return extract_dir / filename
-     except FileExistsError:
-         # If file exists, extract to temporary name and return path
-         temp_name = f"temp_{os.urandom(4).hex()}_{filename}"
-         zip_ref.extract(filename, extract_dir, temp_name)
-         return extract_dir / temp_name
- 
- # =============================================================================
- # EPW FILE PROCESSING
- # =============================================================================
- 
- def process_epw(epw_path: Union[str, Path]) -> Tuple[pd.DataFrame, Dict]:
-     """
-     Process an EPW file into a pandas DataFrame.
- 
-     EPW (EnergyPlus Weather) files contain standardized weather data in a specific format.
-     The first 8 lines contain metadata, followed by 8760 lines of hourly weather data
-     for a typical meteorological year.
- 
-     Args:
-         epw_path: Path to the EPW file
- 
-     Returns:
-         Tuple containing:
-             - DataFrame with hourly weather data indexed by datetime
-             - Dictionary with EPW header metadata including location information
-     """
-     # EPW column names (these are standardized across all EPW files)
-     columns = [
-         'Year', 'Month', 'Day', 'Hour', 'Minute',
-         'Data Source and Uncertainty Flags',
-         'Dry Bulb Temperature', 'Dew Point Temperature',
-         'Relative Humidity', 'Atmospheric Station Pressure',
-         'Extraterrestrial Horizontal Radiation',
-         'Extraterrestrial Direct Normal Radiation',
-         'Horizontal Infrared Radiation Intensity',
-         'Global Horizontal Radiation',
-         'Direct Normal Radiation', 'Diffuse Horizontal Radiation',
-         'Global Horizontal Illuminance',
-         'Direct Normal Illuminance', 'Diffuse Horizontal Illuminance',
-         'Zenith Luminance', 'Wind Direction', 'Wind Speed',
-         'Total Sky Cover', 'Opaque Sky Cover', 'Visibility',
-         'Ceiling Height', 'Present Weather Observation',
-         'Present Weather Codes', 'Precipitable Water',
-         'Aerosol Optical Depth', 'Snow Depth',
-         'Days Since Last Snowfall', 'Albedo',
-         'Liquid Precipitation Depth', 'Liquid Precipitation Quantity'
-     ]
- 
-     # Read EPW file - EPW files are always in comma-separated format
-     with open(epw_path, 'r') as f:
-         lines = f.readlines()
- 
-     # Extract header metadata (first 8 lines contain standardized metadata)
-     headers = {
-         'LOCATION': lines[0].strip(),
-         'DESIGN_CONDITIONS': lines[1].strip(),
-         'TYPICAL_EXTREME_PERIODS': lines[2].strip(),
-         'GROUND_TEMPERATURES': lines[3].strip(),
-         'HOLIDAYS_DAYLIGHT_SAVINGS': lines[4].strip(),
-         'COMMENTS_1': lines[5].strip(),
-         'COMMENTS_2': lines[6].strip(),
-         'DATA_PERIODS': lines[7].strip()
-     }
- 
-     # Parse location data from first header line
-     # Format: LOCATION,City,State,Country,Source,WMO,Latitude,Longitude,TimeZone,Elevation
-     location = headers['LOCATION'].split(',')
-     if len(location) >= 10:
-         headers['LOCATION'] = {
-             'City': location[1].strip(),
-             'State': location[2].strip(),
-             'Country': location[3].strip(),
-             'Data Source': location[4].strip(),
-             'WMO': location[5].strip(),
-             'Latitude': float(location[6]),
-             'Longitude': float(location[7]),
-             'Time Zone': float(location[8]),
-             'Elevation': float(location[9])
-         }
- 
-     # Create DataFrame from weather data (skipping 8 header lines)
-     data = [line.strip().split(',') for line in lines[8:]]
-     df = pd.DataFrame(data, columns=columns)
- 
-     # Convert numeric columns to appropriate data types
-     # All weather parameters should be numeric except uncertainty flags and weather codes
-     numeric_columns = [
-         'Year', 'Month', 'Day', 'Hour', 'Minute',
-         'Dry Bulb Temperature', 'Dew Point Temperature',
-         'Relative Humidity', 'Atmospheric Station Pressure',
-         'Extraterrestrial Horizontal Radiation',
-         'Extraterrestrial Direct Normal Radiation',
-         'Horizontal Infrared Radiation Intensity',
-         'Global Horizontal Radiation',
-         'Direct Normal Radiation', 'Diffuse Horizontal Radiation',
-         'Global Horizontal Illuminance',
-         'Direct Normal Illuminance', 'Diffuse Horizontal Illuminance',
-         'Zenith Luminance', 'Wind Direction', 'Wind Speed',
-         'Total Sky Cover', 'Opaque Sky Cover', 'Visibility',
-         'Ceiling Height', 'Precipitable Water',
-         'Aerosol Optical Depth', 'Snow Depth',
-         'Days Since Last Snowfall', 'Albedo',
-         'Liquid Precipitation Depth', 'Liquid Precipitation Quantity'
-     ]
- 
-     # Convert to numeric, handling any parsing errors gracefully
-     for col in numeric_columns:
-         df[col] = pd.to_numeric(df[col], errors='coerce')
- 
-     # Create datetime index for time series analysis
-     # EPW hours are 1-24, but pandas expects 0-23 for proper datetime handling
-     df['datetime'] = pd.to_datetime({
-         'year': df['Year'],
-         'month': df['Month'],
-         'day': df['Day'],
-         'hour': df['Hour'] - 1,  # EPW hours are 1-24, pandas expects 0-23
-         'minute': df['Minute']
-     })
-     df.set_index('datetime', inplace=True)
- 
-     return df, headers
- 
- # =============================================================================
- # MAIN WEATHER DATA DOWNLOAD FUNCTION
- # =============================================================================
- 
- def get_nearest_epw_from_climate_onebuilding(longitude: float, latitude: float, output_dir: str = "./", max_distance: Optional[float] = None,
-                                              extract_zip: bool = True, load_data: bool = True, region: Optional[Union[str, List[str]]] = None,
-                                              allow_insecure_ssl: bool = False, allow_http_fallback: bool = False,
-                                              ssl_verify: Union[bool, str] = True) -> Tuple[Optional[str], Optional[pd.DataFrame], Optional[Dict]]:
-     """
-     Download and process EPW weather file from Climate.OneBuilding.Org based on coordinates.
- 
-     This function automatically finds and downloads the nearest available weather station
-     data from Climate.OneBuilding.Org's global database. It supports region-based searching
-     for improved performance and can automatically detect the appropriate region based on
-     coordinates.
- 
-     The function performs the following steps:
-     1. Determines which regional KML files to scan based on coordinates or user input
-     2. Downloads and parses KML files to extract weather station metadata
-     3. Calculates distances to find the nearest station
-     4. Downloads the EPW file from the nearest station
-     5. Optionally processes the EPW data into a pandas DataFrame
- 
-     Args:
-         longitude (float): Longitude of the location (-180 to 180)
-         latitude (float): Latitude of the location (-90 to 90)
-         output_dir (str): Directory to save the EPW file (defaults to current directory)
-         max_distance (float, optional): Maximum distance in kilometers to search for stations.
-             If no stations within this distance, uses closest available.
-         extract_zip (bool): Whether to extract the ZIP file (default True)
-         load_data (bool): Whether to load the EPW data into a DataFrame (default True)
-         region (str or List[str], optional): Specific region(s) or dataset(s) to scan for stations.
-             Regions: "Africa", "Asia", "South_America",
-             "North_and_Central_America", "Southwest_Pacific",
-             "Europe", "Antarctica".
-             Sub-datasets (can be used alone or auto-included by region):
-             "Japan", "India", "CSWD", "CityUHK", "PHIKO",
-             "Argentina", "INMET_TRY", "AMTUes", "BrazFuture",
-             plus legacy "Canada", "USA", "Caribbean" (Region 4).
-             Use "all" to scan every dataset.
-             If None, will auto-detect region based on coordinates.
-         allow_insecure_ssl (bool): If True, on SSL errors retry with certificate verification disabled.
-         allow_http_fallback (bool): If True, on SSL/network errors, also try HTTP (insecure) fallback.
-         ssl_verify (bool|str): Passed to requests as 'verify' parameter for HTTPS; can be False or CA bundle path.
- 
-     Returns:
-         Tuple containing:
-             - Path to the EPW file (or None if download fails)
-             - DataFrame with hourly weather data (if load_data=True, else None)
-             - Dictionary with EPW header metadata (if load_data=True, else None)
- 
-     Raises:
-         ValueError: If invalid region specified or no weather stations found
-         requests.exceptions.RequestException: If network requests fail
-     """
- 
-     # Regional KML sources from Climate.OneBuilding.Org (2024+ TMYx structure)
-     # Each WMO region maintains a primary KML in /sources with the naming pattern:
-     # Region{N}_{Name}_TMYx_EPW_Processing_locations.kml
-     # Keep sub-region keys for backward compatibility (mapping to the Region KML where applicable)
-     KML_SOURCES = {
-         # WMO Region 1
-         "Africa": "https://climate.onebuilding.org/sources/Region1_Africa_TMYx_EPW_Processing_locations.kml",
-         # WMO Region 2
-         "Asia": "https://climate.onebuilding.org/sources/Region2_Asia_TMYx_EPW_Processing_locations.kml",
-         # Subsets/datasets within Asia that still publish dedicated KMLs
-         "Japan": "https://climate.onebuilding.org/sources/JGMY_EPW_Processing_locations.kml",
-         "India": "https://climate.onebuilding.org/sources/ITMY_EPW_Processing_locations.kml",
-         "CSWD": "https://climate.onebuilding.org/sources/CSWD_EPW_Processing_locations.kml",
-         "CityUHK": "https://climate.onebuilding.org/sources/CityUHK_EPW_Processing_locations.kml",
-         "PHIKO": "https://climate.onebuilding.org/sources/PHIKO_EPW_Processing_locations.kml",
-         # WMO Region 3
-         "South_America": "https://climate.onebuilding.org/sources/Region3_South_America_TMYx_EPW_Processing_locations.kml",
-         # Historical/legacy dataset for Argentina maintained separately
-         "Argentina": "https://climate.onebuilding.org/sources/ArgTMY_EPW_Processing_locations.kml",
-         "INMET_TRY": "https://climate.onebuilding.org/sources/INMET_TRY_EPW_Processing_locations.kml",
-         "AMTUes": "https://climate.onebuilding.org/sources/AMTUes_EPW_Processing_locations.kml",
-         "BrazFuture": "https://climate.onebuilding.org/sources/BrazFuture_EPW_Processing_locations.kml",
-         # WMO Region 4 (use subregion KMLs; umbrella selection expands to these)
-         # Note: There is no single unified Region 4 KML in /sources as of 2024.
-         # Use these three subregion KMLs instead.
-         "Canada": "https://climate.onebuilding.org/sources/Region4_Canada_TMYx_EPW_Processing_locations.kml",
-         "USA": "https://climate.onebuilding.org/sources/Region4_USA_TMYx_EPW_Processing_locations.kml",
-         "Caribbean": "https://climate.onebuilding.org/sources/Region4_NA_CA_Caribbean_TMYx_EPW_Processing_locations.kml",
-         # WMO Region 5
-         "Southwest_Pacific": "https://climate.onebuilding.org/sources/Region5_Southwest_Pacific_TMYx_EPW_Processing_locations.kml",
-         # WMO Region 6
-         "Europe": "https://climate.onebuilding.org/sources/Region6_Europe_TMYx_EPW_Processing_locations.kml",
-         # WMO Region 7
-         "Antarctica": "https://climate.onebuilding.org/sources/Region7_Antarctica_TMYx_EPW_Processing_locations.kml",
-     }
- 
-     # Group region selections to include relevant sub-datasets automatically
-     REGION_DATASET_GROUPS = {
-         "Africa": ["Africa"],
-         "Asia": ["Asia", "Japan", "India", "CSWD", "CityUHK", "PHIKO"],
-         "South_America": ["South_America", "Argentina", "INMET_TRY", "AMTUes", "BrazFuture"],
-         "North_and_Central_America": ["North_and_Central_America", "Canada", "USA", "Caribbean"],
-         "Southwest_Pacific": ["Southwest_Pacific"],
-         "Europe": ["Europe"],
-         "Antarctica": ["Antarctica"],
-     }
- 
-     # Define approximate geographical boundaries for automatic region detection
-     # These bounds help determine which regional KML files to scan based on coordinates
-     REGION_BOUNDS = {
-         # WMO Region 1 - Africa (includes islands in Indian Ocean and Spanish territories off N. Africa)
-         "Africa": {"lon_min": -25, "lon_max": 80, "lat_min": -55, "lat_max": 45},
-         # WMO Region 2 - Asia (includes SE Asia, West Asia, Asian Russia, and BIOT)
-         "Asia": {"lon_min": 20, "lon_max": 180, "lat_min": -10, "lat_max": 80},
-         # Subsets
-         "Japan": {"lon_min": 127, "lon_max": 146, "lat_min": 24, "lat_max": 46},
-         "India": {"lon_min": 68, "lon_max": 97, "lat_min": 6, "lat_max": 36},
-         # WMO Region 3 - South America (includes Falklands, South Georgia/Sandwich, Galapagos)
-         "South_America": {"lon_min": -92, "lon_max": -20, "lat_min": -60, "lat_max": 15},
-         # Legacy/compatibility subset
-         "Argentina": {"lon_min": -75, "lon_max": -53, "lat_min": -55, "lat_max": -22},
-         # WMO Region 4 - North and Central America (includes Greenland and Caribbean)
-         "North_and_Central_America": {"lon_min": -180, "lon_max": 20, "lat_min": -10, "lat_max": 85},
-         # Backward-compatible subsets mapped to Region 4 KML
-         "Canada": {"lon_min": -141, "lon_max": -52, "lat_min": 42, "lat_max": 83},
-         "USA": {"lon_min": -170, "lon_max": -65, "lat_min": 20, "lat_max": 72},
-         "Caribbean": {"lon_min": -90, "lon_max": -59, "lat_min": 10, "lat_max": 27},
-         # WMO Region 5 - Southwest Pacific (covers SE Asia + Pacific islands + Hawaii via antimeridian)
-         "Southwest_Pacific": {
-             "boxes": [
-                 {"lon_min": 90, "lon_max": 180, "lat_min": -50, "lat_max": 25},
-                 {"lon_min": -180, "lon_max": -140, "lat_min": -50, "lat_max": 25},
-             ]
-         },
-         # WMO Region 6 - Europe (includes Middle East countries listed and Greenland)
-         "Europe": {"lon_min": -75, "lon_max": 60, "lat_min": 25, "lat_max": 85},
-         # WMO Region 7 - Antarctica
-         "Antarctica": {"lon_min": -180, "lon_max": 180, "lat_min": -90, "lat_max": -60}
-     }
- 
-     def detect_regions(lon: float, lat: float) -> List[str]:
-         """
-         Detect which region(s) the coordinates belong to.
- 
-         Uses the REGION_BOUNDS to determine appropriate regions to search.
-         If coordinates don't fall within any region, returns the 3 closest regions.
- 
-         Args:
-             lon: Longitude coordinate
-             lat: Latitude coordinate
- 
-         Returns:
-             List of region names to search
-         """
-         matching_regions = []
- 
-         # Handle special case of longitude wrap around 180/-180
-         # Normalize longitude to standard -180 to 180 range
-         lon_adjusted = lon
-         if lon < -180:
-             lon_adjusted = lon + 360
-         elif lon > 180:
-             lon_adjusted = lon - 360
- 
-         # Helper to test point within a single box
-         def _in_box(bx: Dict[str, float], lon_v: float, lat_v: float) -> bool:
-             return (bx["lon_min"] <= lon_v <= bx["lon_max"] and
-                     bx["lat_min"] <= lat_v <= bx["lat_max"])
- 
-         # Check if coordinates fall within any region bounds (support multi-box)
-         for region_name, bounds in REGION_BOUNDS.items():
-             if "boxes" in bounds:
-                 for bx in bounds["boxes"]:
-                     if _in_box(bx, lon_adjusted, lat):
-                         matching_regions.append(region_name)
-                         break
-             else:
-                 if _in_box(bounds, lon_adjusted, lat):
-                     matching_regions.append(region_name)
- 
-         # If no regions matched, find the closest regions by boundary distance
-         if not matching_regions:
-             # Calculate "distance" to each region's boundary (simplified metric)
-             region_distances = []
-             for region_name, bounds in REGION_BOUNDS.items():
-                 def _box_distance(bx: Dict[str, float]) -> float:
-                     lon_dist = 0
-                     if lon_adjusted < bx["lon_min"]:
-                         lon_dist = bx["lon_min"] - lon_adjusted
-                     elif lon_adjusted > bx["lon_max"]:
-                         lon_dist = lon_adjusted - bx["lon_max"]
-                     lat_dist = 0
-                     if lat < bx["lat_min"]:
-                         lat_dist = bx["lat_min"] - lat
-                     elif lat > bx["lat_max"]:
-                         lat_dist = lat - bx["lat_max"]
-                     return (lon_dist**2 + lat_dist**2)**0.5
- 
-                 if "boxes" in bounds:
-                     d = min(_box_distance(bx) for bx in bounds["boxes"])
-                 else:
-                     d = _box_distance(bounds)
-                 region_distances.append((region_name, d))
- 
-             # Get 3 closest regions to ensure we find stations
-             closest_regions = sorted(region_distances, key=lambda x: x[1])[:3]
-             matching_regions = [r[0] for r in closest_regions]
- 
-         return matching_regions
- 
-     def try_decode(content: bytes) -> str:
-         """
-         Try different encodings to decode content.
- 
-         KML files from different regions may use various text encodings.
-         This function tries common encodings to successfully decode the content.
- 
-         Args:
-             content: Raw bytes content
- 
-         Returns:
-             Decoded string content
-         """
-         # Try common encodings in order of preference
-         encodings = ['utf-8', 'latin1', 'iso-8859-1', 'cp1252']
-         for encoding in encodings:
-             try:
-                 return content.decode(encoding)
-             except UnicodeDecodeError:
-                 continue
- 
-         # If all else fails, try to decode with replacement characters
-         return content.decode('utf-8', errors='replace')
- 
-     def clean_xml(content: str) -> str:
-         """
-         Clean XML content of invalid characters.
- 
-         Some KML files contain characters that cause XML parsing issues.
-         This function replaces or removes problematic characters to ensure
-         successful XML parsing.
- 
-         Args:
-             content: Raw XML content string
- 
-         Returns:
-             Cleaned XML content string
-         """
-         # Replace problematic Spanish characters that cause XML parsing issues
-         content = content.replace('&ntilde;', 'n')
-         content = content.replace('&Ntilde;', 'N')
-         content = content.replace('ñ', 'n')
-         content = content.replace('Ñ', 'N')
- 
-         # Remove other invalid XML characters using regex
-         # Keep only valid XML characters: tab, newline, carriage return, printable ASCII, and extended Latin
-         content = re.sub(r'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\xFF]', '', content)
-         return content
- 
-     def haversine_distance(lon1: float, lat1: float, lon2: float, lat2: float) -> float:
-         """
-         Calculate the great circle distance between two points on Earth.
- 
-         Uses the Haversine formula to calculate the shortest distance between
-         two points on a sphere (Earth) given their latitude and longitude.
- 
-         Args:
-             lon1, lat1: Coordinates of first point
-             lon2, lat2: Coordinates of second point
- 
-         Returns:
-             Distance in kilometers
-         """
-         R = 6371  # Earth's radius in kilometers
- 
-         # Convert coordinates to radians
-         lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])
-         dlat = lat2 - lat1
-         dlon = lon2 - lon1
- 
-         # Haversine formula calculation
-         a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
-         c = 2 * atan2(sqrt(a), sqrt(1-a))
-         return R * c
- 
-     def parse_coordinates(point_text: str) -> Tuple[float, float, float]:
-         """
-         Parse coordinates from KML Point text.
- 
-         KML Point elements contain coordinates in the format "longitude,latitude,elevation".
-         This function extracts and converts these values to float.
- 
-         Args:
-             point_text: Raw coordinate text from KML Point element
- 
-         Returns:
-             Tuple of (latitude, longitude, elevation) or None if parsing fails
-         """
-         try:
-             coords = point_text.strip().split(',')
-             if len(coords) >= 2:
-                 lon, lat = map(float, coords[:2])
-                 elevation = float(coords[2]) if len(coords) > 2 else 0
-                 return lat, lon, elevation
-         except (ValueError, IndexError):
-             pass
-         return None
- 
-     def parse_station_from_description(desc: str, point_coords: Optional[Tuple[float, float, float]] = None) -> Dict:
-         """
-         Parse station metadata from KML description.
- 
-         KML description fields contain detailed station information including:
-         - Download URL for EPW file
-         - Coordinates in degrees/minutes format
-         - Station metadata (WMO code, climate zone, etc.)
-         - Design conditions and climate statistics
- 
-         Args:
-             desc: KML description text containing station metadata
-             point_coords: Fallback coordinates from KML Point element
- 
-         Returns:
-             Dictionary with parsed station metadata or None if parsing fails
-         """
-         if not desc:
-             return None
- 
-         # Extract download URL - this is required for station to be valid
-         url_match = re.search(r'URL (https://.*?\.zip)', desc)
-         if not url_match:
-             return None
- 
-         url = url_match.group(1)
- 
-         # First try to parse coordinates in degrees/minutes format from description
-         # Format: N XX°YY.YY' W ZZ°AA.AA'
-         coord_match = re.search(r'([NS]) (\d+)&deg;\s*(\d+\.\d+)\'.*?([EW]) (\d+)&deg;\s*(\d+\.\d+)\'', desc)
- 
-         if coord_match:
-             # Convert degrees/minutes to decimal degrees
-             ns, lat_deg, lat_min, ew, lon_deg, lon_min = coord_match.groups()
-             lat = float(lat_deg) + float(lat_min)/60
-             if ns == 'S':
-                 lat = -lat
-             lon = float(lon_deg) + float(lon_min)/60
-             if ew == 'W':
-                 lon = -lon
-         elif point_coords:
-             # Fall back to coordinates from KML Point element
-             lat, lon, _ = point_coords
-         else:
-             # No coordinates available - station is not usable
-             return None
- 
-         # Extract metadata with error handling using helper function
-         def extract_value(pattern: str, default: str = None) -> str:
-             """Extract value using regex pattern, return default if not found."""
-             match = re.search(pattern, desc)
-             return match.group(1) if match else default
- 
-         # Build comprehensive station metadata dictionary
-         metadata = {
-             'url': url,
-             'longitude': lon,
-             'latitude': lat,
-             'elevation': int(extract_value(r'Elevation <b>(-?\d+)</b>', '0')),
-             'name': extract_value(r'<b>(.*?)</b>'),
-             'wmo': extract_value(r'WMO <b>(\d+)</b>'),
-             'climate_zone': extract_value(r'Climate Zone <b>(.*?)</b>'),
-             'period': extract_value(r'Period of Record=(\d{4}-\d{4})'),
-             'heating_db': extract_value(r'99% Heating DB <b>(.*?)</b>'),
-             'cooling_db': extract_value(r'1% Cooling DB <b>(.*?)</b>'),
-             'hdd18': extract_value(r'HDD18 <b>(\d+)</b>'),
-             'cdd10': extract_value(r'CDD10 <b>(\d+)</b>'),
-             'time_zone': extract_value(r'Time Zone {GMT <b>([-+]?\d+\.\d+)</b>')
-         }
- 
-         return metadata
- 
-     def try_download_station_zip(original_url: str, timeout_s: int = 30) -> Optional[bytes]:
-         """
-         Try downloading station archive; on 404s, attempt smart fallbacks.
- 
-         Fallback strategies:
-         - Country rename: /TUR_Turkey/ -> /TUR_Turkiye/ (per Oct 2024 site update)
-         - TMYx period variants: .2009-2023.zip, .2007-2021.zip, .zip, .2004-2018.zip
- 
-         Args:
-             original_url: URL extracted from KML
-             timeout_s: request timeout seconds
-         Returns:
-             Bytes of the downloaded zip on success, otherwise None
-         """
-         def candidate_urls(url: str) -> List[str]:
-             urls = []
-             urls.append(url)
-             # Country rename variants
-             if "/TUR_Turkey/" in url:
-                 urls.append(url.replace("/TUR_Turkey/", "/TUR_Turkiye/"))
-             if "/TUR_Turkiye/" in url:
-                 urls.append(url.replace("/TUR_Turkiye/", "/TUR_Turkey/"))
-             # TMYx period variants
-             m = re.search(r"(.*_TMYx)(?:\.(\d{4}-\d{4}))?\.zip$", url)
-             if m:
-                 base = m.group(1)
-                 suffix = m.group(2)
-                 variants = [
-                     f"{base}.2009-2023.zip",
-                     f"{base}.2007-2021.zip",
-                     f"{base}.zip",
-                     f"{base}.2004-2018.zip",
-                 ]
-                 for v in variants:
-                     if v not in urls:
-                         urls.append(v)
-                 # Also apply country rename to each variant
-                 extra = []
-                 for v in variants:
-                     if "/TUR_Turkey/" in url:
-                         extra.append(v.replace("/TUR_Turkey/", "/TUR_Turkiye/"))
-                     if "/TUR_Turkiye/" in url:
-                         extra.append(v.replace("/TUR_Turkiye/", "/TUR_Turkey/"))
-                 for v in extra:
-                     if v not in urls:
-                         urls.append(v)
-             return urls
- 
-         tried = set()
-         for u in candidate_urls(original_url):
-             if u in tried:
-                 continue
-             tried.add(u)
-             try:
-                 resp = requests.get(u, timeout=timeout_s, verify=ssl_verify)
-                 resp.raise_for_status()
-                 return resp.content
-             except requests.exceptions.SSLError:
-                 # Retry with user-controlled insecure SSL
-                 if allow_insecure_ssl:
-                     try:
-                         resp = requests.get(u, timeout=timeout_s, verify=False)
-                         resp.raise_for_status()
-                         return resp.content
-                     except requests.exceptions.RequestException:
-                         if allow_http_fallback and u.lower().startswith("https://"):
-                             insecure_url = "http://" + u.split("://", 1)[1]
-                             try:
-                                 resp = requests.get(insecure_url, timeout=timeout_s)
-                                 resp.raise_for_status()
-                                 return resp.content
-                             except requests.exceptions.RequestException:
-                                 pass
-                         continue
-                 else:
-                     if allow_http_fallback and u.lower().startswith("https://"):
-                         insecure_url = "http://" + u.split("://", 1)[1]
-                         try:
-                             resp = requests.get(insecure_url, timeout=timeout_s)
-                             resp.raise_for_status()
-                             return resp.content
-                         except requests.exceptions.RequestException:
-                             pass
-                     continue
-             except requests.exceptions.HTTPError as he:
-                 # Only continue on 404; raise on other HTTP errors
-                 if getattr(he.response, "status_code", None) == 404:
-                     continue
-                 else:
-                     raise
-             except requests.exceptions.RequestException:
-                 # On network errors, try next candidate
-                 continue
-         return None
- 
-     def get_stations_from_kml(kml_url: str) -> List[Dict]:
-         """
-         Get weather stations from a KML file.
- 
-         Downloads and parses a KML file containing weather station information.
-         Each Placemark in the KML represents a weather station with metadata
-         in the description field and coordinates in Point elements.
- 
-         Args:
-             kml_url: URL to the KML file
- 
-         Returns:
-             List of dictionaries containing station metadata
-         """
-         try:
-             # Download KML file with timeout (secure first)
-             try:
-                 response = requests.get(kml_url, timeout=30, verify=ssl_verify)
-                 response.raise_for_status()
-             except requests.exceptions.SSLError:
-                 if allow_insecure_ssl:
-                     # Retry with certificate verification disabled (last resort)
-                     try:
-                         response = requests.get(kml_url, timeout=30, verify=False)
-                         response.raise_for_status()
-                     except requests.exceptions.RequestException:
-                         # Try HTTP fallback if original was HTTPS and allowed
-                         if allow_http_fallback and kml_url.lower().startswith("https://"):
-                             insecure_url = "http://" + kml_url.split("://", 1)[1]
-                             response = requests.get(insecure_url, timeout=30)
-                             response.raise_for_status()
-                         else:
-                             raise
-                 else:
-                     # Try HTTP fallback only if allowed and original was HTTPS
-                     if allow_http_fallback and kml_url.lower().startswith("https://"):
-                         insecure_url = "http://" + kml_url.split("://", 1)[1]
-                         response = requests.get(insecure_url, timeout=30)
-                         response.raise_for_status()
-                     else:
-                         raise
- 
-             # Try to decode content with multiple encodings
-             content = try_decode(response.content)
-             content = clean_xml(content)
- 
-             # Parse XML content
-             try:
-                 root = ET.fromstring(content.encode('utf-8'))
-             except ET.ParseError as e:
-                 print(f"Error parsing KML file {kml_url}: {e}")
-                 return []
- 
-             # Define KML namespace for element searching
-             ns = {'kml': 'http://earth.google.com/kml/2.1'}
- 
-             stations = []
- 
-             # Find all Placemark elements (each represents a weather station)
-             for placemark in root.findall('.//kml:Placemark', ns):
-                 name = placemark.find('kml:name', ns)
-                 desc = placemark.find('kml:description', ns)
-                 point = placemark.find('.//kml:Point/kml:coordinates', ns)
- 
-                 # Skip placemarks without description or that don't contain weather data
-                 if desc is None or not desc.text or "Data Source" not in desc.text:
-                     continue
- 
-                 # Get coordinates from Point element if available
-                 point_coords = None
-                 if point is not None and point.text:
-                     point_coords = parse_coordinates(point.text)
- 
-                 # Parse comprehensive station data from description
-                 station_data = parse_station_from_description(desc.text, point_coords)
-                 if station_data:
-                     # Add station name and source information
-                     station_data['name'] = name.text if name is not None else "Unknown"
-                     station_data['kml_source'] = kml_url
-                     stations.append(station_data)
- 
-             return stations
- 
-         except requests.exceptions.RequestException as e:
-             print(f"Error accessing KML file {kml_url}: {e}")
-             return []
-         except Exception as e:
-             print(f"Error processing KML file {kml_url}: {e}")
-             return []
- 
-     try:
-         # Create output directory if it doesn't exist
-         Path(output_dir).mkdir(parents=True, exist_ok=True)
- 
-         # Determine which regions to scan based on user input or auto-detection
-         regions_to_scan = {}
-         def _add_selection(selection_name: str, mapping: Dict[str, str], out: Dict[str, str]):
-             """Expand a region or dataset selection into concrete KML URLs."""
-             if selection_name in REGION_DATASET_GROUPS:
-                 for key in REGION_DATASET_GROUPS[selection_name]:
-                     if key in KML_SOURCES:
-                         out[key] = KML_SOURCES[key]
-             elif selection_name in KML_SOURCES:
-                 out[selection_name] = KML_SOURCES[selection_name]
-             else:
-                 valid = sorted(list(REGION_DATASET_GROUPS.keys()) + list(KML_SOURCES.keys()))
-                 raise ValueError(f"Invalid region/dataset: '{selection_name}'. Valid options include: {', '.join(valid)}")
- 
-         if region is None:
-             # Auto-detect regions based on coordinates
-             detected_regions = detect_regions(longitude, latitude)
- 
-             if detected_regions:
-                 print(f"Auto-detected regions: {', '.join(detected_regions)}")
-                 for r in detected_regions:
-                     _add_selection(r, KML_SOURCES, regions_to_scan)
-             else:
-                 # Fallback to all regions if detection fails
-                 print("Could not determine region from coordinates. Scanning all regions.")
-                 regions_to_scan = dict(KML_SOURCES)
-         elif isinstance(region, str):
-             # Handle string input for region selection
-             if region.lower() == "all":
-                 regions_to_scan = dict(KML_SOURCES)
-             else:
-                 _add_selection(region, KML_SOURCES, regions_to_scan)
-         else:
-             # Handle list input for multiple regions
-             for r in region:
-                 _add_selection(r, KML_SOURCES, regions_to_scan)
- 
-         # Get stations from selected KML sources
-         print("Fetching weather station data from Climate.OneBuilding.Org...")
-         all_stations = []
- 
-         # Process each selected region
-         scanned_urls = set()
-         for region_name, url in regions_to_scan.items():
-             if url in scanned_urls:
-                 continue
-             scanned_urls.add(url)
-             print(f"Scanning {region_name}...")
-             stations = get_stations_from_kml(url)
-             all_stations.extend(stations)
-             print(f"Found {len(stations)} stations in {region_name}")
- 
-         print(f"\nTotal stations found: {len(all_stations)}")
- 
-         if not all_stations:
-             # Fallback: if no stations found, try scanning all available datasets
-             if not (isinstance(region, str) and region.lower() == "all"):
-                 print("No stations found from detected/selected regions. Falling back to global scan...")
-                 regions_to_scan = dict(KML_SOURCES)
-                 all_stations = []
-                 scanned_urls = set()
-                 for region_name, url in regions_to_scan.items():
-                     if url in scanned_urls:
-                         continue
-                     scanned_urls.add(url)
-                     print(f"Scanning {region_name}...")
-                     stations = get_stations_from_kml(url)
-                     all_stations.extend(stations)
-                     print(f"Found {len(stations)} stations in {region_name}")
-                 print(f"\nTotal stations found after global scan: {len(all_stations)}")
-             if not all_stations:
-                 raise ValueError("No weather stations found")
- 
-         # Calculate distances from target coordinates to all stations
-         stations_with_distances = [
-             (station, haversine_distance(longitude, latitude, station['longitude'], station['latitude']))
-             for station in all_stations
-         ]
- 
-         # Filter by maximum distance if specified
-         if max_distance is not None:
-             close_stations = [
-                 (station, distance)
-                 for station, distance in stations_with_distances
-                 if distance <= max_distance
-             ]
-             if not close_stations:
-                 # If no stations within max_distance, find the closest one anyway
-                 closest_station, min_distance = min(stations_with_distances, key=lambda x: x[1])
-                 print(f"\nNo stations found within {max_distance} km. Closest station is {min_distance:.1f} km away.")
-                 print("Using closest available station.")
-                 stations_with_distances = [(closest_station, min_distance)]
-             else:
-                 stations_with_distances = close_stations
- 
-         # Find the nearest weather station
-         nearest_station, distance = min(stations_with_distances, key=lambda x: x[1])
- 
-         # Download the EPW archive from the nearest station with fallbacks
-         print(f"\nDownloading EPW file for {nearest_station['name']}...")
-         archive_bytes = try_download_station_zip(nearest_station['url'], timeout_s=30)
-         if archive_bytes is None:
-             raise ValueError(f"Failed to download EPW archive from station URL and fallbacks: {nearest_station['url']}")
- 
-         # Create a temporary directory for zip extraction
-         temp_dir = Path(output_dir) / "temp"
-         temp_dir.mkdir(parents=True, exist_ok=True)
- 
-         # Save the downloaded zip file temporarily
-         zip_file = temp_dir / "weather_data.zip"
-         with open(zip_file, 'wb') as f:
-             f.write(archive_bytes)
- 
-         final_epw = None
-         try:
-             # Extract the EPW file from the zip archive
-             if extract_zip:
-                 with zipfile.ZipFile(zip_file, 'r') as zip_ref:
-                     # Find the EPW file in the archive (should be exactly one)
-                     epw_files = [f for f in zip_ref.namelist() if f.lower().endswith('.epw')]
-                     if not epw_files:
-                         raise ValueError("No EPW file found in the downloaded archive")
- 
-                     # Extract the EPW file
-                     epw_filename = epw_files[0]
-                     extracted_epw = safe_extract(zip_ref, epw_filename, temp_dir)
- 
-                     # Move the EPW file to the final location with cleaned filename
-                     final_epw = Path(output_dir) / f"{nearest_station['name'].replace(' ', '_').replace(',', '').lower()}.epw"
-                     final_epw = safe_rename(extracted_epw, final_epw)
-         finally:
-             # Clean up temporary files regardless of success or failure
-             try:
-                 if zip_file.exists():
-                     zip_file.unlink()
-                 if temp_dir.exists() and not any(temp_dir.iterdir()):
-                     temp_dir.rmdir()
-             except Exception as e:
-                 print(f"Warning: Could not clean up temporary files: {e}")
- 
-         if final_epw is None:
-             raise ValueError("Failed to extract EPW file")
- 
-         # Save station metadata alongside the EPW file
-         metadata_file = final_epw.with_suffix('.json')
-         with open(metadata_file, 'w') as f:
-             json.dump(nearest_station, f, indent=2)
- 
-         # Print comprehensive station information
-         print(f"\nDownloaded EPW file for {nearest_station['name']}")
-         print(f"Distance: {distance:.2f} km")
-         print(f"Station coordinates: {nearest_station['longitude']}, {nearest_station['latitude']}")
-         if nearest_station['wmo']:
-             print(f"WMO: {nearest_station['wmo']}")
-         if nearest_station['climate_zone']:
-             print(f"Climate zone: {nearest_station['climate_zone']}")
-         if nearest_station['period']:
-             print(f"Data period: {nearest_station['period']}")
-         print(f"Files saved:")
-         print(f"- EPW: {final_epw}")
-         print(f"- Metadata: {metadata_file}")
- 
-         # Load the EPW data into DataFrame if requested
-         df = None
-         headers = None
-         if load_data:
-             print("\nLoading EPW data...")
-             df, headers = process_epw(final_epw)
-             print(f"Loaded {len(df)} hourly records")
- 
-         return str(final_epw), df, headers
- 
-     except Exception as e:
-         print(f"Error processing data: {e}")
-         return None, None, None
- 
- # =============================================================================
- # SOLAR SIMULATION UTILITIES
- # =============================================================================
- 
- def read_epw_for_solar_simulation(epw_file_path):
-     """
-     Read EPW file specifically for solar simulation purposes.
- 
-     This function extracts essential solar radiation data and location metadata
-     from an EPW file for use in solar energy calculations. It focuses on the
-     Direct Normal Irradiance (DNI) and Diffuse Horizontal Irradiance (DHI)
-     which are the primary inputs for solar simulation models.
- 
-     Args:
-         epw_file_path: Path to the EPW weather file
- 
-     Returns:
-         Tuple containing:
-             - DataFrame with time-indexed DNI and DHI data
-             - Longitude (degrees)
-             - Latitude (degrees)
-             - Time zone offset (hours from UTC)
-             - Elevation (meters above sea level)
- 
-     Raises:
-         ValueError: If LOCATION line not found or data parsing fails
-     """
-     # Validate input path
-     if epw_file_path is None:
-         raise TypeError("EPW file path is None. Provide a valid path or ensure download succeeded.")
-     epw_path_obj = Path(epw_file_path)
-     if not epw_path_obj.exists() or not epw_path_obj.is_file():
-         raise FileNotFoundError(f"EPW file not found: {epw_file_path}")
- 
-     # Read the entire EPW file
-     with open(epw_path_obj, 'r', encoding='utf-8') as f:
-         lines = f.readlines()
- 
-     # Find the LOCATION line (first line in EPW format)
-     location_line = None
-     for line in lines:
-         if line.startswith("LOCATION"):
-             location_line = line.strip().split(',')
-             break
- 
-     if location_line is None:
-         raise ValueError("Could not find LOCATION line in EPW file.")
- 
-     # Parse LOCATION line format:
-     # LOCATION,City,State/Country,Country,DataSource,WMO,Latitude,Longitude,Time Zone,Elevation
-     # Example: LOCATION,Marina.Muni.AP,CA,USA,SRC-TMYx,690070,36.68300,-121.7670,-8.0,43.0
-     lat = float(location_line[6])
-     lon = float(location_line[7])
-     tz = float(location_line[8])  # local standard time offset from UTC
-     elevation_m = float(location_line[9])
- 
-     # Find start of weather data (after 8 header lines)
-     data_start_index = None
-     for i, line in enumerate(lines):
-         vals = line.strip().split(',')
-         # Weather data lines have more than 30 columns and start after line 8
-         if i >= 8 and len(vals) > 30:
-             data_start_index = i
-             break
- 
-     if data_start_index is None:
-         raise ValueError("Could not find start of weather data lines in EPW file.")
- 
-     # Parse weather data focusing on solar radiation components
-     data = []
-     for l in lines[data_start_index:]:
-         vals = l.strip().split(',')
-         if len(vals) < 15:  # Skip malformed lines
-             continue
-         # Extract time components and solar radiation data
-         year = int(vals[0])
-         month = int(vals[1])
-         day = int(vals[2])
-         hour = int(vals[3]) - 1  # Convert EPW 1-24 hours to 0-23
-         dni = float(vals[14])  # Direct Normal Irradiance (Wh/m²)
-         dhi = float(vals[15])  # Diffuse Horizontal Irradiance (Wh/m²)
- 
-         # Create pandas timestamp for time series indexing
-         timestamp = pd.Timestamp(year, month, day, hour)
-         data.append([timestamp, dni, dhi])
- 
-     # Create DataFrame with time index for efficient time series operations
-     df = pd.DataFrame(data, columns=['time', 'DNI', 'DHI']).set_index('time')
-     df = df.sort_index()
- 
-     return df, lon, lat, tz, elevation_m
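For reference, a minimal usage sketch of the removed module as it stood in 0.6.26, based solely on the signatures shown above; the coordinates are illustrative values, not taken from the package:

    # Download the nearest EPW file, then read it back for a solar simulation.
    from voxcity.utils.weather import (
        get_nearest_epw_from_climate_onebuilding,
        read_epw_for_solar_simulation,
    )

    # Example coordinates (central Tokyo); any lon/lat pair works.
    epw_path, df, headers = get_nearest_epw_from_climate_onebuilding(
        longitude=139.76, latitude=35.68,
        output_dir="./weather", max_distance=100.0,
    )
    if epw_path is not None:
        # headers['LOCATION'] is parsed into a dict when the header is well-formed
        print(headers['LOCATION'])
        print(df['Dry Bulb Temperature'].mean())

        # Hourly DNI/DHI plus site metadata, the inputs to the solar simulator
        solar_df, lon, lat, tz, elevation_m = read_epw_for_solar_simulation(epw_path)
        print(solar_df[['DNI', 'DHI']].head())

In 1.0.2 this functionality moves into the voxcity/utils/weather/ package (epw.py, files.py, onebuilding.py) per the file list at the top.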