windborne 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
windborne/utils.py ADDED
@@ -0,0 +1,940 @@
1
+ from .config import CLIENT_ID, API_KEY
2
+
3
+ import requests
4
+ import jwt
5
+ import time
6
+ import re
7
+ import uuid
8
+ from datetime import datetime, timezone
9
+ import dateutil.parser
10
+ import boto3
11
+ import io
12
+ import json
13
+ import csv
14
+
15
+ import numpy as np
16
+
17
+ # Check if input is uuid v4
18
def is_valid_uuid_v4(client_id):
    """
    Return True when client_id is a canonical UUID v4 string.

    Round-trips the string through uuid.UUID: it is valid only when the
    normalized form equals the input exactly (lowercase, hyphenated,
    correct version/variant bits).

    Args:
        client_id: Candidate value; non-string input is simply invalid.

    Returns:
        bool: True for a canonical UUID v4, False otherwise.
    """
    try:
        return str(uuid.UUID(client_id, version=4)) == client_id
    except (ValueError, TypeError, AttributeError):
        # ValueError: malformed string; TypeError/AttributeError: input
        # is not a string at all (e.g. None) — treat as invalid rather
        # than crashing.
        return False
23
+
24
+ # Check if client id input format
25
def is_valid_client_id_format(client_id):
    """Return True when client_id is one or more of [a-z0-9_] only."""
    pattern = re.compile(r"[a-z0-9_]+")
    return pattern.fullmatch(client_id) is not None
27
+
28
+ # Authenticate requests using a JWT | no reveal of underlying key
29
def _print_docs_reference():
    # Shared pointer to the credential-setup documentation (printed from
    # several error branches below).
    print("You may refer to https://windbornesystems.com/docs/api/cli#introduction\n"
          "for instructions on how to set your credentials as environment variables for CLI and Code usage\n\n"
          "and to https://windbornesystems.com/docs/api/pip_data#introduction\n"
          "for instruction on how to set your credentials for code usage.")


def make_api_request(url, params=None, return_type=None):
    """
    Perform an authenticated GET request against the WindBorne API.

    Authentication uses a short-lived JWT signed with the API key (HS256)
    sent via HTTP basic auth as (CLIENT_ID, signed_token), so the
    underlying key is never transmitted.

    Args:
        url (str): Endpoint to request.
        params (dict, optional): Query parameters.
        return_type (str, optional): None -> return response.json()
            (Data API). 'all' -> return the raw response object
            (Forecasts API, so the caller can read the S3 url).

    Returns:
        dict | requests.Response | None: Parsed payload, raw response,
        or None when a non-HTTP request error was printed.

    Exits the process with a documented status code on missing or
    malformed credentials and on unrecoverable HTTP errors.
    """
    # Check if credentials are set
    if not CLIENT_ID and not API_KEY:
        print("To access the WindBorne API, you need to set your Client ID and API key by setting the environment variables WB_CLIENT_ID and WB_API_KEY.")
        print("--------------------------------------")
        _print_docs_reference()
        print("--------------------------------------")
        print("To get an API key, email data@windbornesystems.com.")
        exit(80)
    elif not CLIENT_ID:
        print("To access the WindBorne API, you need to set your Client ID by setting the environment variable WB_CLIENT_ID.")
        print("--------------------------------------")
        _print_docs_reference()
        print("--------------------------------------")
        print("To get an API key, email data@windbornesystems.com.")
        exit(90)
    elif not API_KEY:
        # NOTE: fixed "CAPI key" typo in the original message.
        print("To access the WindBorne API, you need to set your API key by setting the environment variable WB_API_KEY.")
        print("--------------------------------------")
        _print_docs_reference()
        print("--------------------------------------")
        print("To get an API key, email data@windbornesystems.com.")
        exit(91)

    # Validate WB_CLIENT_ID format
    if not (is_valid_uuid_v4(CLIENT_ID) or is_valid_client_id_format(CLIENT_ID)):
        print("Your Client ID is misformatted.")
        print("--------------------------------------")
        print("It should either be a valid UUID v4 or consist of only lowercase letters, digits, and underscores ([a-z0-9_]).")
        print("--------------------------------------")
        _print_docs_reference()
        print("--------------------------------------")
        print(f"Current Client ID: {CLIENT_ID}")
        exit(92)

    # Validate WB_API_KEY for both newer and older formats
    if API_KEY.startswith("wb_"):
        if len(API_KEY) != 35:
            print("Your API key is misformatted.")
            print("--------------------------------------")
            print("API keys starting with 'wb_' must be 35 characters long (including the 'wb_' prefix).")
            print("--------------------------------------")
            _print_docs_reference()
            print("--------------------------------------")
            print(f"Current API key: {API_KEY}")
            exit(93)
    elif len(API_KEY) != 32:  # For early tokens
        print("Your API key is misformatted.")
        print("--------------------------------------")
        print("API keys created in 2023 or earlier must be exactly 32 characters long.")
        print("--------------------------------------")
        _print_docs_reference()
        print("--------------------------------------")
        print(f"Current API key: {API_KEY}")
        exit(94)

    signed_token = jwt.encode({
        'client_id': CLIENT_ID,
        'iat': int(time.time()),
    }, API_KEY, algorithm='HS256')

    try:
        # requests treats params=None the same as omitting it, so a single
        # call covers both cases.
        response = requests.get(url, auth=(CLIENT_ID, signed_token), params=params)
        response.raise_for_status()

        if return_type is None:
            # For Data API
            return response.json()
        elif return_type == 'all':
            # For Forecasts API (except tcs) --> return whole response not .json to obtain S3 url
            return response
    except requests.exceptions.HTTPError as http_err:
        if http_err.response.status_code == 403:
            print("--------------------------------------")
            print("We couldn't authenticate your request.")
            print("--------------------------------------")
            print("Please make sure you have properly set your WB_CLIENT_ID and WB_API_KEY.\n")
            print("You can verify this by running\necho $WB_CLIENT_ID and echo $WB_API_KEY in your terminal.\n")
            print("To get an API key, email data@windbornesystems.com.")
        elif http_err.response.status_code in [404, 400]:
            print("-------------------------------------------------------")
            print("Our server couldn't find the information you requested.")
            print("-------------------------------------------------------")
            print(f"URL: {url}")
            print(f"Error: {http_err.response.status_code}")
            print("-------------------------------------------------------")
            if params:
                print("\nParameters provided:")
                for key, value in params.items():
                    print(f" {key}: {value}")
            else:
                if 'missions/' in url:
                    mission_id = url.split('/missions/')[1].split('/')[0]
                    print(f"Mission ID provided: {mission_id}")
                    print(f"We couldn't find a mission with id: {mission_id}")
        elif http_err.response.status_code == 502:
            retries = 1
            while response.status_code == 502 and retries < 5:
                print("502 Bad Gateway, sleeping and retrying")
                time.sleep(2**retries)
                # BUG FIX: the retry must carry the original query params.
                response = requests.get(url, auth=(CLIENT_ID, signed_token), params=params)
                retries += 1
            # BUG FIX: the original fell through to exit() even after a
            # successful retry; return the recovered response instead.
            if response.status_code < 400:
                return response if return_type == 'all' else response.json()
        else:
            print(f"HTTP error occurred\n\n{http_err}")
            if params:
                print("\nParameters provided:")
                for key, value in params.items():
                    print(f" {key}: {value}")
        exit(http_err.response.status_code)
    except requests.exceptions.ConnectionError as conn_err:
        print(f"Connection error occurred\n\n{conn_err}")
    except requests.exceptions.Timeout as timeout_err:
        print(f"Timeout error occurred\n\n{timeout_err}")
    except requests.exceptions.RequestException as req_err:
        print(f"An error occurred\n\n{req_err}")
166
+
167
+ # Supported date formats
168
+ # YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM and ISO strings
169
def to_unix_timestamp(date_string):
    """
    Converts a date string or integer to a UNIX timestamp.
    Supports various date formats and handles future dates gracefully.

    Args:
        date_string (str | int | None): The date string to convert or an integer UNIX timestamp.

    Returns:
        int | None: The UNIX timestamp or None if the input is None.
    """
    if date_string is None:
        return None
    if isinstance(date_string, int):
        # Already a timestamp; pass straight through.
        return date_string
    if isinstance(date_string, str):
        known_formats = (
            "%Y-%m-%d %H:%M:%S",      # e.g., 2024-12-05 14:48:00
            "%Y-%m-%d_%H:%M",         # e.g., 2024-12-05_14:48
            "%Y-%m-%dT%H:%M:%S.%fZ",  # e.g., 2024-12-05T14:48:00.000Z
        )
        now_utc = datetime.now(timezone.utc)
        for candidate in known_formats:
            try:
                parsed = datetime.strptime(date_string, candidate)
            except ValueError:
                continue
            parsed = parsed.replace(tzinfo=timezone.utc)
            if parsed > now_utc:
                print(f"How would it be to live in {parsed} ?\n")
                print("Looks like you are coming from the future!\n")
                exit(1111)
            return int(parsed.timestamp())

        # No known format matched the string.
        print("Invalid date format. Please use one of the supported formats:\n"
              "- YYYY-MM-DD HH:MM:SS\n"
              "- YYYY-MM-DD_HH:MM\n"
              "- YYYY-MM-DDTHH:MM:SS.fffZ")
        exit(11)
209
+
210
+ # Supported date format
211
+ # Compact format YYYYMMDDHH
212
def parse_time(time, init_time_flag=None):
    """
    Parse and validate initialization time with support for multiple formats.
    Returns validated initialization time in ISO format, or None if invalid.
    """
    if time is None:
        return None

    try:
        if re.match(r'^\d{10}$', time):
            # Compact form: YYYYMMDDHH
            try:
                parsed = datetime.strptime(time, "%Y%m%d%H")
            except (ValueError, OverflowError):
                print(f"Invalid date values in: {time}")
                print("Make sure your date values are valid")
                exit(2)

            if init_time_flag and parsed.hour not in (0, 6, 12, 18):
                print("Initialization time hour must be 00, 06, 12, or 18")
                exit(2)
        else:
            # Anything else goes through dateutil's flexible parser.
            try:
                parsed = dateutil.parser.parse(time)
            except (ValueError, OverflowError, TypeError):
                print(f"Invalid date format: {time}\n")
                print("Please use one of these formats:")
                print("   - Compact: 'YYYYMMDDHH' (e.g., 2024073112)")
                print("   - ISO: 'YYYY-MM-DDTHH' or 'YYYY-MM-DDTHH:00:00'")
                print("   - Initialization time hour must be 00, 06, 12, or 18")
                exit(2)

        if parsed > datetime.now():
            print(f"How would it be to live in {parsed} ?\n")
            print("Looks like you are coming from the future!\n")
            exit(1111)

        return parsed.strftime('%Y-%m-%dT%H:00:00')

    except Exception:
        # Catch-all keeps the CLI from ever dumping a traceback here.
        print(f"Invalid date format: {time}")
        print("Please check your input format and try again")
        exit(2)
255
+
256
+ # Save API response data to a file in either JSON or CSV format
257
def save_csv_json(save_to_file, response, csv_data_key=None):
    """
    Save Data API response data to a file in either JSON or CSV format.

    Args:
        save_to_file (str): The file path where the response will be saved.
        response (dict or list): The response data to save.
        csv_data_key (str, optional): Key to extract data for CSV. Defaults to None.
    """
    if '.' not in save_to_file:
        print("You have to provide a file type for your filename.")
        print("Supported formats:")
        print("  - .csv")
        print("  - .json")
        exit(2)
    elif not response:
        print("There are no available data to save to file.")
        exit(1)
    elif save_to_file.lower().endswith('.json'):
        # JSON: dump the payload verbatim.
        with open(save_to_file, 'w', encoding='utf-8') as out:
            json.dump(response, out, indent=4)
        print("Saved to", save_to_file)
    elif save_to_file.lower().endswith('.csv'):
        payload = response.get(csv_data_key, []) if csv_data_key else response
        if not payload:
            print("No data available to save to CSV.")
            return

        # Forecasts arrive as a nested list; use the first inner list.
        if isinstance(payload, list) and payload and isinstance(payload[0], list):
            payload = payload[0]

        if isinstance(payload, list) and all(isinstance(item, dict) for item in payload):
            # List of row dicts: headers come from the first row.
            headers = payload[0].keys() if payload else []
        elif isinstance(payload, dict):
            # A dict either wraps a list of row dicts under some key, or
            # is itself a single flat row.
            for key, value in payload.items():
                if isinstance(value, list) and all(isinstance(item, dict) for item in value):
                    headers = value[0].keys() if value else []
                    payload = value
                    break
            else:
                headers = payload.keys()
                payload = [payload]
        else:
            print("Unsupported data format for CSV.")
            exit(5)

        with open(save_to_file, mode='w', newline='', encoding='utf-8') as out:
            writer = csv.DictWriter(out, fieldnames=headers)
            writer.writeheader()
            for row in payload:
                # Missing values are written as the literal string 'None'.
                writer.writerow({k: 'None' if v is None or v == '' else v
                                 for k, v in row.items()})
        print("Saved to", save_to_file)
    else:
        print("Unsupported file format. Please use either .json or .csv.")
        exit(4)
319
+
320
def convert_to_netcdf(data, curtime, output_filename=None):
    """
    Write observation data as netCDF for the WMO ISARRA program.

    Variable names, attributes and the file-name pattern follow the UASDC
    style described at:
    https://github.com/synoptic/wmo-uasdc/tree/main/raw_uas_to_netCDF

    Args:
        data (dict): Mapping of observation id -> observation dict.
        curtime (int): UNIX timestamp used to build the default file name.
        output_filename (str, optional): Explicit output path; when None a
            name is derived from the mission name and curtime.
    """
    # Local imports keep these heavyweight deps optional for the module.
    import xarray as xr
    import pandas as pd
    import numpy as np

    # WindBorne -> ISARRA variable names
    isarra_names = {
        'latitude': 'lat',
        'longitude': 'lon',
        'altitude': 'altitude',
        'temperature': 'air_temperature',
        'wind_direction': 'wind_direction',
        'wind_speed': 'wind_speed',
        'pressure': 'air_pressure',
        'humidity_mixing_ratio': 'humidity_mixing_ratio',
        'index': 'obs',
    }

    # Flatten the dict of observations, mapping 'None' strings to None.
    records = []
    for obs_data in data.values():
        records.append({k: None if v == 'None' else v for k, v in obs_data.items()})

    # pandas -> xarray -> netcdf is the simplest conversion path.
    frame = pd.DataFrame(records)

    # Force the physical quantities to float (bad values become NaN).
    for column in ('latitude', 'longitude', 'altitude', 'pressure', 'temperature',
                   'speed_u', 'speed_v', 'specific_humidity', 'timestamp'):
        if column in frame.columns:
            frame[column] = pd.to_numeric(frame[column], errors='coerce')

    dataset = xr.Dataset.from_dataframe(frame)

    # File-name pieces; dropsondes may carry no mission name.
    stamp = datetime.fromtimestamp(curtime, tz=timezone.utc)
    mission_name = str(frame['mission_name'].iloc[0]) if (not frame.empty and not pd.isna(frame['mission_name'].iloc[0])) else ' '

    if output_filename:
        output_file = output_filename
    else:
        output_file = f"WindBorne_{mission_name}_{stamp.year:04d}-{stamp.month:02d}-{stamp.day:02d}_{stamp.hour:02d}.nc"

    # ---- Derived quantities ----

    # specific humidity (mg/kg, presumably — see mg_to_kg) -> mixing ratio
    mg_to_kg = 1000000.
    if not all(x is None for x in dataset['specific_humidity'].data):
        dataset['humidity_mixing_ratio'] = (dataset['specific_humidity'] / mg_to_kg) / (1 - (dataset['specific_humidity'] / mg_to_kg))
    else:
        dataset['humidity_mixing_ratio'] = dataset['specific_humidity']

    # Wind speed and direction from the u/v components.
    dataset['wind_speed'] = np.sqrt(dataset['speed_u'] * dataset['speed_u'] + dataset['speed_v'] * dataset['speed_v'])
    dataset['wind_direction'] = np.mod(180 + (180 / np.pi) * np.arctan2(dataset['speed_u'], dataset['speed_v']), 360)

    dataset['time'] = dataset['timestamp'].astype(float)
    dataset = dataset.assign_coords(time=("time", dataset['time'].data))

    # Drop intermediates not wanted in the netcdf output.
    droppable = ['humidity', 'speed_u', 'speed_v', 'speed_x', 'speed_y',
                 'specific_humidity', 'timestamp', 'mission_name']
    dataset = dataset.drop_vars([name for name in droppable if name in dataset])

    # Rename to the ISARRA vocabulary.
    dataset = dataset.rename(isarra_names)

    # Per-variable attributes.
    dataset['time'].attrs = {'units': 'seconds since 1970-01-01T00:00:00', 'long_name': 'Time',
                             '_FillValue': float('nan'), 'processing_level': ''}
    dataset['lat'].attrs = {'units': 'degrees_north', 'long_name': 'Latitude',
                            '_FillValue': float('nan'), 'processing_level': ''}
    dataset['lon'].attrs = {'units': 'degrees_east', 'long_name': 'Longitude',
                            '_FillValue': float('nan'), 'processing_level': ''}
    dataset['altitude'].attrs = {'units': 'meters_above_sea_level', 'long_name': 'Altitude',
                                 '_FillValue': float('nan'), 'processing_level': ''}
    dataset['air_temperature'].attrs = {'units': 'Kelvin', 'long_name': 'Air Temperature',
                                        '_FillValue': float('nan'), 'processing_level': ''}
    dataset['wind_speed'].attrs = {'units': 'm/s', 'long_name': 'Wind Speed',
                                   '_FillValue': float('nan'), 'processing_level': ''}
    dataset['wind_direction'].attrs = {'units': 'degrees', 'long_name': 'Wind Direction',
                                       '_FillValue': float('nan'), 'processing_level': ''}
    dataset['humidity_mixing_ratio'].attrs = {'units': 'kg/kg', 'long_name': 'Humidity Mixing Ratio',
                                              '_FillValue': float('nan'), 'processing_level': ''}
    dataset['air_pressure'].attrs = {'units': 'Pa', 'long_name': 'Atmospheric Pressure',
                                     '_FillValue': float('nan'), 'processing_level': ''}

    # Global attributes shared across all UASDC providers.
    dataset.attrs['Conventions'] = "CF-1.8, WMO-CF-1.0"
    dataset.attrs['wmo__cf_profile'] = "FM 303-2024"
    dataset.attrs['featureType'] = "trajectory"

    # Provider-specific global attributes.
    dataset.attrs['platform_name'] = "WindBorne Global Sounding Balloon"
    dataset.attrs['flight_id'] = mission_name
    dataset.attrs['site_terrain_elevation_height'] = 'not applicable'
    dataset.attrs['processing_level'] = "b1"
    dataset.to_netcdf(output_file)
428
+
429
def format_value(value, fortran_format, align=None):
    """
    Render a value according to a Fortran-style format code.

    Supported codes: Fw.d (float), Iw (integer), Aw (string, left- or
    right-aligned), Lw (logical, right-aligned 'T'/'F'). Missing values
    (None, or '' for F/I) yield an all-blank field of the declared width;
    output never exceeds the field width.
    """
    kind = fortran_format[0]

    if kind == 'F':
        width, decimals = fortran_format[1:].split('.')
        if value is None or value == '':
            return ' ' * int(width)
        return f"{value:>{width}.{decimals}f}"[:int(width)]

    if kind == 'I':
        width = int(fortran_format[1:])
        if value is None or value == '':
            return ' ' * width
        return f"{value:>{width}d}"[:width]

    if kind == 'A':
        width = int(fortran_format[1:])
        if value is None:
            return ' ' * width
        text = str(value)[:width]
        return text.rjust(width, ' ') if align == 'right' else text.ljust(width, ' ')

    if kind == 'L':
        # Anything truthy that spells "true" becomes 'T'; all else 'F'.
        flag = 'T' if value and value in ['T', 't', 'True', 'true', '1', True] else 'F'
        return flag.rjust(int(fortran_format[1:]), ' ')

    raise ValueError(f"Unknown format: {fortran_format}")
466
+
467
def safe_float(value, default=-888888.0):
    """
    Coerce value to float.

    Missing markers (None, '', 'None') and anything float() rejects all
    fall back to default (-888888.0 is the little_r missing sentinel).
    """
    try:
        if value in (None, '', 'None'):
            return default
        return float(value)
    except (ValueError, TypeError):
        return default
475
+
476
def format_little_r(observations):
    """
    Convert observations to Little_R format.

    Args:
        observations (iterable): Pairs of (observation_id, point), where
            point is a dict carrying timestamp, latitude, longitude, id,
            mission_name, pressure, temperature, altitude, humidity,
            speed_u and speed_v entries.

    Returns:
        list: One complete multi-line Little_R record per observation.
    """
    MISSING = -888888.0
    little_r_records = []

    for obs_id, point in observations:
        # Observation time
        observation_time = datetime.fromtimestamp(point['timestamp'], tz=timezone.utc)

        # Convert and validate fields. BUG FIX: unit conversions are only
        # applied to present values — converting the missing sentinel
        # (e.g. -888888 * 100 or -888888 + 273.15) would produce garbage
        # that consumers no longer recognize as "missing".
        pressure_hpa = safe_float(point.get('pressure'))
        pressure_pa = pressure_hpa * 100.0 if pressure_hpa != MISSING else MISSING

        temperature_c = safe_float(point.get('temperature'))
        temperature_k = temperature_c + 273.15 if temperature_c != MISSING else MISSING

        altitude = safe_float(point.get('altitude'))
        humidity = safe_float(point.get('humidity'))
        speed_u = safe_float(point.get('speed_u'))
        speed_v = safe_float(point.get('speed_v'))

        qc_ok = format_value(0, 'I7')
        missing_field = format_value(MISSING, 'F13.5')

        # Header record.
        header = ''.join([
            format_value(point.get('latitude'), 'F20.5'),    # latitude
            format_value(point.get('longitude'), 'F20.5'),   # longitude
            format_value(point.get('id'), 'A40'),            # ID
            format_value(point.get('mission_name'), 'A40'),  # name
            format_value('FM-35 TEMP', 'A40'),               # platform (FM-Code)
            format_value('WindBorne', 'A40'),                # source
            format_value('', 'F20.5'),                       # elevation (blank)
            format_value(-888888, 'I10'),                    # valid fields
            format_value(0, 'I10'),                          # num. errors
            format_value(0, 'I10'),                          # num. warnings
            format_value(0, 'I10'),                          # sequence number
            format_value(0, 'I10'),                          # num. duplicates
            format_value('T', 'L10'),                        # is sounding?
            format_value('F', 'L10'),                        # is bogus?
            format_value('F', 'L10'),                        # discard?
            format_value(-888888, 'I10'),                    # unix time (unused)
            format_value(-888888, 'I10'),                    # julian day (unused)
            # Date: A20, YYYYMMDDhhmmss right-aligned
            format_value(observation_time.strftime('%Y%m%d%H%M%S'), 'A20', align='right'),
            # 14 unused surface fields, each F13.5 value + I7 QC flag:
            # SLP, ref pressure, ground temp, SST, sfc pressure, precip,
            # daily max T, daily min T, night min T, 3hr and 24hr pressure
            # change, cloud cover, ceiling, precipitable water.
            (missing_field + qc_ok) * 14,
        ])

        # Data record: each field is F13.5 value + I7 QC flag.
        data_record = ''.join([
            format_value(pressure_pa, 'F13.5'), qc_ok,    # pressure (Pa)
            format_value(altitude, 'F13.5'), qc_ok,       # height (m)
            format_value(temperature_k, 'F13.5'), qc_ok,  # temperature (K)
            missing_field, qc_ok,                         # dew point (K)
            missing_field, qc_ok,                         # wind speed (m/s)
            missing_field, qc_ok,                         # wind direction (deg)
            format_value(speed_u, 'F13.5'), qc_ok,        # wind U (m/s)
            format_value(speed_v, 'F13.5'), qc_ok,        # wind V (m/s)
            format_value(humidity, 'F13.5'), qc_ok,       # relative humidity (%)
            missing_field, qc_ok,                         # thickness (m)
        ])

        # End record and tail record
        end_record = '-777777.00000 0-777777.00000 0-888888.00000 0-888888.00000 0-888888.00000 0-888888.00000 0-888888.00000 0-888888.00000 0-888888.00000 0-888888.00000 0'
        tail_record = ' 39 0 0'

        # Combine into a complete record
        complete_record = '\n'.join([header, data_record, end_record, tail_record, ''])
        little_r_records.append(complete_record)

    return little_r_records
676
+
677
+ # Download and save a file in .npy upon provided an S3 link
678
def download_and_save_npy(save_to_file, response):
    """
    Downloads data from a presigned S3 url contained in a response and saves it as a .npy file.

    Args:
        save_to_file (str): Path where to save the .npy file
        response: Response object whose .content holds the raw npy bytes

    Returns:
        bool: True if successful, False otherwise
    """
    try:
        print(f"Downloading data")
        # Decode the npy payload in memory, then re-save it to disk.
        payload = np.load(io.BytesIO(response.content))
        np.save(save_to_file, payload)
        print(f"Data Successfully saved to {save_to_file}")
        return True
    except requests.exceptions.RequestException as e:
        print(f"Error downloading the file: {e}")
        return False
    except Exception as e:
        print(f"Error processing the file: {e}")
        return False
706
+
707
+ # Download and save a file in .nc upon provided an S3 link
708
def download_and_save_nc(save_to_file, response):
    """
    Downloads data from a presigned S3 url contained in a response and saves it as a .nc file.

    Args:
        save_to_file (str): Path where to save the .nc file ('.nc' is
            appended when missing)
        response: Response object whose .content holds the raw bytes

    Returns:
        bool: True if successful, False otherwise
    """
    if not save_to_file.endswith('.nc'):
        save_to_file += '.nc'

    try:
        # The payload is already in memory; just write it out.
        with open(save_to_file, 'wb') as out:
            out.write(response.content)
    except requests.exceptions.RequestException as e:
        print(f"Error downloading the file: {e}")
        return False
    except Exception as e:
        print(f"Error processing the file: {e}")
        return False
    else:
        print(f"Data Successfully saved to {save_to_file}")
        return True
737
+
738
def save_as_geojson(filename, cyclone_data):
    """Convert and save cyclone data as GeoJSON, handling meridian crossing.

    Each cyclone becomes one MultiLineString feature; tracks that cross
    the antimeridian are split into separate segments so a viewer does
    not draw a line across the whole map.
    """
    features = []
    for cyclone_id, tracks in cyclone_data.items():
        segments = []
        segment = []

        for track in tracks:
            lon = float(track['longitude'])
            lat = float(track['latitude'])

            if not segment:
                segment.append([lon, lat])
                continue

            prev_lon = segment[-1][0]

            if abs(lon - prev_lon) > 180:
                # Crossed the meridian: close the running segment at the
                # map edge and restart it on the opposite edge.
                if prev_lon > 0 and lon < 0:
                    segment.append([180, lat])
                    segments.append(segment)
                    segment = [[-180, lat], [lon, lat]]
                elif prev_lon < 0 and lon > 0:
                    segment.append([-180, lat])
                    segments.append(segment)
                    segment = [[180, lat], [lon, lat]]
            else:
                segment.append([lon, lat])

        # Flush the trailing segment.
        if segment:
            segments.append(segment)

        features.append({
            "type": "Feature",
            "properties": {
                "cyclone_id": cyclone_id,
                "start_time": tracks[0]['time'],
                "end_time": tracks[-1]['time']
            },
            "geometry": {
                "type": "MultiLineString",
                "coordinates": segments
            }
        })

    geojson = {
        "type": "FeatureCollection",
        "features": features
    }

    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(geojson, f, indent=4)
    print("Saved to", filename)
802
+
803
def save_as_gpx(filename, cyclone_data):
    """Convert and save cyclone data as GPX, handling meridian crossing.

    Each cyclone becomes one <trk>; antimeridian crossings split the
    track into separate <trkseg> elements.
    """
    gpx = '<?xml version="1.0" encoding="UTF-8"?>\n'
    gpx += '<gpx version="1.1" creator="Windborne" xmlns="http://www.topografix.com/GPX/1/1">\n'

    for cyclone_id, tracks in cyclone_data.items():
        gpx += f' <trk>\n <name>{cyclone_id}</name>\n'

        current_segment = []

        for i in range(len(tracks)):
            lon = float(tracks[i]['longitude'])
            lat = float(tracks[i]['latitude'])

            if not current_segment:
                current_segment.append(tracks[i])
                continue

            prev_lon = float(current_segment[-1]['longitude'])

            # Check if we've crossed the meridian
            if abs(lon - prev_lon) > 180:
                # Write the current segment and start a new one
                gpx += ' <trkseg>\n'
                for point in current_segment:
                    gpx += f' <trkpt lat="{point["latitude"]}" lon="{point["longitude"]}">\n'
                    gpx += f' <time>{point["time"]}</time>\n'
                    gpx += ' </trkpt>\n'
                gpx += ' </trkseg>\n'

                current_segment = [tracks[i]]
            else:
                current_segment.append(tracks[i])

        # Write the last segment if it's not empty
        if current_segment:
            gpx += ' <trkseg>\n'
            for point in current_segment:
                gpx += f' <trkpt lat="{point["latitude"]}" lon="{point["longitude"]}">\n'
                gpx += f' <time>{point["time"]}</time>\n'
                gpx += ' </trkpt>\n'
            gpx += ' </trkseg>\n'

        gpx += ' </trk>\n'

    gpx += '</gpx>'

    with open(filename, 'w', encoding='utf-8') as f:
        f.write(gpx)
    # BUG FIX: the original printed a literal placeholder instead of the
    # actual destination path.
    print("Saved to", filename)
856
+
857
def save_as_kml(filename, cyclone_data):
    """Convert and save cyclone data as KML, handling meridian crossing.

    Each cyclone becomes one Placemark whose MultiGeometry holds one
    LineString per antimeridian-delimited segment.
    """
    kml = '<?xml version="1.0" encoding="UTF-8"?>\n'
    kml += '<kml xmlns="http://www.opengis.net/kml/2.2">\n<Document>\n'

    for cyclone_id, tracks in cyclone_data.items():
        kml += f' <Placemark>\n <name>{cyclone_id}</name>\n <MultiGeometry>\n'

        current_segment = []

        for i in range(len(tracks)):
            lon = float(tracks[i]['longitude'])
            lat = float(tracks[i]['latitude'])

            if not current_segment:
                current_segment.append(tracks[i])
                continue

            prev_lon = float(current_segment[-1]['longitude'])

            # Check if we've crossed the meridian
            if abs(lon - prev_lon) > 180:
                # Write the current segment and start a new one
                kml += ' <LineString>\n <coordinates>\n'
                coordinates = [f' {track["longitude"]},{track["latitude"]},{0}'
                               for track in current_segment]
                kml += '\n'.join(coordinates)
                kml += '\n </coordinates>\n </LineString>\n'

                current_segment = [tracks[i]]
            else:
                current_segment.append(tracks[i])

        # Write the last segment if it's not empty
        if current_segment:
            kml += ' <LineString>\n <coordinates>\n'
            coordinates = [f' {track["longitude"]},{track["latitude"]},{0}'
                           for track in current_segment]
            kml += '\n'.join(coordinates)
            kml += '\n </coordinates>\n </LineString>\n'

        kml += ' </MultiGeometry>\n </Placemark>\n'

    kml += '</Document>\n</kml>'

    with open(filename, 'w', encoding='utf-8') as f:
        f.write(kml)
    # BUG FIX: the original printed a literal placeholder instead of the
    # actual destination path.
    print("Saved to", filename)
906
+
907
def save_as_little_r(filename, cyclone_data):
    """Convert and save cyclone data in little_R format.

    Only the track positions are real; all meteorological fields are
    written as the little_R missing sentinel (-888888.0).
    """
    with open(filename, 'w', encoding='utf-8') as f:
        for cyclone_id, tracks in cyclone_data.items():
            for track in tracks:
                # Parse the ISO timestamp ('Z' suffix -> explicit UTC offset).
                dt = datetime.fromisoformat(track['time'].replace('Z', '+00:00'))

                lat = float(track['latitude'])
                lon = float(track['longitude'])

                # Header line 1: position, station name, ids, date, cyclone id.
                f.write(
                    f"{lat:20.5f}{lon:20.5f}{'HMS':40}"
                    f"{0:10d}{0:10d}{0:10d}"
                    f"{dt.year:10d}{dt.month:10d}{dt.day:10d}{dt.hour:10d}{0:10d}"
                    f"{0:10d}{0:10.3f}{cyclone_id:40}"
                    "\n"
                )

                # Header line 2.
                f.write(f"{0:20.5f}{1:10d}{0:10.3f}\n")

                # Data line (p, z, t, d, s, d slots) — position only, with
                # the end-of-data marker appended.
                f.write(
                    f"{-888888.0:13.5f}{lat:13.5f}{-888888.0:13.5f}"
                    f"{-888888.0:13.5f}{-888888.0:13.5f}{lon:13.5f}"
                    f"{0:7d}\n"
                )

                # End-of-record line.
                f.write(f"{-777777.0:13.5f}\n")

    print("Saved to", filename)
937
+
938
def sync_to_s3(data, bucket_name, object_name):
    """Upload str(data) to s3://<bucket_name>/<object_name> via boto3."""
    client = boto3.client("s3")
    client.put_object(Body=str(data), Bucket=bucket_name, Key=object_name)