windborne 1.0.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
windborne/utils.py ADDED
@@ -0,0 +1,940 @@
1
+ from .config import CLIENT_ID, API_KEY
2
+
3
+ import requests
4
+ import jwt
5
+ import time
6
+ import re
7
+ import uuid
8
+ from datetime import datetime, timezone
9
+ import dateutil.parser
10
+ import boto3
11
+ import io
12
+ import json
13
+ import csv
14
+
15
+ import numpy as np
16
+
17
def is_valid_uuid_v4(client_id):
    """Return True if *client_id* is a canonical UUID v4 string.

    The round-trip comparison rejects strings that uuid.UUID would accept
    but that are not in canonical lowercase-hyphenated v4 form.
    """
    try:
        return str(uuid.UUID(client_id, version=4)) == client_id
    except (ValueError, AttributeError, TypeError):
        # Malformed UUID text, or a non-string input (None, int, ...), which
        # previously escaped the ValueError-only handler and crashed.
        return False
23
+
24
def is_valid_client_id_format(client_id):
    """Return True when *client_id* is one or more of [a-z0-9_] only."""
    # fullmatch anchors the pattern to the whole string.
    return bool(re.fullmatch(r"[a-z0-9_]+", client_id))
27
+
28
def _print_credentials_docs():
    """Print pointers to the documentation on configuring API credentials."""
    print("You may refer to https://windbornesystems.com/docs/api/cli#introduction\n"
          "for instructions on how to set your credentials as environment variables for CLI and Code usage\n\n"
          "and to https://windbornesystems.com/docs/api/pip_data#introduction\n"
          "for instruction on how to set your credentials for code usage.")


def _exit_missing_credentials():
    """Explain which credential is unset and exit with its status code."""
    if not CLIENT_ID and not API_KEY:
        message = "To access the WindBorne API, you need to set your Client ID and API key by setting the environment variables WB_CLIENT_ID and WB_API_KEY."
        code = 80
    elif not CLIENT_ID:
        message = "To access the WindBorne API, you need to set your Client ID by setting the environment variable WB_CLIENT_ID."
        code = 90
    else:
        # Typo fix: this message previously read "CAPI key".
        message = "To access the WindBorne API, you need to set your API key by setting the environment variable WB_API_KEY."
        code = 91
    print(message)
    print("--------------------------------------")
    _print_credentials_docs()
    print("--------------------------------------")
    print("To get an API key, email data@windbornesystems.com.")
    exit(code)


def _exit_misformatted_api_key(reason, code):
    """Explain why WB_API_KEY failed validation and exit with *code*."""
    print("Your API key is misformatted.")
    print("--------------------------------------")
    print(reason)
    print("--------------------------------------")
    _print_credentials_docs()
    print("--------------------------------------")
    print(f"Current API key: {API_KEY}")
    exit(code)


# Authenticate requests using a JWT | no reveal of underlying key
def make_api_request(url, params=None, return_type=None):
    """
    Perform an authenticated GET against the WindBorne API.

    Credentials are validated first; every failure path prints guidance and
    exits with a distinct status code. Authentication uses a short-lived JWT
    signed with the API key, so the key itself never travels.

    Args:
        url (str): endpoint to request.
        params (dict, optional): query parameters.
        return_type (str, optional): None returns response.json() (Data API);
            'all' returns the raw response (Forecasts API, to expose S3 urls).

    Returns:
        dict | requests.Response | None: parsed JSON, the raw response, or
        None when a non-HTTP request error was printed.
    """
    if not CLIENT_ID or not API_KEY:
        _exit_missing_credentials()

    # Validate WB_CLIENT_ID format
    if not (is_valid_uuid_v4(CLIENT_ID) or is_valid_client_id_format(CLIENT_ID)):
        print("Your Client ID is misformatted.")
        print("--------------------------------------")
        print("It should either be a valid UUID v4 or consist of only lowercase letters, digits, and underscores ([a-z0-9_]).")
        print("--------------------------------------")
        _print_credentials_docs()
        print("--------------------------------------")
        print(f"Current Client ID: {CLIENT_ID}")
        exit(92)

    # Validate WB_API_KEY for both newer and older formats
    if API_KEY.startswith("wb_"):
        if len(API_KEY) != 35:
            _exit_misformatted_api_key("API keys starting with 'wb_' must be 35 characters long (including the 'wb_' prefix).", 93)
    elif len(API_KEY) != 32:  # For early tokens
        _exit_misformatted_api_key("API keys created in 2023 or earlier must be exactly 32 characters long.", 94)

    # Sign a fresh JWT per request; only the HMAC signature derived from the
    # API key is sent, never the key itself.
    signed_token = jwt.encode({
        'client_id': CLIENT_ID,
        'iat': int(time.time()),
    }, API_KEY, algorithm='HS256')

    try:
        # requests treats params=None identically to omitting the argument.
        response = requests.get(url, auth=(CLIENT_ID, signed_token), params=params)
        response.raise_for_status()

        if return_type is None:
            # For Data API
            return response.json()
        elif return_type == 'all':
            # For Forecasts API (except tcs) --> return whole response not .json to obtain S3 url
            return response
    except requests.exceptions.HTTPError as http_err:
        status = http_err.response.status_code
        if status == 403:
            print("--------------------------------------")
            print("We couldn't authenticate your request.")
            print("--------------------------------------")
            print("Please make sure you have properly set your WB_CLIENT_ID and WB_API_KEY.\n")
            print("You can verify this by running\necho $WB_CLIENT_ID and echo $WB_API_KEY in your terminal.\n")
            print("To get an API key, email data@windbornesystems.com.")
        elif status in [404, 400]:
            print("-------------------------------------------------------")
            print("Our server couldn't find the information you requested.")
            print("-------------------------------------------------------")
            print(f"URL: {url}")
            print(f"Error: {status}")
            print("-------------------------------------------------------")
            if params:
                print("\nParameters provided:")
                for key, value in params.items():
                    print(f" {key}: {value}")
            elif 'missions/' in url:
                mission_id = url.split('/missions/')[1].split('/')[0]
                print(f"Mission ID provided: {mission_id}")
                print(f"We couldn't find a mission with id: {mission_id}")
        elif status == 502:
            # Exponential backoff on Bad Gateway. BUG FIX: the original code
            # discarded the retried response and always exited with 502 even
            # when a retry succeeded, and it also dropped the query params on
            # retry. A successful retry is now returned to the caller.
            for attempt in range(1, 5):
                print("502 Bad Gateway, sleeping and retrying")
                time.sleep(2 ** attempt)
                response = requests.get(url, auth=(CLIENT_ID, signed_token), params=params)
                if response.status_code != 502:
                    break
            if response.ok:
                return response if return_type == 'all' else response.json()
        else:
            print(f"HTTP error occurred\n\n{http_err}")
            if params:
                print("\nParameters provided:")
                for key, value in params.items():
                    print(f" {key}: {value}")
        exit(status)
    except requests.exceptions.ConnectionError as conn_err:
        print(f"Connection error occurred\n\n{conn_err}")
    except requests.exceptions.Timeout as timeout_err:
        print(f"Timeout error occurred\n\n{timeout_err}")
    except requests.exceptions.RequestException as req_err:
        print(f"An error occurred\n\n{req_err}")
166
+
167
def to_unix_timestamp(date_string):
    """
    Convert a date string or integer to a UNIX timestamp.

    Supported string forms: 'YYYY-MM-DD HH:MM:SS', 'YYYY-MM-DD_HH:MM', and
    ISO 'YYYY-MM-DDTHH:MM:SS.fffZ'. All are interpreted as UTC.

    Args:
        date_string (str | int | None): date text, an existing UNIX
            timestamp, or None.

    Returns:
        int | None: the UNIX timestamp, or None when the input is None.
        Exits on future dates (1111) or unparseable input (11).
    """
    if date_string is None:
        return None
    if isinstance(date_string, int):
        # Already a UNIX timestamp — pass it through untouched.
        return date_string
    if isinstance(date_string, str):
        known_formats = (
            "%Y-%m-%d %H:%M:%S",      # e.g., 2024-12-05 14:48:00
            "%Y-%m-%d_%H:%M",         # e.g., 2024-12-05_14:48
            "%Y-%m-%dT%H:%M:%S.%fZ",  # e.g., 2024-12-05T14:48:00.000Z
        )
        now = datetime.now(timezone.utc)
        for candidate in known_formats:
            try:
                parsed = datetime.strptime(date_string, candidate).replace(tzinfo=timezone.utc)
            except ValueError:
                continue
            if parsed > now:
                print(f"How would it be to live in {parsed} ?\n")
                print("Looks like you are coming from the future!\n")
                exit(1111)
            return int(parsed.timestamp())

        # None of the known formats matched.
        print("Invalid date format. Please use one of the supported formats:\n"
              "- YYYY-MM-DD HH:MM:SS\n"
              "- YYYY-MM-DD_HH:MM\n"
              "- YYYY-MM-DDTHH:MM:SS.fffZ")
        exit(11)
209
+
210
def parse_time(time, init_time_flag=None):
    """
    Parse and validate an initialization time given in multiple formats.

    Accepts the compact 'YYYYMMDDHH' form or anything dateutil can parse.
    Returns the time formatted as 'YYYY-MM-DDTHH:00:00', None for None
    input; exits on invalid input (2) or future dates (1111).
    """
    if time is None:
        return None

    try:
        if re.match(r'^\d{10}$', time):
            # Compact form: YYYYMMDDHH
            try:
                when = datetime.strptime(time, "%Y%m%d%H")
            except (ValueError, OverflowError):
                print(f"Invalid date values in: {time}")
                print("Make sure your date values are valid")
                exit(2)
            # Initialization times only exist at the synoptic hours.
            if init_time_flag and when.hour not in (0, 6, 12, 18):
                print("Initialization time hour must be 00, 06, 12, or 18")
                exit(2)
        else:
            try:
                when = dateutil.parser.parse(time)
            except (ValueError, OverflowError, TypeError):
                print(f"Invalid date format: {time}\n")
                print("Please use one of these formats:")
                print(" - Compact: 'YYYYMMDDHH' (e.g., 2024073112)")
                print(" - ISO: 'YYYY-MM-DDTHH' or 'YYYY-MM-DDTHH:00:00'")
                print(" - Initialization time hour must be 00, 06, 12, or 18")
                exit(2)

        if when > datetime.now():
            print(f"How would it be to live in {when} ?\n")
            print("Looks like you are coming from the future!\n")
            exit(1111)

        return when.strftime('%Y-%m-%dT%H:00:00')

    except Exception:
        # SystemExit from the branches above is not an Exception, so the
        # exit codes set there are preserved.
        print(f"Invalid date format: {time}")
        print("Please check your input format and try again")
        exit(2)
255
+
256
def save_csv_json(save_to_file, response, csv_data_key=None):
    """
    Save a Data API response to disk as JSON or CSV.

    Args:
        save_to_file (str): destination path; the extension picks the format.
        response (dict or list): response payload to write.
        csv_data_key (str, optional): key whose value holds the CSV rows.
    """
    if '.' not in save_to_file:
        print("You have to provide a file type for your filename.")
        print("Supported formats:")
        print(" - .csv")
        print(" - .json")
        exit(2)
    if not response:
        print("There are no available data to save to file.")
        exit(1)

    lowered = save_to_file.lower()
    if lowered.endswith('.json'):
        with open(save_to_file, 'w', encoding='utf-8') as f:
            json.dump(response, f, indent=4)
        print("Saved to", save_to_file)
    elif lowered.endswith('.csv'):
        data = response.get(csv_data_key, []) if csv_data_key else response
        if not data:
            print("No data available to save to CSV.")
            return

        # Forecast responses nest their rows one list deep; unwrap the first.
        if isinstance(data, list) and data and isinstance(data[0], list):
            data = data[0]

        if isinstance(data, list) and all(isinstance(item, dict) for item in data):
            # A list of row dicts: headers come from the first row.
            headers = data[0].keys() if data else []
        elif isinstance(data, dict):
            # Prefer the first value that is itself a list of dicts;
            # otherwise treat the whole dict as a single row.
            for key, value in data.items():
                if isinstance(value, list) and all(isinstance(item, dict) for item in value):
                    headers = value[0].keys() if value else []
                    data = value
                    break
            else:
                headers = data.keys()
                data = [data]
        else:
            print("Unsupported data format for CSV.")
            exit(5)

        with open(save_to_file, mode='w', newline='', encoding='utf-8') as file:
            writer = csv.DictWriter(file, fieldnames=headers)
            writer.writeheader()
            for row in data:
                # Missing values (None or '') are written as the text 'None'.
                writer.writerow({k: 'None' if v is None or v == '' else v for k, v in row.items()})
        print("Saved to", save_to_file)
    else:
        print("Unsupported file format. Please use either .json or .csv.")
        exit(4)
319
+
320
def convert_to_netcdf(data, curtime, output_filename=None):
    """
    Export observation data as a netCDF file in the WMO UASDC / ISARRA style.

    Args:
        data: mapping of observation id -> observation dict. String values of
            'None' are converted to real None (missing) before processing.
        curtime: UNIX timestamp used to build the default output filename.
        output_filename: explicit output path; when falsy, a name of the form
            "WindBorne_<mission>_<YYYY-MM-DD>_<HH>.nc" is generated.
    """
    # This module outputs data in netcdf format for the WMO ISARRA program. The output format is netcdf
    # and the style (variable names, file names, etc.) are described here:
    # https://github.com/synoptic/wmo-uasdc/tree/main/raw_uas_to_netCDF

    # Import necessary libraries
    import xarray as xr
    import pandas as pd
    import numpy as np

    # Mapping of WindBorne names to ISARRA names
    rename_dict = {
        'latitude': 'lat',
        'longitude': 'lon',
        'altitude': 'altitude',
        'temperature': 'air_temperature',
        'wind_direction': 'wind_direction',
        'wind_speed': 'wind_speed',
        'pressure': 'air_pressure',
        'humidity_mixing_ratio': 'humidity_mixing_ratio',
        'index': 'obs',
    }

    # Convert dictionary to list for DataFrame
    data_list = []
    for obs_id, obs_data in data.items():
        # Convert 'None' strings to None type
        clean_data = {k: None if v == 'None' else v for k, v in obs_data.items()}
        data_list.append(clean_data)

    # Put the data in a panda dataframe in order to easily push to xarray then netcdf output
    df = pd.DataFrame(data_list)

    # Convert numeric columns to float (non-numeric entries become NaN via errors='coerce')
    numeric_columns = ['latitude', 'longitude', 'altitude', 'pressure', 'temperature',
                       'speed_u', 'speed_v', 'specific_humidity', 'timestamp']
    for col in numeric_columns:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')

    ds = xr.Dataset.from_dataframe(df)

    # Build the filename and save some variables for use later
    mt = datetime.fromtimestamp(curtime, tz=timezone.utc)
    # Handle dropsondes
    # NOTE(review): assumes every observation dict carries a 'mission_name'
    # key — df['mission_name'] would raise KeyError otherwise; confirm upstream.
    mission_name = str(df['mission_name'].iloc[0]) if (not df.empty and not pd.isna(df['mission_name'].iloc[0])) else ' '

    if output_filename:
        output_file = output_filename
    else:
        output_file = f"WindBorne_{mission_name}_{mt.year:04d}-{mt.month:02d}-{mt.day:02d}_{mt.hour:02d}.nc"

    # Derived quantities calculated here:

    # convert from specific humidity to humidity_mixing_ratio
    # (the 1e6 divisor suggests specific humidity arrives in mg/kg — TODO confirm units)
    mg_to_kg = 1000000.
    if not all(x is None for x in ds['specific_humidity'].data):
        ds['humidity_mixing_ratio'] = (ds['specific_humidity'] / mg_to_kg) / (1 - (ds['specific_humidity'] / mg_to_kg))
    else:
        ds['humidity_mixing_ratio'] = ds['specific_humidity']

    # Wind speed and direction from components
    # (presumably the meteorological "blowing from" convention, degrees in [0, 360) — confirm)
    ds['wind_speed'] = np.sqrt(ds['speed_u']*ds['speed_u'] + ds['speed_v']*ds['speed_v'])
    ds['wind_direction'] = np.mod(180 + (180 / np.pi) * np.arctan2(ds['speed_u'], ds['speed_v']), 360)

    # Seconds-since-epoch as a float coordinate named 'time'
    ds['time'] = ds['timestamp'].astype(float)
    ds = ds.assign_coords(time=("time", ds['time'].data))

    # Now that calculations are done, remove variables not needed in the netcdf output
    variables_to_drop = ['humidity', 'speed_u', 'speed_v', 'speed_x', 'speed_y', 'specific_humidity',
                         'timestamp', 'mission_name']
    existing_vars = [var for var in variables_to_drop if var in ds]
    ds = ds.drop_vars(existing_vars)

    # Rename the variables
    ds = ds.rename(rename_dict)

    # Adding attributes to variables in the xarray dataset
    ds['time'].attrs = {'units': 'seconds since 1970-01-01T00:00:00', 'long_name': 'Time', '_FillValue': float('nan'),
                        'processing_level': ''}
    ds['lat'].attrs = {'units': 'degrees_north', 'long_name': 'Latitude', '_FillValue': float('nan'),
                       'processing_level': ''}
    ds['lon'].attrs = {'units': 'degrees_east', 'long_name': 'Longitude', '_FillValue': float('nan'),
                       'processing_level': ''}
    ds['altitude'].attrs = {'units': 'meters_above_sea_level', 'long_name': 'Altitude', '_FillValue': float('nan'),
                            'processing_level': ''}
    ds['air_temperature'].attrs = {'units': 'Kelvin', 'long_name': 'Air Temperature', '_FillValue': float('nan'),
                                   'processing_level': ''}
    ds['wind_speed'].attrs = {'units': 'm/s', 'long_name': 'Wind Speed', '_FillValue': float('nan'),
                              'processing_level': ''}
    ds['wind_direction'].attrs = {'units': 'degrees', 'long_name': 'Wind Direction', '_FillValue': float('nan'),
                                  'processing_level': ''}
    ds['humidity_mixing_ratio'].attrs = {'units': 'kg/kg', 'long_name': 'Humidity Mixing Ratio',
                                         '_FillValue': float('nan'), 'processing_level': ''}
    ds['air_pressure'].attrs = {'units': 'Pa', 'long_name': 'Atmospheric Pressure', '_FillValue': float('nan'),
                                'processing_level': ''}

    # Add Global Attributes synonymous across all UASDC providers
    ds.attrs['Conventions'] = "CF-1.8, WMO-CF-1.0"
    ds.attrs['wmo__cf_profile'] = "FM 303-2024"
    ds.attrs['featureType'] = "trajectory"

    # Add Global Attributes unique to Provider
    ds.attrs['platform_name'] = "WindBorne Global Sounding Balloon"
    ds.attrs['flight_id'] = mission_name
    ds.attrs['site_terrain_elevation_height'] = 'not applicable'
    ds.attrs['processing_level'] = "b1"
    ds.to_netcdf(output_file)
428
+
429
def format_value(value, fortran_format, align=None):
    """
    Render *value* according to a Fortran-style format code.

    Supported codes: F<w>.<d> fixed-point, I<w> integer, A<w> text
    (left-aligned unless align='right'), and L<w> logical (right-aligned
    'T'/'F'). Missing values (None, or '' for F/I) render as a blank field.

    Raises:
        ValueError: for an unrecognized format code.
    """
    kind = fortran_format[0]

    if kind == 'F':
        width, precision = fortran_format[1:].split('.')
        if value is None or value == '':
            return ' ' * int(width)
        # Fixed-width float, truncated if the rendering overflows the field.
        return f"{value:>{width}.{precision}f}"[:int(width)]

    if kind == 'I':
        width = int(fortran_format[1:])
        if value is None or value == '':
            return ' ' * width
        return f"{value:>{width}d}"[:width]

    if kind == 'A':
        width = int(fortran_format[1:])
        if value is None:
            return ' ' * width
        text = str(value)[:width]
        return text.rjust(width, ' ') if align == 'right' else text.ljust(width, ' ')

    if kind == 'L':
        truthy = value and value in ['T', 't', 'True', 'true', '1', True]
        flag = 'T' if truthy else 'F'
        return flag.rjust(int(fortran_format[1:]), ' ')

    raise ValueError(f"Unknown format: {fortran_format}")
466
+
467
def safe_float(value, default=-888888.0):
    """
    Coerce *value* to float, returning *default* for missing or bad input.

    Missing means None, the empty string, or the literal string 'None'; bad
    means anything float() rejects. The default is the little_R missing
    sentinel.
    """
    try:
        # Membership test stays inside the try so odd __eq__ implementations
        # (e.g. array-like values) fall through to the default too.
        if value in (None, '', 'None'):
            return default
        return float(value)
    except (ValueError, TypeError):
        return default
475
+
476
def format_little_r(observations):
    """
    Convert observations to Little_R format.

    Args:
        observations (list): (observation id, point dict) pairs

    Returns:
        list: one complete Little_R record string per observation
    """
    # A missing surface/data field: value F13.5 plus its QC flag I7.
    missing_field = format_value(-888888.0, 'F13.5') + format_value(0, 'I7')
    little_r_records = []

    for _, point in observations:
        # Observation time (UTC)
        observation_time = datetime.fromtimestamp(point['timestamp'], tz=timezone.utc)

        # Convert and validate fields; missing values become the -888888
        # sentinel, which deliberately propagates through the unit
        # conversions exactly as in little_R convention.
        pressure_pa = safe_float(point.get('pressure')) * 100.0
        temperature_k = safe_float(point.get('temperature')) + 273.15
        altitude = safe_float(point.get('altitude'))
        humidity = safe_float(point.get('humidity'))
        speed_u = safe_float(point.get('speed_u'))
        speed_v = safe_float(point.get('speed_v'))

        # Header: fixed-order fields, then 14 all-missing surface fields
        # (SLP, ref pressure, ground temp, SST, sfc pressure, precip, daily
        # max/min T, night min T, 3hr/24hr pressure change, cloud cover,
        # ceiling, precipitable water).
        header_fields = [
            format_value(point.get('latitude'), 'F20.5'),     # Latitude
            format_value(point.get('longitude'), 'F20.5'),    # Longitude
            format_value(point.get('id'), 'A40'),             # ID
            format_value(point.get('mission_name'), 'A40'),   # Name
            format_value('FM-35 TEMP', 'A40'),                # Platform (FM-Code)
            format_value('WindBorne', 'A40'),                 # Source
            format_value('', 'F20.5'),                        # Elevation
            format_value(-888888, 'I10'),                     # Valid fields
            format_value(0, 'I10'),                           # Num. errors
            format_value(0, 'I10'),                           # Num. warnings
            format_value(0, 'I10'),                           # Sequence number
            format_value(0, 'I10'),                           # Num. duplicates
            format_value('T', 'L10'),                         # Is sounding?
            format_value('F', 'L10'),                         # Is bogus?
            format_value('F', 'L10'),                         # Discard?
            format_value(-888888, 'I10'),                     # Unix time (left missing)
            format_value(-888888, 'I10'),                     # Julian day
            format_value(observation_time.strftime('%Y%m%d%H%M%S'), 'A20', align='right'),  # Date
        ]
        header = ''.join(header_fields) + missing_field * 14

        # Data record: value/QC pairs in fixed order.
        data_values = [
            pressure_pa,    # Pressure (Pa)
            altitude,       # Height (m)
            temperature_k,  # Temperature (K)
            -888888.0,      # Dew point (K)
            -888888.0,      # Wind speed (m/s)
            -888888.0,      # Wind direction (deg)
            speed_u,        # Wind U (m/s)
            speed_v,        # Wind V (m/s)
            humidity,       # Relative humidity (%)
            -888888.0,      # Thickness (m)
        ]
        data_record = ''.join(
            format_value(v, 'F13.5') + format_value(0, 'I7') for v in data_values
        )

        # End record and tail record
        end_record = '-777777.00000 0-777777.00000 0-888888.00000 0-888888.00000 0-888888.00000 0-888888.00000 0-888888.00000 0-888888.00000 0-888888.00000 0-888888.00000 0'
        tail_record = ' 39 0 0'

        # Combine into a complete record (trailing newline via the final '')
        little_r_records.append('\n'.join([header, data_record, end_record, tail_record, '']))

    return little_r_records
676
+
677
def download_and_save_npy(save_to_file, response):
    """
    Persist the .npy payload carried by *response* to *save_to_file*.

    Args:
        save_to_file (str): path for the output .npy file
        response: response object whose .content holds the serialized array

    Returns:
        bool: True if successful, False otherwise
    """
    try:
        print(f"Downloading data")
        # Round-trip the payload through an in-memory buffer, then persist.
        np.save(save_to_file, np.load(io.BytesIO(response.content)))
        print(f"Data Successfully saved to {save_to_file}")
        return True
    except requests.exceptions.RequestException as err:
        print(f"Error downloading the file: {err}")
        return False
    except Exception as err:
        print(f"Error processing the file: {err}")
        return False
706
+
707
def download_and_save_nc(save_to_file, response):
    """
    Persist the NetCDF payload carried by *response* to *save_to_file*.

    A '.nc' extension is appended when the given path lacks one.

    Args:
        save_to_file (str): destination path
        response: response object whose .content holds the file bytes

    Returns:
        bool: True if successful, False otherwise
    """
    if not save_to_file.endswith('.nc'):
        save_to_file += '.nc'

    try:
        # Write the raw bytes straight to disk.
        with open(save_to_file, 'wb') as out:
            out.write(response.content)
        print(f"Data Successfully saved to {save_to_file}")
        return True
    except requests.exceptions.RequestException as err:
        print(f"Error downloading the file: {err}")
        return False
    except Exception as err:
        print(f"Error processing the file: {err}")
        return False
737
+
738
def save_as_geojson(filename, cyclone_data):
    """Convert and save cyclone data as GeoJSON, splitting tracks at the antimeridian."""
    features = []
    for cyclone_id, tracks in cyclone_data.items():
        segments = []
        segment = []

        for track in tracks:
            lon = float(track['longitude'])
            lat = float(track['latitude'])

            if not segment:
                segment.append([lon, lat])
                continue

            prev_lon = segment[-1][0]

            # A jump of more than 180 degrees means the track crossed the
            # antimeridian; close the segment at the edge and reopen it on
            # the other side at the same latitude.
            if abs(lon - prev_lon) > 180:
                if prev_lon > 0 and lon < 0:
                    # Eastward crossing: close at +180, restart at -180.
                    segment.append([180, lat])
                    segments.append(segment)
                    segment = [[-180, lat], [lon, lat]]
                elif prev_lon < 0 and lon > 0:
                    # Westward crossing: close at -180, restart at +180.
                    segment.append([-180, lat])
                    segments.append(segment)
                    segment = [[180, lat], [lon, lat]]
            else:
                segment.append([lon, lat])

        # Flush the trailing segment.
        if segment:
            segments.append(segment)

        features.append({
            "type": "Feature",
            "properties": {
                "cyclone_id": cyclone_id,
                "start_time": tracks[0]['time'],
                "end_time": tracks[-1]['time']
            },
            "geometry": {
                "type": "MultiLineString",
                "coordinates": segments
            }
        })

    collection = {
        "type": "FeatureCollection",
        "features": features
    }

    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(collection, f, indent=4)
    print("Saved to", filename)
802
+
803
def save_as_gpx(filename, cyclone_data):
    """Convert and save cyclone data as GPX, handling meridian crossing.

    Each cyclone becomes a <trk>; whenever consecutive points jump more than
    180 degrees in longitude (an antimeridian crossing) a new <trkseg> is
    started so viewers don't draw a line across the whole map.
    """

    def _segment_xml(points):
        # Render one <trkseg> element from a list of track-point dicts.
        xml = ' <trkseg>\n'
        for point in points:
            xml += f' <trkpt lat="{point["latitude"]}" lon="{point["longitude"]}">\n'
            xml += f' <time>{point["time"]}</time>\n'
            xml += ' </trkpt>\n'
        xml += ' </trkseg>\n'
        return xml

    gpx = '<?xml version="1.0" encoding="UTF-8"?>\n'
    gpx += '<gpx version="1.1" creator="Windborne" xmlns="http://www.topografix.com/GPX/1/1">\n'

    for cyclone_id, tracks in cyclone_data.items():
        gpx += f' <trk>\n <name>{cyclone_id}</name>\n'

        current_segment = []
        for track in tracks:
            if not current_segment:
                current_segment.append(track)
                continue

            prev_lon = float(current_segment[-1]['longitude'])
            # Antimeridian crossing: flush the segment and start a new one.
            if abs(float(track['longitude']) - prev_lon) > 180:
                gpx += _segment_xml(current_segment)
                current_segment = [track]
            else:
                current_segment.append(track)

        # Write the last segment if it's not empty
        if current_segment:
            gpx += _segment_xml(current_segment)

        gpx += ' </trk>\n'

    gpx += '</gpx>'

    with open(filename, 'w', encoding='utf-8') as f:
        f.write(gpx)
    # BUG FIX: previously printed the literal placeholder "(unknown)" instead
    # of the destination path; now consistent with the other save_* helpers.
    print("Saved to", filename)
856
+
857
def save_as_kml(filename, cyclone_data):
    """Convert and save cyclone data as KML, handling meridian crossing.

    Each cyclone becomes a Placemark containing a MultiGeometry; a new
    LineString is started wherever consecutive points jump more than 180
    degrees in longitude (an antimeridian crossing).
    """

    def _linestring_xml(points):
        # Render one LineString element from a list of track-point dicts
        # (altitude is always written as 0).
        coords = [f' {track["longitude"]},{track["latitude"]},{0}'
                  for track in points]
        return (' <LineString>\n <coordinates>\n'
                + '\n'.join(coords)
                + '\n </coordinates>\n </LineString>\n')

    kml = '<?xml version="1.0" encoding="UTF-8"?>\n'
    kml += '<kml xmlns="http://www.opengis.net/kml/2.2">\n<Document>\n'

    for cyclone_id, tracks in cyclone_data.items():
        kml += f' <Placemark>\n <name>{cyclone_id}</name>\n <MultiGeometry>\n'

        current_segment = []
        for track in tracks:
            if not current_segment:
                current_segment.append(track)
                continue

            prev_lon = float(current_segment[-1]['longitude'])
            # Antimeridian crossing: flush the segment and start a new one.
            if abs(float(track['longitude']) - prev_lon) > 180:
                kml += _linestring_xml(current_segment)
                current_segment = [track]
            else:
                current_segment.append(track)

        # Write the last segment if it's not empty
        if current_segment:
            kml += _linestring_xml(current_segment)

        kml += ' </MultiGeometry>\n </Placemark>\n'

    kml += '</Document>\n</kml>'

    with open(filename, 'w', encoding='utf-8') as f:
        f.write(kml)
    # BUG FIX: previously printed the literal placeholder "(unknown)" instead
    # of the destination path; now consistent with the other save_* helpers.
    print("Saved to", filename)
906
+
907
def save_as_little_r(filename, cyclone_data):
    """Convert and save cyclone data in little_R format."""
    with open(filename, 'w', encoding='utf-8') as f:
        for cyclone_id, tracks in cyclone_data.items():
            for track in tracks:
                lat = float(track['latitude'])
                lon = float(track['longitude'])
                # Parse the ISO timestamp (a trailing 'Z' means UTC).
                dt = datetime.fromisoformat(track['time'].replace('Z', '+00:00'))

                # Header line 1: position, 'HMS' platform tag, station id
                # numbers, date fields, and the cyclone id.
                f.write(
                    f"{lat:20.5f}{lon:20.5f}{'HMS':40}"
                    f"{0:10d}{0:10d}{0:10d}"
                    f"{dt.year:10d}{dt.month:10d}{dt.day:10d}{dt.hour:10d}{0:10d}"
                    f"{0:10d}{0:10.3f}{cyclone_id:40}\n"
                )

                # Header line 2
                f.write(f"{0:20.5f}{1:10d}{0:10.3f}\n")

                # Data line: only position is real; everything else is the
                # little_R missing sentinel (-888888), closed by the 7-wide
                # end-of-data marker.
                f.write(
                    f"{-888888.0:13.5f}{lat:13.5f}{-888888.0:13.5f}"
                    f"{-888888.0:13.5f}{-888888.0:13.5f}{lon:13.5f}"
                    f"{0:7d}\n"
                )

                # End of record line
                f.write(f"{-777777.0:13.5f}\n")

    print("Saved to", filename)
937
+
938
def sync_to_s3(data, bucket_name, object_name):
    """Upload the string form of *data* to s3://<bucket_name>/<object_name>."""
    # boto3 resolves credentials from the standard environment/config chain.
    boto3.client("s3").put_object(Body=str(data), Bucket=bucket_name, Key=object_name)