windborne-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
windborne/data_api.py ADDED
@@ -0,0 +1,784 @@
+ from .config import DATA_API_BASE_URL, LAUNCH_SITES
+ from .utils import make_api_request, to_unix_timestamp, save_csv_json, format_little_r, convert_to_netcdf
+
+ import time
+ import os
+ from math import floor
+ from datetime import datetime, timezone, timedelta
+ import csv
+ import json
+
+ def get_observations(since=None, min_time=None, max_time=None, include_ids=None, include_mission_name=True, include_updated_at=None, mission_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None, save_to_file=None):
+     """
+     Retrieves observations based on specified filters, including geographical bounds.
+
+     Args:
+         since (str): Filter observations after this timestamp.
+         min_time (str): Minimum timestamp for observations.
+         max_time (str): Maximum timestamp for observations.
+         include_ids (bool): Include observation IDs in response.
+         include_mission_name (bool): Include mission names in response.
+         include_updated_at (bool): Include update timestamps in response.
+         mission_id (str): Filter observations by mission ID.
+         min_latitude (float): Minimum latitude boundary.
+         max_latitude (float): Maximum latitude boundary.
+         min_longitude (float): Minimum longitude boundary.
+         max_longitude (float): Maximum longitude boundary.
+         save_to_file (str): Optional path to save the response data.
+             If provided, saves the data in CSV format.
+
+     Returns:
+         dict: The API response containing filtered observations.
+     """
+     url = f"{DATA_API_BASE_URL}/observations.json"
+
+     # Convert date strings to Unix timestamps
+     params = {}
+     if since:
+         params["since"] = to_unix_timestamp(since)
+     if min_time:
+         params["min_time"] = to_unix_timestamp(min_time)
+     if max_time:
+         params["max_time"] = to_unix_timestamp(max_time)
+     if mission_id:
+         params["mission_id"] = mission_id
+     if min_latitude:
+         params["min_latitude"] = min_latitude
+     if max_latitude:
+         params["max_latitude"] = max_latitude
+     if min_longitude:
+         params["min_longitude"] = min_longitude
+     if max_longitude:
+         params["max_longitude"] = max_longitude
+     if include_ids:
+         params["include_ids"] = True
+     if include_mission_name:
+         params["include_mission_name"] = True
+     if include_updated_at:
+         params["include_updated_at"] = True
+
+     # Remove any keys where the value is None to avoid sending unnecessary parameters
+     params = {k: v for k, v in params.items() if v is not None}
+
+     response = make_api_request(url, params=params)
+
+     if save_to_file:
+         save_csv_json(save_to_file, response, csv_data_key='observations')
+
+     return response
+
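For orientation, here is a minimal usage sketch for get_observations. The filter values and output filename are hypothetical, and it assumes valid WindBorne API credentials are already configured wherever make_api_request expects them:

    from windborne.data_api import get_observations

    # Fetch observations over a lat/lon box since a given time,
    # writing them to a CSV file as well as returning the parsed response.
    response = get_observations(
        since="2024-01-01 00:00:00",
        min_latitude=30.0, max_latitude=50.0,
        min_longitude=-130.0, max_longitude=-60.0,
        save_to_file="observations.csv",
    )
    print(len(response.get("observations", [])))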
+ def get_super_observations(since=None, min_time=None, max_time=None, include_ids=None, include_mission_name=None, include_updated_at=None, mission_id=None, save_to_file=None):
+     """
+     Retrieves super observations based on specified filters.
+
+     Args:
+         since (str): Filter observations after this timestamp.
+         min_time (str): Minimum timestamp for observations.
+         max_time (str): Maximum timestamp for observations.
+         include_ids (bool): Include observation IDs in response.
+         include_mission_name (bool): Include mission names in response.
+         include_updated_at (bool): Include update timestamps in response.
+         mission_id (str): Filter observations by mission ID.
+         save_to_file (str): Optional path to save the response data.
+             If provided, saves the data in CSV format.
+
+     Returns:
+         dict: The API response containing filtered super observations.
+     """
+     url = f"{DATA_API_BASE_URL}/super_observations.json"
+
+     params = {}
+     if since:
+         params["since"] = to_unix_timestamp(since)
+     if min_time:
+         params["min_time"] = to_unix_timestamp(min_time)
+     if max_time:
+         params["max_time"] = to_unix_timestamp(max_time)
+     if mission_id:
+         params["mission_id"] = mission_id
+     if include_ids:
+         params["include_ids"] = True
+     if include_mission_name:
+         params["include_mission_name"] = True
+     if include_updated_at:
+         params["include_updated_at"] = True
+
+     params = {k: v for k, v in params.items() if v is not None}
+
+     response = make_api_request(url, params=params)
+
+     if save_to_file:
+         save_csv_json(save_to_file, response, csv_data_key='observations')
+
+     return response
+
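A similar sketch for get_super_observations, here restricted to a hypothetical one-hour window (again assuming configured credentials):

    from windborne.data_api import get_super_observations

    # Super observations for one hour, also saved as CSV.
    super_obs = get_super_observations(
        min_time="2024-01-01 00:00:00",
        max_time="2024-01-01 01:00:00",
        save_to_file="super_observations.csv",
    )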
+ def poll_observations(start_time, end_time=None, include_ids=None, include_updated_at=None, mission_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None, interval=60, save_to_file=None, bucket_hours=6.0, output_format=None, callback=None):
+     """
+     Fetches observations between a start time and an optional end time and saves them to files in the specified format.
+     Files are broken up into time buckets, with filenames containing the time at the mid-point of the bucket.
+     For example, for 6-hour buckets centered on 00 UTC, the start time should be 21 UTC of the previous day.
+
+     Args:
+         start_time (str): A date string, supporting the formats YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM and ISO strings,
+             representing the starting time of fetching data.
+         end_time (str): Optional. A date string, supporting the formats YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM and ISO strings,
+             representing the end time of fetching data. If not provided, the current time is used as the end time.
+         include_ids (bool): Include observation IDs in response.
+         include_updated_at (bool): Include update timestamps in response.
+         mission_id (str): Filter observations by mission ID.
+         min_latitude (float): Minimum latitude boundary.
+         max_latitude (float): Maximum latitude boundary.
+         min_longitude (float): Minimum longitude boundary.
+         max_longitude (float): Maximum longitude boundary.
+         interval (int): Optional. Interval in seconds between polls when an empty page is received (default: 60).
+         save_to_file (str): Saves all data to a single file instead of bucketing.
+             Supported formats are '.csv', '.json', '.little_r' and '.nc'.
+         bucket_hours (float): Optional. Size of time buckets in hours. Defaults to 6 hours.
+         output_format (str): Optional. Format to save data in separate files. Supported formats are 'json', 'csv', 'little_r' and 'netcdf'.
+         callback (callable): Optional callback function that receives each batch of fetched observations before saving.
+             This allows custom processing or saving in custom formats.
+     """
+     start_time = to_unix_timestamp(start_time)
+
+     if end_time:
+         end_time = to_unix_timestamp(end_time)
+     else:
+         end_time = int(datetime.now().timestamp())
+
+     # Supported formats for saving into separate files:
+     # - csv (default)
+     # - little_r
+     # - json
+     # - netcdf
+     if output_format and output_format not in ['json', 'csv', 'little_r', 'netcdf']:
+         print("Please use one of the following formats:")
+         print("  - json")
+         print("  - csv")
+         print("  - little_r")
+         print("  - netcdf")
+         return
+
+     # Supported formats for saving into a single file:
+     # NOTE: for poll_observations we handle .csv saving within poll_observations itself rather than via save_csv_json
+     # - .csv
+     # - .json
+     # - .little_r
+     # - .nc
+     if save_to_file and not save_to_file.endswith(('.json', '.csv', '.little_r', '.nc')):
+         print("Please use one of the following formats:")
+         print("  - .json")
+         print("  - .csv")
+         print("  - .little_r")
+         print("  - .nc")
+         return
+
+     # Convert start_time to datetime
+     start_dt = datetime.fromtimestamp(start_time, tz=timezone.utc)
+
+     # Calculate first center time that's after start_time
+     hours_since_day_start = start_dt.hour + start_dt.minute / 60
+     bucket_number = hours_since_day_start // bucket_hours
+     first_center = start_dt.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(hours=(bucket_number + 1) * bucket_hours)
+
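To make the bucket-center arithmetic concrete, here is a small worked example with a hypothetical start time and the default 6-hour buckets:

    from datetime import datetime, timedelta, timezone

    start_dt = datetime(2024, 1, 1, 21, 30, tzinfo=timezone.utc)
    bucket_hours = 6.0
    hours_since_day_start = start_dt.hour + start_dt.minute / 60  # 21.5
    bucket_number = hours_since_day_start // bucket_hours         # 3.0 (the 18-24 UTC bucket)
    first_center = start_dt.replace(hour=0, minute=0, second=0, microsecond=0) \
                   + timedelta(hours=(bucket_number + 1) * bucket_hours)
    # first_center is 2024-01-02 00:00 UTC, the first bucket center after start_dt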
+     # Headers for CSV files
+     headers = [
+         "timestamp", "id", "time", "latitude", "longitude", "altitude", "humidity",
+         "mission_name", "pressure", "specific_humidity", "speed_u", "speed_v", "temperature"
+     ]
+
+     if save_to_file:
+         all_observations = {}
+     else:
+         buckets = {}
+
+     # Initialize the polling loop
+     current_timestamp = start_time
+     has_next_page = True
+
+     while has_next_page:
+         try:
+             # Fetch observations
+             observations_page = get_observations(
+                 since=current_timestamp,
+                 min_latitude=min_latitude,
+                 max_latitude=max_latitude,
+                 min_longitude=min_longitude,
+                 max_longitude=max_longitude,
+                 include_updated_at=include_updated_at,
+                 mission_id=mission_id,
+                 include_ids=include_ids,
+                 include_mission_name=True
+             )
+
+             if observations_page is None:
+                 print("\n----------------------------------------------------------------------")
+                 print(f"Received null response from API. Retrying in {interval} seconds ...")
+                 print("----------------------------------------------------------------------")
+                 time.sleep(interval)
+                 continue
+
+             observations = observations_page.get('observations', [])
+             print(f"Fetched {len(observations)} observation(s)")
+
+             # Invoke the callback with the fetched observations
+             if callback:
+                 print("\nCallback\n")
+                 callback(observations)
+
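Since the callback receives each fetched batch as its single argument, a caller-side hook can be as simple as the following sketch (the function name and time values are hypothetical):

    def log_batch(observations):
        # Inspect or re-route each batch before the library saves it
        print(f"received {len(observations)} observations")

    poll_observations(
        "2024-01-01 00:00:00",
        end_time="2024-01-01 06:00:00",
        output_format="csv",
        callback=log_batch,
    )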
+             for obs in observations:
+                 if 'mission_name' not in obs:
+                     print("Warning: got an observation without a mission name")
+                     continue
+
+                 timestamp = obs.get('timestamp')
+                 if not timestamp:
+                     continue
+
+                 try:
+                     obs_time = datetime.fromtimestamp(timestamp, tz=timezone.utc)
+                 except (OSError, ValueError, TypeError, OverflowError):
+                     continue
+
+                 mission_name = obs.get('mission_name', 'Unknown')
+                 obs['time'] = obs_time.replace(tzinfo=timezone.utc).isoformat()
+
+                 # Normalize empty or missing values so every CSV column is populated
+                 processed_obs = {}
+                 for header in headers:
+                     value = obs.get(header)
+                     if value is None or value == '' or (isinstance(value, str) and not value.strip()):
+                         processed_obs[header] = 'None'
+                     else:
+                         processed_obs[header] = value
+
+                 obs_id = f"{timestamp}_{mission_name}"
+
+                 if save_to_file:
+                     all_observations[obs_id] = processed_obs
+                 else:
+                     if obs_time >= start_dt:  # Only process observations after start time
+                         hours_diff = (obs_time - first_center).total_seconds() / 3600
+                         bucket_index = floor(hours_diff / bucket_hours)
+                         bucket_center = first_center + timedelta(hours=bucket_index * bucket_hours)
+                         bucket_end = bucket_center + timedelta(hours=bucket_hours)
+
+                         if obs_time <= bucket_end:  # Include observations up to the end of the bucket
+                             bucket_key = (bucket_center, mission_name)
+                             if bucket_key not in buckets:
+                                 buckets[bucket_key] = {}
+                             buckets[bucket_key][obs_id] = processed_obs
+
+             # Update pagination
+             next_timestamp = observations_page.get('next_since')
+             has_next_page = observations_page.get('has_next_page', False)
+
+             if not has_next_page or not next_timestamp or next_timestamp <= current_timestamp:
+                 print("-----------------------------------------------------\n")
+                 print("No more pages available or reached end of time range.")
+                 print("\n-----------------------------------------------------")
+                 break
+
+             current_timestamp = next_timestamp
+
+         except Exception as e:
+             print(f"Error occurred: {e}")
+             exit(1001)
+
+     # Save data to a single file
+     if save_to_file:
+         filtered_observations = {obs_id: obs for obs_id, obs in all_observations.items()
+                                  if float(obs['timestamp']) >= start_time}
+         # Sort by timestamp
+         sorted_observations = dict(sorted(filtered_observations.items(),
+                                           key=lambda x: float(x[1]['timestamp'])))
+
+         if save_to_file.endswith('.nc'):
+             first_obs_timestamp = float(next(iter(sorted_observations.values()))['timestamp'])
+             convert_to_netcdf(sorted_observations, first_obs_timestamp, output_filename=save_to_file)
+
+         elif save_to_file.endswith('.json'):
+             with open(save_to_file, 'w', encoding='utf-8') as f:
+                 json.dump(sorted_observations, f, indent=4)
+
+         elif save_to_file.endswith('.csv'):
+             with open(save_to_file, mode='w', newline='') as file:
+                 writer = csv.DictWriter(file, fieldnames=headers)
+                 writer.writeheader()
+                 writer.writerows(sorted_observations.values())
+
+         elif save_to_file.endswith('.little_r'):
+             little_r_records = format_little_r(list(sorted_observations.items()))
+             with open(save_to_file, 'w') as file:
+                 file.write('\n'.join(little_r_records))
+
+         print(f"Saved {len(sorted_observations)} {'observation' if len(sorted_observations) == 1 else 'observations'} to {save_to_file}")
+
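A usage sketch for the single-file path follows; the time window and filename are hypothetical, and the file extension selects the output format:

    from windborne.data_api import poll_observations

    # Everything from a 12-hour window collected into one CSV file.
    poll_observations(
        "2024-01-01 00:00:00",
        end_time="2024-01-01 12:00:00",
        save_to_file="observations_20240101.csv",
    )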
+     # Save data to multiple files
+     elif output_format:
+         # Track statistics per mission
+         mission_stats = {}  # {mission_name: {'files': 0, 'observations': 0}}
+         total_observations_written = 0
+
+         # Save bucketed data
+         for (bucket_center, mission_name), observations in buckets.items():
+             if observations:
+                 # Format hour to be the actual bucket center
+                 bucket_hour = int((bucket_center.hour + bucket_hours / 2) % 24)
+
+                 if output_format == 'netcdf':
+                     convert_to_netcdf(observations, bucket_center.timestamp())
+
+                 elif output_format == 'csv':
+                     output_file = (f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv" %
+                                    (bucket_center.year, bucket_center.month, bucket_center.day,
+                                     bucket_hour, bucket_hours))
+
+                     os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)
+
+                     # Sort observations by timestamp within each bucket
+                     sorted_obs = sorted(observations.values(), key=lambda x: int(x['timestamp']))
+
+                     with open(output_file, mode='w', newline='') as file:
+                         writer = csv.DictWriter(file, fieldnames=headers)
+                         writer.writeheader()
+                         writer.writerows(sorted_obs)
+
+                 elif output_format == 'json':
+                     output_file = (f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json" %
+                                    (bucket_center.year, bucket_center.month, bucket_center.day,
+                                     bucket_hour, bucket_hours))
+
+                     os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)
+
+                     # Sort observations by timestamp within each bucket
+                     sorted_obs = dict(sorted(observations.items(), key=lambda x: int(x[1]['timestamp'])))
+
+                     with open(output_file, 'w', encoding='utf-8') as file:
+                         json.dump(sorted_obs, file, indent=4)
+
+                 elif output_format == 'little_r':
+                     output_file = (f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r" %
+                                    (bucket_center.year, bucket_center.month, bucket_center.day,
+                                     bucket_hour, bucket_hours))
+
+                     os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)
+
+                     sorted_obs = sorted(observations.items(), key=lambda x: int(x[1]['timestamp']))
+
+                     little_r_records = format_little_r(sorted_obs)
+                     with open(output_file, 'w') as file:
+                         file.write('\n'.join(little_r_records))
+
+                 total_observations_written += len(observations)
+
+                 # Update statistics
+                 if mission_name not in mission_stats:
+                     mission_stats[mission_name] = {'files': 0, 'observations': 0}
+                 mission_stats[mission_name]['files'] += 1
+                 mission_stats[mission_name]['observations'] += len(observations)
+
+         # Print total observations written
+         print(f"Total {'observation' if total_observations_written == 1 else 'observations'} written: {total_observations_written}")
+         print("-----------------------------------------------------")
+
+         # Print summary for each mission
+         for mission_name, stats in mission_stats.items():
+             print(f"Mission {mission_name}: Saved {stats['observations']} {'observation' if stats['observations'] == 1 else 'observations'} across {stats['files']} {'file' if stats['files'] == 1 else 'files'}")
+
+         print("-----------------------------------------------------")
+         print("All observations have been processed and saved.")
+
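And the bucketed variant, which writes one file per mission per time bucket (values hypothetical):

    from windborne.data_api import poll_observations

    # Produces files named like WindBorne_<mission>_2024-01-01_00_6h.csv,
    # one per (mission, 6-hour bucket) pair.
    poll_observations(
        "2023-12-31 21:00:00",
        end_time="2024-01-01 21:00:00",
        output_format="csv",
        bucket_hours=6.0,
    )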
+ def poll_super_observations(start_time, end_time=None, interval=60, save_to_file=None, bucket_hours=6.0, output_format=None, callback=None):
+     """
+     Fetches super observations between a start time and an optional end time and saves them to files in the specified format.
+     Files are broken up into time buckets, with filenames containing the time at the mid-point of the bucket.
+     For example, for 6-hour buckets centered on 00 UTC, the start time should be 21 UTC of the previous day.
+
+     Args:
+         start_time (str): A date string, supporting the formats YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM and ISO strings,
+             representing the starting time of fetching data.
+         end_time (str): Optional. A date string, supporting the formats YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM and ISO strings,
+             representing the end time of fetching data. If not provided, the current time is used as the end time.
+         interval (int): Optional. Interval in seconds between polls when an empty page is received (default: 60).
+         save_to_file (str): Saves all data to a single file instead of bucketing.
+             Supported formats are '.csv', '.json', '.little_r' and '.nc'.
+         bucket_hours (float): Optional. Size of time buckets in hours. Defaults to 6 hours.
+         output_format (str): Optional. Format to save data in separate files. Supported formats are 'json', 'csv', 'little_r' and 'netcdf'.
+         callback (callable): Optional callback function that receives each batch of fetched super observations before saving.
+             This allows custom processing or saving in custom formats.
+     """
+     start_time = to_unix_timestamp(start_time)
+
+     if end_time:
+         end_time = to_unix_timestamp(end_time)
+     else:
+         end_time = int(datetime.now().timestamp())
+
+     # Supported formats for saving into separate files:
+     # - csv (default)
+     # - little_r
+     # - json
+     # - netcdf
+     if output_format and output_format not in ['json', 'csv', 'little_r', 'netcdf']:
+         print("Please use one of the following formats:")
+         print("  - json")
+         print("  - csv")
+         print("  - little_r")
+         print("  - netcdf")
+         return
+
+     # Supported formats for saving into a single file:
+     # NOTE: for poll_super_observations we handle .csv saving within poll_super_observations itself rather than via save_csv_json
+     # - .csv
+     # - .json
+     # - .little_r
+     # - .nc
+     if save_to_file and not save_to_file.endswith(('.json', '.csv', '.little_r', '.nc')):
+         print("Please use one of the following formats:")
+         print("  - .json")
+         print("  - .csv")
+         print("  - .little_r")
+         print("  - .nc")
+         return
+
+     # Convert start_time to datetime
+     start_dt = datetime.fromtimestamp(start_time, tz=timezone.utc)
+
+     # Calculate first center time that's after start_time
+     hours_since_day_start = start_dt.hour + start_dt.minute / 60
+     bucket_number = hours_since_day_start // bucket_hours
+     first_center = start_dt.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(hours=(bucket_number + 1) * bucket_hours)
+
+     # Headers for CSV files
+     headers = [
+         "timestamp", "id", "time", "latitude", "longitude", "altitude", "humidity",
+         "mission_name", "pressure", "specific_humidity", "speed_u", "speed_v", "temperature"
+     ]
+
+     if save_to_file:
+         all_observations = {}
+     else:
+         buckets = {}
+
+     # Initialize the polling loop
+     current_timestamp = start_time
+     has_next_page = True
+
+     while has_next_page:
+         try:
+             # Fetch super observations
+             observations_page = get_super_observations(
+                 since=current_timestamp,
+                 min_time=start_time,
+                 max_time=end_time,
+                 include_ids=True,
+                 include_mission_name=True
+             )
+
+             if observations_page is None:
+                 print("\n----------------------------------------------------------------------")
+                 print(f"Received null response from API. Retrying in {interval} seconds ...")
+                 print("----------------------------------------------------------------------")
+                 time.sleep(interval)
+                 continue
+
+             observations = observations_page.get('observations', [])
+             print(f"Fetched {len(observations)} super observation(s)")
+
+             # Invoke the callback with the fetched super observations
+             if callback:
+                 print("\nCallback\n")
+                 callback(observations)
+
+             for obs in observations:
+                 if 'mission_name' not in obs:
+                     print("Warning: got a super observation without a mission name")
+                     continue
+
+                 timestamp = obs.get('timestamp')
+                 if not timestamp:
+                     continue
+
+                 try:
+                     obs_time = datetime.fromtimestamp(timestamp, tz=timezone.utc)
+                 except (OSError, ValueError, TypeError, OverflowError):
+                     continue
+
+                 mission_name = obs.get('mission_name', 'Unknown')
+                 obs['time'] = obs_time.replace(tzinfo=timezone.utc).isoformat()
+
+                 # Normalize empty or missing values so every CSV column is populated
+                 processed_obs = {}
+                 for header in headers:
+                     value = obs.get(header)
+                     if value is None or value == '' or (isinstance(value, str) and not value.strip()):
+                         processed_obs[header] = 'None'
+                     else:
+                         processed_obs[header] = value
+
+                 obs_id = f"{timestamp}_{mission_name}"
+
+                 if save_to_file:
+                     all_observations[obs_id] = processed_obs
+                 else:
+                     if obs_time >= start_dt:  # Only process observations after start time
+                         hours_diff = (obs_time - first_center).total_seconds() / 3600
+                         bucket_index = floor(hours_diff / bucket_hours)
+                         bucket_center = first_center + timedelta(hours=bucket_index * bucket_hours)
+                         bucket_end = bucket_center + timedelta(hours=bucket_hours)
+
+                         if obs_time <= bucket_end:  # Include observations up to the end of the bucket
+                             bucket_key = (bucket_center, mission_name)
+                             if bucket_key not in buckets:
+                                 buckets[bucket_key] = {}
+                             buckets[bucket_key][obs_id] = processed_obs
+
+             # Update pagination
+             next_timestamp = observations_page.get('next_since')
+             has_next_page = observations_page.get('has_next_page', False)
+
+             if not has_next_page or not next_timestamp or next_timestamp <= current_timestamp:
+                 print("-----------------------------------------------------\n")
+                 print("No more pages available or reached end of time range.")
+                 print("\n-----------------------------------------------------")
+                 break
+
+             current_timestamp = next_timestamp
+
+         except Exception as e:
+             print(f"Error occurred: {e}")
+             exit(1001)
+
+     # Save data to a single file
+     if save_to_file:
+         filtered_observations = {obs_id: obs for obs_id, obs in all_observations.items()
+                                  if float(obs['timestamp']) >= start_time}
+         # Sort by timestamp
+         sorted_observations = dict(sorted(filtered_observations.items(),
+                                           key=lambda x: float(x[1]['timestamp'])))
+
+         if save_to_file.endswith('.nc'):
+             first_obs_timestamp = float(next(iter(sorted_observations.values()))['timestamp'])
+             convert_to_netcdf(sorted_observations, first_obs_timestamp, output_filename=save_to_file)
+
+         elif save_to_file.endswith('.json'):
+             with open(save_to_file, 'w', encoding='utf-8') as f:
+                 json.dump(sorted_observations, f, indent=4)
+
+         elif save_to_file.endswith('.csv'):
+             with open(save_to_file, mode='w', newline='') as file:
+                 writer = csv.DictWriter(file, fieldnames=headers)
+                 writer.writeheader()
+                 writer.writerows(sorted_observations.values())
+
+         elif save_to_file.endswith('.little_r'):
+             little_r_records = format_little_r(list(sorted_observations.items()))
+             with open(save_to_file, 'w') as file:
+                 file.write('\n'.join(little_r_records))
+
+         print(f"Saved {len(sorted_observations)} super {'observation' if len(sorted_observations) == 1 else 'observations'} to {save_to_file}")
+
+     # Save data to multiple files
+     elif output_format:
+         # Track statistics per mission
+         mission_stats = {}  # {mission_name: {'files': 0, 'observations': 0}}
+         total_observations_written = 0
+
+         # Save bucketed data
+         for (bucket_center, mission_name), observations in buckets.items():
+             if observations:
+                 # Format hour to be the actual bucket center
+                 bucket_hour = int((bucket_center.hour + bucket_hours / 2) % 24)
+
+                 if output_format == 'netcdf':
+                     convert_to_netcdf(observations, bucket_center.timestamp())
+
+                 elif output_format == 'csv':
+                     output_file = (f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv" %
+                                    (bucket_center.year, bucket_center.month, bucket_center.day,
+                                     bucket_hour, bucket_hours))
+
+                     os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)
+
+                     # Sort observations by timestamp within each bucket
+                     sorted_obs = sorted(observations.values(), key=lambda x: int(x['timestamp']))
+
+                     with open(output_file, mode='w', newline='') as file:
+                         writer = csv.DictWriter(file, fieldnames=headers)
+                         writer.writeheader()
+                         writer.writerows(sorted_obs)
+
+                 elif output_format == 'json':
+                     output_file = (f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json" %
+                                    (bucket_center.year, bucket_center.month, bucket_center.day,
+                                     bucket_hour, bucket_hours))
+
+                     os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)
+
+                     # Sort observations by timestamp within each bucket
+                     sorted_obs = dict(sorted(observations.items(), key=lambda x: int(x[1]['timestamp'])))
+
+                     with open(output_file, 'w', encoding='utf-8') as file:
+                         json.dump(sorted_obs, file, indent=4)
+
+                 elif output_format == 'little_r':
+                     output_file = (f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r" %
+                                    (bucket_center.year, bucket_center.month, bucket_center.day,
+                                     bucket_hour, bucket_hours))
+
+                     os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)
+
+                     sorted_obs = sorted(observations.items(), key=lambda x: int(x[1]['timestamp']))
+
+                     little_r_records = format_little_r(sorted_obs)
+                     with open(output_file, 'w') as file:
+                         file.write('\n'.join(little_r_records))
+
+                 total_observations_written += len(observations)
+
+                 # Update statistics
+                 if mission_name not in mission_stats:
+                     mission_stats[mission_name] = {'files': 0, 'observations': 0}
+                 mission_stats[mission_name]['files'] += 1
+                 mission_stats[mission_name]['observations'] += len(observations)
+
+         # Print total super observations written
+         print(f"Total super {'observation' if total_observations_written == 1 else 'observations'} written: {total_observations_written}")
+         print("-----------------------------------------------------")
+
+         # Print summary for each mission
+         for mission_name, stats in mission_stats.items():
+             print(f"Mission {mission_name}: Saved {stats['observations']} super {'observation' if stats['observations'] == 1 else 'observations'} across {stats['files']} {'file' if stats['files'] == 1 else 'files'}")
+
+         print("-----------------------------------------------------")
+         print("All super observations have been processed and saved.")
+
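poll_super_observations accepts the same saving options as poll_observations; a minimal sketch with hypothetical times:

    from windborne.data_api import poll_super_observations

    poll_super_observations(
        "2024-01-01 00:00:00",
        end_time="2024-01-01 06:00:00",
        save_to_file="super_observations.little_r",
    )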
+ def get_flying_missions(cli=None, save_to_file=None):
+     """
+     Retrieves a list of currently flying missions.
+     In CLI mode, displays missions in a formatted table.
+
+     Args:
+         cli (bool): Whether the function is invoked from the CLI; if so, the
+             missions are printed as a formatted table.
+         save_to_file (str): Optional path to save the response data.
+             If provided, saves the data in CSV or JSON format.
+
+     Returns:
+         dict: The API response containing the list of flying missions.
+     """
+     url = f"{DATA_API_BASE_URL}/missions.json"
+     flying_missions_response = make_api_request(url)
+     flying_missions = flying_missions_response.get("missions", [])
+
+     # Display currently flying missions only in CLI mode, and only when not saving to a file
+     if flying_missions and cli and not save_to_file:
+         print("Currently flying missions:\n")
+
+         # Define headers and data
+         headers = ["Index", "Mission ID", "Mission Name"]
+         rows = [
+             [str(i), mission.get("id", "N/A"), mission.get("name", "Unnamed Mission")]
+             for i, mission in enumerate(flying_missions, start=1)
+         ]
+
+         # Somewhat overkill, but robust if the mission naming convention ever changes
+         # Calculate column widths
+         col_widths = [max(len(cell) for cell in col) + 2 for col in zip(headers, *rows)]
+
+         # Display table
+         print("".join(f"{headers[i]:<{col_widths[i]}}" for i in range(len(headers))))
+         print("".join("-" * col_width for col_width in col_widths))
+         for row in rows:
+             print("".join(f"{row[i]:<{col_widths[i]}}" for i in range(len(row))))
+
+     if save_to_file:
+         save_csv_json(save_to_file, flying_missions_response, csv_data_key='missions')
+
+     return flying_missions_response
+
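A brief sketch of both modes (assumes configured credentials):

    from windborne.data_api import get_flying_missions

    # Programmatic use: work with the raw response.
    missions = get_flying_missions().get("missions", [])

    # CLI-style use: print the formatted table instead.
    get_flying_missions(cli=True)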
+ def get_mission_launch_site(mission_id=None, save_to_file=None):
+     """
+     Retrieves launch site information for a specified mission.
+
+     Args:
+         mission_id (str): The ID of the mission to look up.
+         save_to_file (str): Optional path to save the response data.
+
+     Returns:
+         dict: The API response containing the launch site details.
+     """
+     if not mission_id:
+         print("Must provide mission ID")
+         return
+
+     url = f"{DATA_API_BASE_URL}/missions/{mission_id}/launch_site.json"
+     response = make_api_request(url)
+
+     if response and not save_to_file:
+         launch_site = response.get('launch_site')
+         if isinstance(launch_site, dict):
+             site_name = LAUNCH_SITES.get(launch_site.get('id'), 'N/A')
+             print("Mission launch site\n")
+             print(f"{'Location':<12} {site_name}")
+             print(f"{'Latitude':<12} {launch_site.get('latitude', 'N/A')}")
+             print(f"{'Longitude':<12} {launch_site.get('longitude', 'N/A')}")
+         else:
+             print("Unable to display launch site details - unexpected format")
+
+     if save_to_file:
+         save_csv_json(save_to_file, response, csv_data_key='launch_site')
+
+     return response
+
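Usage sketch (the mission ID is hypothetical):

    from windborne.data_api import get_mission_launch_site

    # Prints the launch site table for the mission ...
    site = get_mission_launch_site(mission_id="mission-123")
    # ... or saves it to a file instead of printing.
    get_mission_launch_site(mission_id="mission-123", save_to_file="launch_site.json")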
+ def get_predicted_path(mission_id=None, save_to_file=None):
+     """
+     Fetches the predicted flight path for a given mission.
+     Displays currently flying missions if the provided mission ID is invalid.
+
+     Args:
+         mission_id (str): The ID of the mission to fetch the prediction for.
+         save_to_file (str): Optional path to save the response data.
+             If provided, saves the data in CSV format.
+
+     Returns:
+         dict: The API response containing the predicted flight path data.
+     """
+     if not mission_id:
+         print("To get the predicted flight path for a given mission you must provide a mission ID.")
+         return
+
+     # Check if the provided mission ID belongs to a flying mission
+     flying_missions_response = get_flying_missions()
+     flying_missions = flying_missions_response.get("missions", [])
+
+     if mission_id not in [mission.get("id") for mission in flying_missions]:
+         print(f"Provided mission ID '{mission_id}' does not belong to a mission that is currently flying.")
+
+         # Display currently flying missions
+         if flying_missions:
+             print("\nCurrently flying missions:\n")
+
+             # Define headers and data
+             headers = ["Index", "Mission ID", "Mission Name"]
+             rows = [
+                 [str(i), mission.get("id", "N/A"), mission.get("name", "Unnamed Mission")]
+                 for i, mission in enumerate(flying_missions, start=1)
+             ]
+
+             # Somewhat overkill, but robust if the mission naming convention ever changes
+             # Calculate column widths
+             col_widths = [max(len(cell) for cell in col) + 2 for col in zip(headers, *rows)]
+
+             # Display table
+             print("".join(f"{headers[i]:<{col_widths[i]}}" for i in range(len(headers))))
+             print("".join("-" * col_width for col_width in col_widths))
+             for row in rows:
+                 print("".join(f"{row[i]:<{col_widths[i]}}" for i in range(len(row))))
+         else:
+             print("No missions are currently flying.")
+         return
+
+     url = f"{DATA_API_BASE_URL}/missions/{mission_id}/prediction.json"
+     response = make_api_request(url)
+
+     if save_to_file:
+         save_csv_json(save_to_file, response, csv_data_key='prediction')
+
+     return response
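
Usage sketch (hypothetical mission ID; the mission must currently be flying, otherwise the function prints the table of flying missions and returns None):

    from windborne.data_api import get_predicted_path

    prediction = get_predicted_path(mission_id="mission-123")
    if prediction:
        # The response is saved with csv_data_key='prediction', which suggests
        # the payload carries a 'prediction' key; treat this as an assumption.
        print(prediction.get("prediction"))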