windborne 1.0.0 (py3-none-any.whl)

windborne/data_api.py ADDED
@@ -0,0 +1,784 @@
+ import csv
+ import json
+ import os
+ import sys
+ import time
+ from datetime import datetime, timezone, timedelta
+ from math import floor
+
+ from .config import DATA_API_BASE_URL, LAUNCH_SITES
+ from .utils import make_api_request, to_unix_timestamp, save_csv_json, format_little_r, convert_to_netcdf
+
+ def get_observations(since=None, min_time=None, max_time=None, include_ids=None, include_mission_name=True, include_updated_at=None, mission_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None, save_to_file=None):
+     """
+     Retrieves observations based on specified filters, including geographical bounds.
+
+     Args:
+         since (str): Filter observations taken after this timestamp.
+         min_time (str): Minimum timestamp for observations.
+         max_time (str): Maximum timestamp for observations.
+         include_ids (bool): Include observation IDs in the response.
+         include_mission_name (bool): Include mission names in the response (default: True).
+         include_updated_at (bool): Include update timestamps in the response.
+         mission_id (str): Filter observations by mission ID.
+         min_latitude (float): Minimum latitude boundary.
+         max_latitude (float): Maximum latitude boundary.
+         min_longitude (float): Minimum longitude boundary.
+         max_longitude (float): Maximum longitude boundary.
+         save_to_file (str): Optional path to save the response data.
+             If provided, saves the data in CSV or JSON format, inferred from the file extension.
+
+     Returns:
+         dict: The API response containing the filtered observations.
+     """
+
+     url = f"{DATA_API_BASE_URL}/observations.json"
+
+     # Convert date strings to Unix timestamps
+     params = {}
+     if since:
+         params["since"] = to_unix_timestamp(since)
+     if min_time:
+         params["min_time"] = to_unix_timestamp(min_time)
+     if max_time:
+         params["max_time"] = to_unix_timestamp(max_time)
+     if mission_id:
+         params["mission_id"] = mission_id
+     # Use explicit None checks so that 0.0 (the equator / prime meridian) is a valid boundary
+     if min_latitude is not None:
+         params["min_latitude"] = min_latitude
+     if max_latitude is not None:
+         params["max_latitude"] = max_latitude
+     if min_longitude is not None:
+         params["min_longitude"] = min_longitude
+     if max_longitude is not None:
+         params["max_longitude"] = max_longitude
+     if include_ids:
+         params["include_ids"] = True
+     if include_mission_name:
+         params["include_mission_name"] = True
+     if include_updated_at:
+         params["include_updated_at"] = True
+
+     # Remove any keys where the value is None to avoid sending unnecessary parameters
+     params = {k: v for k, v in params.items() if v is not None}
+
+     response = make_api_request(url, params=params)
+
+     if save_to_file:
+         save_csv_json(save_to_file, response, csv_data_key='observations')
+
+     return response
+
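+ # Illustrative sketch (not part of the published API; the timestamp, bounding
+ # box and filename below are hypothetical): fetching observations within a
+ # geographic window and saving them to CSV.
+ def _example_get_observations():
+     response = get_observations(
+         since="2024-01-01 00:00:00",
+         min_latitude=30.0,
+         max_latitude=60.0,
+         min_longitude=-10.0,
+         max_longitude=40.0,
+         save_to_file="observations.csv",
+     )
+     return (response or {}).get("observations", [])
+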
+ def get_super_observations(since=None, min_time=None, max_time=None, include_ids=None, include_mission_name=None, include_updated_at=None, mission_id=None, save_to_file=None):
+     """
+     Retrieves super observations based on specified filters.
+
+     Args:
+         since (str): Filter observations taken after this timestamp.
+         min_time (str): Minimum timestamp for observations.
+         max_time (str): Maximum timestamp for observations.
+         include_ids (bool): Include observation IDs in the response.
+         include_mission_name (bool): Include mission names in the response.
+         include_updated_at (bool): Include update timestamps in the response.
+         mission_id (str): Filter observations by mission ID.
+         save_to_file (str): Optional path to save the response data.
+             If provided, saves the data in CSV or JSON format, inferred from the file extension.
+
+     Returns:
+         dict: The API response containing the filtered super observations.
+     """
+
+     url = f"{DATA_API_BASE_URL}/super_observations.json"
+
+     params = {}
+     if since:
+         params["since"] = to_unix_timestamp(since)
+     if min_time:
+         params["min_time"] = to_unix_timestamp(min_time)
+     if max_time:
+         params["max_time"] = to_unix_timestamp(max_time)
+     if mission_id:
+         params["mission_id"] = mission_id
+     if include_ids:
+         params["include_ids"] = True
+     if include_mission_name:
+         params["include_mission_name"] = True
+     if include_updated_at:
+         params["include_updated_at"] = True
+
+     params = {k: v for k, v in params.items() if v is not None}
+
+     response = make_api_request(url, params=params)
+     if save_to_file:
+         save_csv_json(save_to_file, response, csv_data_key='observations')
+
+     return response
+
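+ # Illustrative sketch (hypothetical mission ID and filename): fetching super
+ # observations for a single mission and saving them to JSON.
+ def _example_get_super_observations():
+     response = get_super_observations(
+         since="2024-01-01 00:00:00",
+         mission_id="example-mission-id",
+         save_to_file="super_observations.json",
+     )
+     return (response or {}).get("observations", [])
+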
+ def poll_observations(start_time, end_time=None, include_ids=None, include_updated_at=None, mission_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None, interval=60, save_to_file=None, bucket_hours=6.0, output_format=None, callback=None):
+     """
+     Fetches observations between a start time and an optional end time and saves them to files in the specified format.
+     Files are broken up into time buckets, with filenames containing the time at the mid-point of the bucket.
+     For example, for 6-hour buckets centered on 00 UTC, the start time should be 21 UTC of the previous day.
+
+     Args:
+         start_time (str): A date string, supporting the formats YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM and ISO strings,
+             representing the starting time of fetching data.
+         end_time (str): Optional. A date string, supporting the formats YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM and ISO strings,
+             representing the end time of fetching data. If not provided, the current time is used as the end time.
+         include_ids (bool): Include observation IDs in the response.
+         include_updated_at (bool): Include update timestamps in the response.
+         mission_id (str): Filter observations by mission ID.
+         min_latitude (float): Minimum latitude boundary.
+         max_latitude (float): Maximum latitude boundary.
+         min_longitude (float): Minimum longitude boundary.
+         max_longitude (float): Maximum longitude boundary.
+         interval (int): Optional. Interval in seconds between polls when an empty page is received (default: 60).
+         save_to_file (str): Saves all data to a single file instead of bucketing.
+             Supported formats are '.csv', '.json', '.little_r' and '.nc'.
+         bucket_hours (float): Optional. Size of time buckets in hours. Defaults to 6 hours.
+         output_format (str): Optional. Format to save data in separate files. Supported formats are 'json', 'csv', 'little_r' and 'netcdf'.
+         callback (callable): Optional callback function that receives each fetched page of observations before saving.
+             This allows custom processing or saving in custom formats.
+     """
+
+     start_time = to_unix_timestamp(start_time)
+
+     if end_time:
+         end_time = to_unix_timestamp(end_time)
+     else:
+         end_time = int(datetime.now().timestamp())
+
+     # Supported formats for saving into separate files:
+     # - csv (default)
+     # - little_r
+     # - json
+     # - netcdf
+     if output_format and output_format not in ['json', 'csv', 'little_r', 'netcdf']:
+         print("Please use one of the following formats:")
+         print(" - json")
+         print(" - csv")
+         print(" - little_r")
+         print(" - netcdf")
+         return
+
+     # Supported formats for saving into a single file:
+     # NOTE: for poll_observations we handle .csv saving within poll_observations and not using save_csv_json
+     # - .csv
+     # - .json
+     # - .little_r
+     # - .nc
+     if save_to_file and not save_to_file.endswith(('.json', '.csv', '.little_r', '.nc')):
+         print("Please use one of the following formats:")
+         print(" - .json")
+         print(" - .csv")
+         print(" - .little_r")
+         print(" - .nc")
+         return
+
+     # Convert start_time to a datetime
+     start_dt = datetime.fromtimestamp(start_time, tz=timezone.utc)
+
+     # Calculate the first bucket center time that falls after start_time
+     hours_since_day_start = start_dt.hour + start_dt.minute / 60
+     bucket_number = hours_since_day_start // bucket_hours
+     first_center = start_dt.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(hours=(bucket_number + 1) * bucket_hours)
+
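+     # Worked example of the bucket arithmetic above (illustrative): with
+     # start_time = 2024-01-01 21:30 UTC and bucket_hours = 6,
+     # hours_since_day_start = 21.5 and bucket_number = 21.5 // 6 = 3,
+     # so first_center = 2024-01-01 00:00 + (3 + 1) * 6h = 2024-01-02 00:00 UTC.
+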
+     # Headers for CSV files
+     headers = [
+         "timestamp", "id", "time", "latitude", "longitude", "altitude", "humidity",
+         "mission_name", "pressure", "specific_humidity", "speed_u", "speed_v", "temperature"
+     ]
+
+     if save_to_file:
+         all_observations = {}
+     else:
+         buckets = {}
+
+     # Initialize the polling loop
+     current_timestamp = start_time
+     has_next_page = True
+
+     while has_next_page:
+         try:
+             # Fetch observations, bounded to the requested time range
+             # (mirrors poll_super_observations; previously end_time was computed but never used)
+             observations_page = get_observations(
+                 since=current_timestamp,
+                 min_time=start_time,
+                 max_time=end_time,
+                 min_latitude=min_latitude,
+                 max_latitude=max_latitude,
+                 min_longitude=min_longitude,
+                 max_longitude=max_longitude,
+                 include_updated_at=include_updated_at,
+                 mission_id=mission_id,
+                 include_ids=include_ids,
+                 include_mission_name=True
+             )
+
+             if observations_page is None:
+                 print("\n----------------------------------------------------------------------")
+                 print(f"Received null response from API. Retrying in {interval} seconds ...")
+                 print("----------------------------------------------------------------------")
+                 time.sleep(interval)
+                 continue
+
+             observations = observations_page.get('observations', [])
+             print(f"Fetched {len(observations)} observation(s)")
+
+             # Invoke the callback with the fetched observations
+             if callback:
+                 print("\nCallback\n")
+                 callback(observations)
+
+             for obs in observations:
+                 if 'mission_name' not in obs:
+                     print("Warning: got an observation without a mission name")
+                     continue
+
+                 timestamp = obs.get('timestamp')
+                 if not timestamp:
+                     continue
+
+                 try:
+                     obs_time = datetime.fromtimestamp(timestamp, tz=timezone.utc)
+                 except (OSError, ValueError, TypeError, OverflowError):
+                     continue
+
+                 mission_name = obs.get('mission_name', 'Unknown')
+                 obs['time'] = obs_time.isoformat()
+
+                 # Normalize missing or blank values so every CSV column is populated
+                 processed_obs = {}
+                 for header in headers:
+                     value = obs.get(header)
+                     if value is None or value == '' or (isinstance(value, str) and not value.strip()):
+                         processed_obs[header] = 'None'
+                     else:
+                         processed_obs[header] = value
+
+                 obs_id = f"{timestamp}_{mission_name}"
+
+                 if save_to_file:
+                     all_observations[obs_id] = processed_obs
+                 else:
+                     if obs_time >= start_dt:  # Only process observations after the start time
+                         hours_diff = (obs_time - first_center).total_seconds() / 3600
+                         bucket_index = floor(hours_diff / bucket_hours)
+                         bucket_center = first_center + timedelta(hours=bucket_index * bucket_hours)
+                         bucket_end = bucket_center + timedelta(hours=bucket_hours)
+
+                         if obs_time <= bucket_end:  # Include observations up to the end of the bucket
+                             bucket_key = (bucket_center, mission_name)
+                             if bucket_key not in buckets:
+                                 buckets[bucket_key] = {}
+                             buckets[bucket_key][obs_id] = processed_obs
+
+             # Update pagination
+             next_timestamp = observations_page.get('next_since')
+             has_next_page = observations_page.get('has_next_page', False)
+
+             if not has_next_page or not next_timestamp or next_timestamp <= current_timestamp:
+                 print("-----------------------------------------------------\n")
+                 print("No more pages available or reached end of time range.")
+                 print("\n-----------------------------------------------------")
+                 break
+
+             current_timestamp = next_timestamp
+
+         except Exception as e:
+             print(f"Error occurred: {e}")
+             sys.exit(1001)
+
+     # Save data to a single file
+     if save_to_file:
+         filtered_observations = {obs_id: obs for obs_id, obs in all_observations.items()
+                                  if float(obs['timestamp']) >= start_time}
+         # Sort by timestamp
+         sorted_observations = dict(sorted(filtered_observations.items(),
+                                           key=lambda x: float(x[1]['timestamp'])))
+
+         if save_to_file.endswith('.nc'):
+             # Guard against an empty result set, which would make next() raise StopIteration
+             if sorted_observations:
+                 first_obs_timestamp = float(next(iter(sorted_observations.values()))['timestamp'])
+                 convert_to_netcdf(sorted_observations, first_obs_timestamp, output_filename=save_to_file)
+
+         elif save_to_file.endswith('.json'):
+             with open(save_to_file, 'w', encoding='utf-8') as f:
+                 json.dump(sorted_observations, f, indent=4)
+
+         elif save_to_file.endswith('.csv'):
+             with open(save_to_file, mode='w', newline='') as file:
+                 writer = csv.DictWriter(file, fieldnames=headers)
+                 writer.writeheader()
+                 writer.writerows(sorted_observations.values())
+
+         elif save_to_file.endswith('.little_r'):
+             little_r_records = format_little_r(list(sorted_observations.items()))
+             with open(save_to_file, 'w') as file:
+                 file.write('\n'.join(little_r_records))
+
+         print(f"Saved {len(sorted_observations)} {'observation' if len(sorted_observations) == 1 else 'observations'} to {save_to_file}")
+
+     # Save data to multiple files
+     elif output_format:
+         # Track statistics per mission
+         mission_stats = {}  # {mission_name: {'files': 0, 'observations': 0}}
+         total_observations_written = 0
+
+         # Save bucketed data
+         for (bucket_center, mission_name), observations in buckets.items():
+             if observations:
+                 # Format the hour to be the actual bucket center
+                 bucket_hour = int((bucket_center.hour + bucket_hours / 2) % 24)
+
+                 if output_format == 'netcdf':
+                     convert_to_netcdf(observations, bucket_center.timestamp())
+
+                 elif output_format == 'csv':
+                     output_file = (f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv" %
+                                    (bucket_center.year, bucket_center.month, bucket_center.day,
+                                     bucket_hour, bucket_hours))
+
+                     os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)
+
+                     # Sort observations by timestamp within each bucket
+                     sorted_obs = sorted(observations.values(), key=lambda x: int(x['timestamp']))
+
+                     with open(output_file, mode='w', newline='') as file:
+                         writer = csv.DictWriter(file, fieldnames=headers)
+                         writer.writeheader()
+                         writer.writerows(sorted_obs)
+
+                 elif output_format == 'json':
+                     output_file = (f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json" %
+                                    (bucket_center.year, bucket_center.month, bucket_center.day,
+                                     bucket_hour, bucket_hours))
+
+                     os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)
+
+                     # Sort observations by timestamp within each bucket
+                     sorted_obs = dict(sorted(observations.items(), key=lambda x: int(x[1]['timestamp'])))
+
+                     with open(output_file, 'w', encoding='utf-8') as file:
+                         json.dump(sorted_obs, file, indent=4)
+
+                 elif output_format == 'little_r':
+                     output_file = (f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r" %
+                                    (bucket_center.year, bucket_center.month, bucket_center.day,
+                                     bucket_hour, bucket_hours))
+
+                     os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)
+
+                     sorted_obs = sorted(observations.items(), key=lambda x: int(x[1]['timestamp']))
+
+                     little_r_records = format_little_r(sorted_obs)
+                     with open(output_file, 'w') as file:
+                         file.write('\n'.join(little_r_records))
+
+                 total_observations_written += len(observations)
+
+                 # Update statistics
+                 if mission_name not in mission_stats:
+                     mission_stats[mission_name] = {'files': 0, 'observations': 0}
+                 mission_stats[mission_name]['files'] += 1
+                 mission_stats[mission_name]['observations'] += len(observations)
+
+         # Print the total number of observations written
+         print(f"Total {'observation' if total_observations_written == 1 else 'observations'} written: {total_observations_written}")
+         print("-----------------------------------------------------")
+
+         # Print a summary for each mission
+         for mission_name, stats in mission_stats.items():
+             print(f"Mission {mission_name}: Saved {stats['observations']} {'observation' if stats['observations'] == 1 else 'observations'} across {stats['files']} {'file' if stats['files'] == 1 else 'files'}")
+
+         print("-----------------------------------------------------")
+         print("All observations have been processed and saved.")
+
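+ # Illustrative sketch (hypothetical values): poll one day of observations into
+ # 6-hour little_r buckets, with a callback that sees each page before saving.
+ def _example_poll_observations():
+     def on_page(observations):
+         # Called with each fetched page of observations
+         print(f"callback got {len(observations)} observation(s)")
+
+     poll_observations(
+         start_time="2024-01-01 00:00:00",
+         end_time="2024-01-02 00:00:00",
+         bucket_hours=6.0,
+         output_format="little_r",
+         callback=on_page,
+     )
+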
+ def poll_super_observations(start_time, end_time=None, interval=60, save_to_file=None, bucket_hours=6.0, output_format=None, callback=None):
+     """
+     Fetches super observations between a start time and an optional end time and saves them to files in the specified format.
+     Files are broken up into time buckets, with filenames containing the time at the mid-point of the bucket.
+     For example, for 6-hour buckets centered on 00 UTC, the start time should be 21 UTC of the previous day.
+
+     Args:
+         start_time (str): A date string, supporting the formats YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM and ISO strings,
+             representing the starting time of fetching data.
+         end_time (str): Optional. A date string, supporting the formats YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM and ISO strings,
+             representing the end time of fetching data. If not provided, the current time is used as the end time.
+         interval (int): Optional. Interval in seconds between polls when an empty page is received (default: 60).
+         save_to_file (str): Saves all data to a single file instead of bucketing.
+             Supported formats are '.csv', '.json', '.little_r' and '.nc'.
+         bucket_hours (float): Optional. Size of time buckets in hours. Defaults to 6 hours.
+         output_format (str): Optional. Format to save data in separate files. Supported formats are 'json', 'csv', 'little_r' and 'netcdf'.
+         callback (callable): Optional callback function that receives each fetched page of super observations before saving.
+             This allows custom processing or saving in custom formats.
+     """
+
+     start_time = to_unix_timestamp(start_time)
+
+     if end_time:
+         end_time = to_unix_timestamp(end_time)
+     else:
+         end_time = int(datetime.now().timestamp())
+
+     # Supported formats for saving into separate files:
+     # - csv (default)
+     # - little_r
+     # - json
+     # - netcdf
+     if output_format and output_format not in ['json', 'csv', 'little_r', 'netcdf']:
+         print("Please use one of the following formats:")
+         print(" - json")
+         print(" - csv")
+         print(" - little_r")
+         print(" - netcdf")
+         return
+
+     # Supported formats for saving into a single file:
+     # NOTE: for poll_super_observations we handle .csv saving within poll_super_observations and not using save_csv_json
+     # - .csv
+     # - .json
+     # - .little_r
+     # - .nc
+     if save_to_file and not save_to_file.endswith(('.json', '.csv', '.little_r', '.nc')):
+         print("Please use one of the following formats:")
+         print(" - .json")
+         print(" - .csv")
+         print(" - .little_r")
+         print(" - .nc")
+         return
+
+     # Convert start_time to a datetime
+     start_dt = datetime.fromtimestamp(start_time, tz=timezone.utc)
+
+     # Calculate the first bucket center time that falls after start_time
+     hours_since_day_start = start_dt.hour + start_dt.minute / 60
+     bucket_number = hours_since_day_start // bucket_hours
+     first_center = start_dt.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(hours=(bucket_number + 1) * bucket_hours)
+
+     # Headers for CSV files
+     headers = [
+         "timestamp", "id", "time", "latitude", "longitude", "altitude", "humidity",
+         "mission_name", "pressure", "specific_humidity", "speed_u", "speed_v", "temperature"
+     ]
+
+     if save_to_file:
+         all_observations = {}
+     else:
+         buckets = {}
+
+     # Initialize the polling loop
+     current_timestamp = start_time
+     has_next_page = True
+
+     while has_next_page:
+         try:
+             # Fetch super observations, bounded to the requested time range
+             observations_page = get_super_observations(
+                 since=current_timestamp,
+                 min_time=start_time,
+                 max_time=end_time,
+                 include_ids=True,
+                 include_mission_name=True
+             )
+
+             if observations_page is None:
+                 print("\n----------------------------------------------------------------------")
+                 print(f"Received null response from API. Retrying in {interval} seconds ...")
+                 print("----------------------------------------------------------------------")
+                 time.sleep(interval)
+                 continue
+
+             observations = observations_page.get('observations', [])
+             print(f"Fetched {len(observations)} super observation(s)")
+
+             # Invoke the callback with the fetched super observations
+             if callback:
+                 print("\nCallback\n")
+                 callback(observations)
+
+             for obs in observations:
+                 if 'mission_name' not in obs:
+                     print("Warning: got a super observation without a mission name")
+                     continue
+
+                 timestamp = obs.get('timestamp')
+                 if not timestamp:
+                     continue
+
+                 try:
+                     obs_time = datetime.fromtimestamp(timestamp, tz=timezone.utc)
+                 except (OSError, ValueError, TypeError, OverflowError):
+                     continue
+
+                 mission_name = obs.get('mission_name', 'Unknown')
+                 obs['time'] = obs_time.isoformat()
+
+                 # Normalize missing or blank values so every CSV column is populated
+                 processed_obs = {}
+                 for header in headers:
+                     value = obs.get(header)
+                     if value is None or value == '' or (isinstance(value, str) and not value.strip()):
+                         processed_obs[header] = 'None'
+                     else:
+                         processed_obs[header] = value
+
+                 obs_id = f"{timestamp}_{mission_name}"
+
+                 if save_to_file:
+                     all_observations[obs_id] = processed_obs
+                 else:
+                     if obs_time >= start_dt:  # Only process observations after the start time
+                         hours_diff = (obs_time - first_center).total_seconds() / 3600
+                         bucket_index = floor(hours_diff / bucket_hours)
+                         bucket_center = first_center + timedelta(hours=bucket_index * bucket_hours)
+                         bucket_end = bucket_center + timedelta(hours=bucket_hours)
+
+                         if obs_time <= bucket_end:  # Include observations up to the end of the bucket
+                             bucket_key = (bucket_center, mission_name)
+                             if bucket_key not in buckets:
+                                 buckets[bucket_key] = {}
+                             buckets[bucket_key][obs_id] = processed_obs
+
+             # Update pagination
+             next_timestamp = observations_page.get('next_since')
+             has_next_page = observations_page.get('has_next_page', False)
+
+             if not has_next_page or not next_timestamp or next_timestamp <= current_timestamp:
+                 print("-----------------------------------------------------\n")
+                 print("No more pages available or reached end of time range.")
+                 print("\n-----------------------------------------------------")
+                 break
+
+             current_timestamp = next_timestamp
+
+         except Exception as e:
+             print(f"Error occurred: {e}")
+             sys.exit(1001)
+
+     # Save data to a single file
+     if save_to_file:
+         filtered_observations = {obs_id: obs for obs_id, obs in all_observations.items()
+                                  if float(obs['timestamp']) >= start_time}
+         # Sort by timestamp
+         sorted_observations = dict(sorted(filtered_observations.items(),
+                                           key=lambda x: float(x[1]['timestamp'])))
+
+         if save_to_file.endswith('.nc'):
+             # Guard against an empty result set, which would make next() raise StopIteration
+             if sorted_observations:
+                 first_obs_timestamp = float(next(iter(sorted_observations.values()))['timestamp'])
+                 convert_to_netcdf(sorted_observations, first_obs_timestamp, output_filename=save_to_file)
+
+         elif save_to_file.endswith('.json'):
+             with open(save_to_file, 'w', encoding='utf-8') as f:
+                 json.dump(sorted_observations, f, indent=4)
+
+         elif save_to_file.endswith('.csv'):
+             with open(save_to_file, mode='w', newline='') as file:
+                 writer = csv.DictWriter(file, fieldnames=headers)
+                 writer.writeheader()
+                 writer.writerows(sorted_observations.values())
+
+         elif save_to_file.endswith('.little_r'):
+             little_r_records = format_little_r(list(sorted_observations.items()))
+             with open(save_to_file, 'w') as file:
+                 file.write('\n'.join(little_r_records))
+
+         print(f"Saved {len(sorted_observations)} super {'observation' if len(sorted_observations) == 1 else 'observations'} to {save_to_file}")
+
+     # Save data to multiple files
+     elif output_format:
+         # Track statistics per mission
+         mission_stats = {}  # {mission_name: {'files': 0, 'observations': 0}}
+         total_observations_written = 0
+
+         # Save bucketed data
+         for (bucket_center, mission_name), observations in buckets.items():
+             if observations:
+                 # Format the hour to be the actual bucket center
+                 bucket_hour = int((bucket_center.hour + bucket_hours / 2) % 24)
+
+                 if output_format == 'netcdf':
+                     convert_to_netcdf(observations, bucket_center.timestamp())
+
+                 elif output_format == 'csv':
+                     output_file = (f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv" %
+                                    (bucket_center.year, bucket_center.month, bucket_center.day,
+                                     bucket_hour, bucket_hours))
+
+                     os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)
+
+                     # Sort observations by timestamp within each bucket
+                     sorted_obs = sorted(observations.values(), key=lambda x: int(x['timestamp']))
+
+                     with open(output_file, mode='w', newline='') as file:
+                         writer = csv.DictWriter(file, fieldnames=headers)
+                         writer.writeheader()
+                         writer.writerows(sorted_obs)
+
+                 elif output_format == 'json':
+                     output_file = (f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json" %
+                                    (bucket_center.year, bucket_center.month, bucket_center.day,
+                                     bucket_hour, bucket_hours))
+
+                     os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)
+
+                     # Sort observations by timestamp within each bucket
+                     sorted_obs = dict(sorted(observations.items(), key=lambda x: int(x[1]['timestamp'])))
+
+                     with open(output_file, 'w', encoding='utf-8') as file:
+                         json.dump(sorted_obs, file, indent=4)
+
+                 elif output_format == 'little_r':
+                     output_file = (f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r" %
+                                    (bucket_center.year, bucket_center.month, bucket_center.day,
+                                     bucket_hour, bucket_hours))
+
+                     os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)
+
+                     sorted_obs = sorted(observations.items(), key=lambda x: int(x[1]['timestamp']))
+
+                     little_r_records = format_little_r(sorted_obs)
+                     with open(output_file, 'w') as file:
+                         file.write('\n'.join(little_r_records))
+
+                 total_observations_written += len(observations)
+
+                 # Update statistics
+                 if mission_name not in mission_stats:
+                     mission_stats[mission_name] = {'files': 0, 'observations': 0}
+                 mission_stats[mission_name]['files'] += 1
+                 mission_stats[mission_name]['observations'] += len(observations)
+
+         # Print the total number of super observations written
+         print(f"Total super {'observation' if total_observations_written == 1 else 'observations'} written: {total_observations_written}")
+         print("-----------------------------------------------------")
+
+         # Print a summary for each mission
+         for mission_name, stats in mission_stats.items():
+             print(f"Mission {mission_name}: Saved {stats['observations']} super {'observation' if stats['observations'] == 1 else 'observations'} across {stats['files']} {'file' if stats['files'] == 1 else 'files'}")
+
+         print("-----------------------------------------------------")
+         print("All super observations have been processed and saved.")
+
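+ # Illustrative sketch (hypothetical filename): save all super observations for
+ # a 12-hour window to a single NetCDF file instead of bucketing.
+ def _example_poll_super_observations():
+     poll_super_observations(
+         start_time="2024-01-01 00:00:00",
+         end_time="2024-01-01 12:00:00",
+         save_to_file="super_observations.nc",
+     )
+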
+ def get_flying_missions(cli=None, save_to_file=None):
+     """
+     Retrieves a list of currently flying missions.
+     In CLI mode, displays the missions in a formatted table.
+
+     Args:
+         cli (bool): Optional. If truthy, prints the missions as a table
+             (unless save_to_file is provided).
+         save_to_file (str): Optional path to save the response data.
+             If provided, saves the data in CSV or JSON format.
+
+     Returns:
+         dict: The API response containing the list of flying missions.
+     """
+
+     url = f"{DATA_API_BASE_URL}/missions.json"
+     flying_missions_response = make_api_request(url)
+     flying_missions = (flying_missions_response or {}).get("missions", [])
+
+     # Display currently flying missions only in CLI mode and when not saving to a file
+     if flying_missions and cli and not save_to_file:
+         print("Currently flying missions:\n")
+
+         # Define headers and data
+         headers = ["Index", "Mission ID", "Mission Name"]
+         rows = [
+             [str(i), mission.get("id", "N/A"), mission.get("name", "Unnamed Mission")]
+             for i, mission in enumerate(flying_missions, start=1)
+         ]
+
+         # Somewhat overkill, but a good practice in case the mission naming convention ever changes:
+         # calculate column widths from the actual cell contents
+         col_widths = [max(len(cell) for cell in col) + 2 for col in zip(headers, *rows)]
+
+         # Display table
+         print("".join(f"{headers[i]:<{col_widths[i]}}" for i in range(len(headers))))
+         print("".join("-" * col_width for col_width in col_widths))
+         for row in rows:
+             print("".join(f"{row[i]:<{col_widths[i]}}" for i in range(len(row))))
+
+     if save_to_file:
+         save_csv_json(save_to_file, flying_missions_response, csv_data_key='missions')
+
+     return flying_missions_response
+
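+ # Illustrative sketch: collect the IDs of currently flying missions
+ # programmatically (cli left unset, so nothing is printed).
+ def _example_get_flying_missions():
+     response = get_flying_missions()
+     return [mission.get("id") for mission in (response or {}).get("missions", [])]
+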
+ def get_mission_launch_site(mission_id=None, save_to_file=None):
+     """
+     Retrieves launch site information for a specified mission.
+
+     Args:
+         mission_id (str): The ID of the mission to fetch the launch site for.
+         save_to_file (str): Optional path to save the response data.
+             If provided, saves the data in CSV or JSON format.
+
+     Returns:
+         dict: The API response containing the launch site information.
+     """
+     if not mission_id:
+         print("Must provide a mission ID")
+         return
+
+     url = f"{DATA_API_BASE_URL}/missions/{mission_id}/launch_site.json"
+     response = make_api_request(url)
+
+     if response and not save_to_file:
+         launch_site = response.get('launch_site')
+         if isinstance(launch_site, dict):
+             site_name = LAUNCH_SITES.get(launch_site.get('id'), 'N/A')
+             print("Mission launch site\n")
+             print(f"{'Location':<12} {site_name}")
+             print(f"{'Latitude':<12} {launch_site.get('latitude', 'N/A')}")
+             print(f"{'Longitude':<12} {launch_site.get('longitude', 'N/A')}")
+         else:
+             print("Unable to display launch site details - unexpected format")
+
+     if save_to_file:
+         save_csv_json(save_to_file, response, csv_data_key='launch_site')
+
+     return response
+
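+ # Illustrative sketch (hypothetical mission ID and filename): fetch a
+ # mission's launch site and also save it to JSON.
+ def _example_get_mission_launch_site():
+     return get_mission_launch_site(
+         mission_id="example-mission-id",
+         save_to_file="launch_site.json",
+     )
+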
+ def get_predicted_path(mission_id=None, save_to_file=None):
+     """
+     Fetches the predicted flight path for a given mission.
+     Displays the currently flying missions if the provided mission ID is invalid.
+
+     Args:
+         mission_id (str): The ID of the mission to fetch the prediction for.
+         save_to_file (str): Optional path to save the response data.
+             If provided, saves the data in CSV or JSON format.
+
+     Returns:
+         dict: The API response containing the predicted flight path data.
+     """
+     if not mission_id:
+         print("To get the predicted flight path for a given mission you must provide a mission ID.")
+         return
+
+     # Check whether the provided mission ID belongs to a currently flying mission
+     flying_missions_response = get_flying_missions()
+     flying_missions = (flying_missions_response or {}).get("missions", [])
+
+     if mission_id not in [mission.get("id") for mission in flying_missions]:
+         print(f"Provided mission ID '{mission_id}' does not belong to a mission that is currently flying.")
+
+         # Display currently flying missions
+         if flying_missions:
+             print("\nCurrently flying missions:\n")
+
+             # Define headers and data
+             headers = ["Index", "Mission ID", "Mission Name"]
+             rows = [
+                 [str(i), mission.get("id", "N/A"), mission.get("name", "Unnamed Mission")]
+                 for i, mission in enumerate(flying_missions, start=1)
+             ]
+
+             # Somewhat overkill, but a good practice in case the mission naming convention ever changes:
+             # calculate column widths from the actual cell contents
+             col_widths = [max(len(cell) for cell in col) + 2 for col in zip(headers, *rows)]
+
+             # Display table
+             print("".join(f"{headers[i]:<{col_widths[i]}}" for i in range(len(headers))))
+             print("".join("-" * col_width for col_width in col_widths))
+             for row in rows:
+                 print("".join(f"{row[i]:<{col_widths[i]}}" for i in range(len(row))))
+         else:
+             print("No missions are currently flying.")
+         # Return in either case so we never request a prediction for an invalid mission ID
+         return
+
+     url = f"{DATA_API_BASE_URL}/missions/{mission_id}/prediction.json"
+     response = make_api_request(url)
+
+     if save_to_file:
+         save_csv_json(save_to_file, response, csv_data_key='prediction')
+
+     return response
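+
+ # Illustrative sketch (hypothetical mission ID and filename): fetch the
+ # predicted flight path for a currently flying mission and save it to CSV.
+ def _example_get_predicted_path():
+     return get_predicted_path(
+         mission_id="example-mission-id",
+         save_to_file="prediction.csv",
+     )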