windborne-1.0.9-py3-none-any.whl → windborne-1.1.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
windborne/data_api.py CHANGED
@@ -1,23 +1,20 @@
1
- from .config import DATA_API_BASE_URL, LAUNCH_SITES
2
- from .utils import make_api_request, to_unix_timestamp, save_csv_json, format_little_r, convert_to_netcdf
3
-
4
1
  import time
5
2
  import os
6
- from math import floor
7
3
  from datetime import datetime, timezone, timedelta
8
4
  import csv
9
5
  import json
10
- import hashlib
11
6
 
12
- # UTC should be used across the lib
7
+ from .api_request import make_api_request
8
+ from .observation_formatting import format_little_r, convert_to_netcdf
9
+ from .utils import to_unix_timestamp, save_arbitrary_response
10
+
11
+ DATA_API_BASE_URL = "https://sensor-data.windbornesystems.com/api/v1"
13
12
 
14
13
  # ------------
15
14
  # CORE RESOURCES
16
15
  # ------------
17
16
 
18
- # Observations
19
- # ------------
20
- def get_observations_page(since=None, min_time=None, max_time=None, include_ids=None, include_mission_name=True, include_updated_at=None, mission_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None, save_to_file=None):
17
+ def get_observations_page(since=None, min_time=None, max_time=None, include_ids=None, include_mission_name=True, include_updated_at=None, mission_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None, output_file=None):
21
18
  """
22
19
  Retrieves observations page based on specified filters including geographical bounds.
23
20
 
@@ -35,7 +32,7 @@ def get_observations_page(since=None, min_time=None, max_time=None, include_ids=
35
32
  min_longitude (float): Minimum longitude boundary.
36
33
  max_longitude (float): Maximum longitude boundary.
37
34
 
38
- save_to_file (str): Optional path to save the response data.
35
+ output_file (str): Optional path to save the response data.
39
36
  If provided, saves the data in CSV format.
40
37
 
41
38
  Returns:
@@ -51,7 +48,7 @@ def get_observations_page(since=None, min_time=None, max_time=None, include_ids=
51
48
  if min_time:
52
49
  params["min_time"] = to_unix_timestamp(min_time)
53
50
  if max_time:
54
- params["max_time"] = to_unix_timestamp(min_time)
51
+ params["max_time"] = to_unix_timestamp(max_time)
55
52
  if mission_id:
56
53
  params["mission_id"] = mission_id
57
54
  if min_latitude:
@@ -69,551 +66,17 @@ def get_observations_page(since=None, min_time=None, max_time=None, include_ids=
69
66
  if include_updated_at:
70
67
  params["include_updated_at"] = True
71
68
 
72
- # Remove any keys where the value is None to avoid sending unnecessary parameters
73
69
  params = {k: v for k, v in params.items() if v is not None}
74
70
 
75
71
  response = make_api_request(url, params=params)
76
72
 
77
- if save_to_file:
78
- save_csv_json(save_to_file, response, csv_data_key='observations')
73
+ if output_file:
74
+ save_arbitrary_response(output_file, response, csv_data_key='observations')
79
75
 
80
76
  return response
81
77
 
82
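For reference, a minimal usage sketch of the renamed get_observations_page API above. The import path follows this module (windborne/data_api.py); the filter values and file name are illustrative, and valid API credentials are assumed to be configured however the package expects.

from windborne.data_api import get_observations_page

# Fetch one page of observations since a given time, restricted to a lat/lon box,
# and optionally persist the raw response via the new output_file parameter.
page = get_observations_page(
    since="2024-01-01 00:00:00",
    min_latitude=30.0,
    max_latitude=50.0,
    min_longitude=-130.0,
    max_longitude=-100.0,
    include_ids=True,
    output_file="observations.csv",   # saved through save_arbitrary_response
)

if page is not None:
    print(len(page.get("observations", [])), "observations, has_next_page =", page.get("has_next_page"))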
- def observations(start_time, end_time=None, include_ids=None, include_updated_at=None, mission_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None, interval=60, save_to_file=None, bucket_hours=6.0, output_format=None, output_dir=None, callback=None):
83
- """
84
- Fetches observations between a start time and an optional end time and saves to files in specified format.
85
- Files are broken up into time buckets, with filenames containing the time at the mid-point of the bucket.
86
- For example, for 6-hour buckets centered on 00 UTC, the start time should be 21 UTC of the previous day.
87
-
88
- Args:
89
- start_time (str): A date string, supporting formats YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM and ISO strings,
90
- representing the starting time of fetching data.
91
- end_time (str): Optional. A date string, supporting formats YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM and ISO strings,
92
- representing the end time of fetching data. If not provided, current time is used as end time.
93
-
94
- include_ids (bool): Include observation IDs in response.
95
- include_updated_at (bool): Include update timestamps in response.
96
- mission_id (str): Filter observations by mission ID.
97
- min_latitude (float): Minimum latitude boundary.
98
- max_latitude (float): Maximum latitude boundary.
99
- min_longitude (float): Minimum longitude boundary.
100
- max_longitude (float): Maximum longitude boundary.
101
78
 
102
- interval (int): Optional. Interval in seconds between polls when a empty page is received (default: 60)
103
- save_to_file (str): Saves all data to a single file instead of bucketing.
104
- Supported formats are '.csv', '.json', '.little_r' and '.nc'
105
- bucket_hours (int): Optional. Size of time buckets in hours. Defaults to 6 hours.
106
- output_format (str): Optional. Format to save data in separate files. Supported formats are 'json, 'csv', 'little_r' and 'netcdf'.
107
- output_dir (str): Optional. Directory path where the separate files should be saved. If not provided, files will be saved in current directory.
108
- callback (callable): Optional callback function that receives (super observations, metadata) before saving.
109
- This allows custom processing or saving in custom formats.
110
- """
111
-
112
- start_time = to_unix_timestamp(start_time)
113
-
114
- if end_time:
115
- end_time = to_unix_timestamp(end_time)
116
- else:
117
- end_time = int(datetime.now().timestamp())
118
-
119
- # Supported formats for saving into separate files:
120
- # - csv (default)
121
- # - little_r
122
- # - json
123
- # - netcdf
124
- if output_format and output_format not in ['json', 'csv', 'little_r', 'netcdf']:
125
- print("Please use one of the following formats:")
126
- print(" - json")
127
- print(" - csv")
128
- print(" - little_r")
129
- print(" - netcdf")
130
- return
131
-
132
- # Supported formats for saving into a single file:
133
- # NOTE: for observations we handle .csv saving within observations and not using save_csv_json
134
- # - .csv
135
- # - .json
136
- # - .little_r
137
- # - .nc
138
- if save_to_file and not save_to_file.endswith(('.json', '.csv', '.little_r', '.nc')):
139
- print("Please use one of the following formats:")
140
- print(" - .json")
141
- print(" - .csv")
142
- print(" - .little_r")
143
- print(" - .nc")
144
- return
145
-
146
- # Convert start_time to datetime
147
- start_dt = datetime.fromtimestamp(start_time, tz=timezone.utc)
148
-
149
- # Calculate first center time that's after start_time
150
- hours_since_day_start = start_dt.hour + start_dt.minute / 60
151
- bucket_number = hours_since_day_start // bucket_hours
152
- first_center = start_dt.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(hours=(bucket_number + 1) * bucket_hours)
153
-
154
-
155
- # Headers for CSV files
156
- headers = [
157
- "timestamp", "id", "time", "latitude", "longitude", "altitude", "humidity",
158
- "mission_name", "pressure", "specific_humidity", "speed_u", "speed_v", "temperature"
159
- ]
160
-
161
- if save_to_file:
162
- all_observations = {}
163
- else:
164
- buckets = {}
165
-
166
- # Initialize the polling loop
167
- current_timestamp = start_time
168
- has_next_page = True
169
- fetced_so_far = 0
170
-
171
- print(f"Starting polling observations\nfrom {datetime.fromtimestamp(start_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC to {datetime.fromtimestamp(end_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC")
172
- print("-----------------------------------------------------")
173
-
174
-
175
- while has_next_page:
176
- try:
177
- # Fetch observations
178
- observations_page = get_observations_page(
179
- since=current_timestamp,
180
- min_latitude=min_latitude,
181
- max_latitude=max_latitude,
182
- min_longitude=min_longitude,
183
- max_longitude=max_longitude,
184
- include_updated_at=include_updated_at,
185
- mission_id=mission_id,
186
- include_ids=include_ids,
187
- include_mission_name=True
188
- )
189
-
190
- if observations_page is None:
191
- print("\n----------------------------------------------------------------------")
192
- print(f"Received null response from API. Retrying in {interval} seconds ...")
193
- print("----------------------------------------------------------------------")
194
- time.sleep(interval)
195
- continue
196
-
197
- observations = observations_page.get('observations', [])
198
- fetced_so_far = fetced_so_far + len(observations)
199
- print_current_timestamp = current_timestamp if current_timestamp < 1e11 else current_timestamp / 1e9
200
- print(f"Fetched {fetced_so_far} observations")
201
- print(f"Current time: {datetime.fromtimestamp(print_current_timestamp, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
202
- print("-----------------------------------------------------")
203
-
204
- # Invoke the callback with fetched observations
205
- if callback:
206
- print("--------\nCallback\n--------")
207
- callback(observations)
208
-
209
- for obs in observations:
210
- if 'mission_name' not in obs:
211
- print("Warning: got an observation without a mission name")
212
- continue
213
-
214
- timestamp = obs.get('timestamp')
215
- if not timestamp:
216
- continue
217
-
218
- try:
219
- obs_time = datetime.fromtimestamp(timestamp, tz=timezone.utc)
220
- except (OSError, ValueError, TypeError, OverflowError):
221
- continue
222
-
223
- mission_name = obs.get('mission_name', 'Unknown')
224
- obs['time'] = obs_time.replace(tzinfo=timezone.utc).isoformat()
225
-
226
- processed_obs = {}
227
- for header in headers:
228
- value = obs.get(header)
229
- if value is None or value == '' or (isinstance(value, str) and not value.strip()):
230
- processed_obs[header] = 'None'
231
- else:
232
- processed_obs[header] = value
233
-
234
- obs_id = f"{timestamp}_{mission_name}"
235
-
236
- if save_to_file:
237
- all_observations[obs_id] = processed_obs
238
- else:
239
- if obs_time >= start_dt: # Only process observations after start time
240
- hours_diff = (obs_time - first_center).total_seconds() / 3600
241
- bucket_index = floor(hours_diff / bucket_hours)
242
- bucket_center = first_center + timedelta(hours=bucket_index * bucket_hours)
243
- bucket_end = bucket_center + timedelta(hours=bucket_hours)
244
-
245
- if obs_time <= bucket_end: # Include observations up to the end of the bucket
246
- bucket_key = (bucket_center, mission_name)
247
- if bucket_key not in buckets:
248
- buckets[bucket_key] = {}
249
- buckets[bucket_key][obs_id] = processed_obs
250
-
251
- # Update pagination
252
- next_timestamp = observations_page.get('next_since')
253
- has_next_page = observations_page.get('has_next_page', False)
254
-
255
- if not has_next_page or not next_timestamp or next_timestamp <= current_timestamp:
256
- print("-----------------------------------------------------\n")
257
- print("Fetching complete.")
258
- print("\n-----------------------------------------------------")
259
- break
260
-
261
- current_timestamp = next_timestamp
262
-
263
- except KeyboardInterrupt:
264
- print("\n\n\U0001F6D1 Received interrupt, stopping...")
265
- print("-----------------------------------------------------")
266
- print("Requested data was not saved!\nRun again and do not interrupt the run to save data.")
267
- print("-----------------------------------------------------")
268
- exit(3)
269
- except Exception as e:
270
- print(f"Error occurred: {e}")
271
- exit(1001)
272
-
273
- # Save data to a single file
274
- if save_to_file:
275
- # Create directory path if it doesn't exist
276
- directory = os.path.dirname(save_to_file)
277
- if directory and not os.path.isdir(directory):
278
- os.makedirs(directory, exist_ok=True)
279
- filtered_observations = {obs_id: obs for obs_id, obs in all_observations.items()
280
- if float(obs['timestamp']) >= start_time}
281
- # Sort by timestamp
282
- sorted_observations = dict(sorted(filtered_observations.items(),
283
- key=lambda x: float(x[1]['timestamp'])))
284
-
285
- print(f"Saving {len(sorted_observations)} {'observation' if len(sorted_observations) == 1 else 'observations'} to {save_to_file}")
286
- print("This may take a while...")
287
- print("-----------------------------------------------------\n")
288
-
289
- if save_to_file.endswith('.nc'):
290
- first_obs_timestamp = float(next(iter(sorted_observations.values()))['timestamp'])
291
- convert_to_netcdf(sorted_observations, first_obs_timestamp, save_to_file)
292
- elif save_to_file.endswith('.json'):
293
- with open(save_to_file, 'w', encoding='utf-8') as f:
294
- json.dump(sorted_observations, f, indent=4)
295
-
296
- elif save_to_file.endswith('.csv'):
297
- with open(save_to_file, mode='w', newline='') as file:
298
- writer = csv.DictWriter(file, fieldnames=headers)
299
- writer.writeheader()
300
- writer.writerows(sorted_observations.values())
301
-
302
- elif save_to_file.endswith('.little_r'):
303
- little_r_records = format_little_r(list(sorted_observations.items()))
304
- with open(save_to_file, 'w') as file:
305
- file.write('\n'.join(little_r_records))
306
-
307
- print(f"Saved {len(sorted_observations)} {'observation' if len(sorted_observations) == 1 else 'observations'} to {save_to_file}")
308
-
309
- # Save data to multiple file
310
- elif output_format:
311
- # Create output directory if specified
312
- if output_dir:
313
- os.makedirs(output_dir, exist_ok=True)
314
- print(f"Files will be saved to {output_dir}")
315
- else:
316
- print(f"Files will be saved to {os.getcwd()}")
317
- print(f"Processing {fetced_so_far} {'observation' if fetced_so_far == 1 else 'observations'} and save them over multiple files.")
318
- print("This may take a while...")
319
- print("-----------------------------------------------------\n")
320
- # Track statistics per mission
321
- mission_stats = {} # {mission_name: {'files': 0, 'observations': 0}}
322
- total_observations_written = 0
323
-
324
- # Save bucketed data
325
- for (bucket_center, mission_name), observations in buckets.items():
326
- if observations:
327
- # Format hour to be the actual bucket center
328
- bucket_hour = int((bucket_center.hour + bucket_hours / 2) % 24)
329
-
330
- # Generate file name based on output format
331
- file_name_format = {
332
- 'csv': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv",
333
- 'json': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json",
334
- 'netcdf': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.nc",
335
- 'little_r': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r"
336
- }
337
- file_name = file_name_format[output_format] % (
338
- bucket_center.year, bucket_center.month, bucket_center.day,
339
- bucket_hour, bucket_hours)
340
-
341
- output_file = os.path.join(output_dir or '.', file_name)
342
-
343
- # Sort observations by timestamp within each bucket
344
- sorted_obs = sorted(observations.values(), key=lambda x: int(x['timestamp']))
345
-
346
- if output_format == 'netcdf':
347
- convert_to_netcdf(sorted_obs, bucket_center.timestamp(), output_file)
348
-
349
- elif output_format == 'csv':
350
- with open(output_file, mode='w', newline='') as file:
351
- writer = csv.DictWriter(file, fieldnames=headers)
352
- writer.writeheader()
353
- writer.writerows(sorted_obs)
354
-
355
- elif output_format == 'json':
356
- sorted_obs_dict = {k: v for k, v in sorted(observations.items(), key=lambda x: int(x[1]['timestamp']))}
357
- with open(output_file, 'w', encoding='utf-8') as file:
358
- json.dump(sorted_obs_dict, file, indent=4)
359
-
360
- elif output_format == 'little_r':
361
- little_r_records = format_little_r(sorted_obs)
362
- with open(output_file, 'w') as file:
363
- file.write('\n'.join(little_r_records))
364
- total_observations_written += len(observations)
365
-
366
- # Update statistics
367
- if mission_name not in mission_stats:
368
- mission_stats[mission_name] = {'files': 0, 'observations': 0}
369
- mission_stats[mission_name]['files'] += 1
370
- mission_stats[mission_name]['observations'] += len(observations)
371
- # Print total observations written
372
- print(f"Saved {total_observations_written} {'observation.' if total_observations_written == 1 else 'observations.'}")
373
- print("-----------------------------------------------------")
374
-
375
- # Print summary for each mission
376
- for mission_name, stats in mission_stats.items():
377
- print(f"Mission {mission_name}: Saved {stats['observations']} {'observation' if stats['observations'] == 1 else 'observations'} across {stats['files']} {'file' if stats['files'] == 1 else 'files'}")
378
-
379
- print("-----------------------------------------------------")
380
- print("All observations have been processed and saved.")
381
-
382
- def poll_observations(start_time, include_ids=None, include_updated_at=None, mission_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None, interval=60, bucket_hours=6.0, output_format=None, output_dir=None, callback=None):
383
- """
384
- Continuously polls for observations and saves to files in specified format.
385
- Will run indefinitely until interrupted.
386
-
387
- Args:
388
- start_time (str): Starting time in YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM or ISO format
389
- include_ids (bool): Include observation IDs in response.
390
- include_updated_at (bool): Include update timestamps in response.
391
- mission_id (str): Filter observations by mission ID.
392
- min_latitude (float): Minimum latitude boundary.
393
- max_latitude (float): Maximum latitude boundary.
394
- min_longitude (float): Minimum longitude boundary.
395
- max_longitude (float): Maximum longitude boundary.
396
- interval (int): Polling interval in seconds when no data is received (default: 60)
397
- bucket_hours (float): Size of time buckets in hours (default: 6.0)
398
- output_format (str): Format for bucket files ('json', 'csv', 'little_r', 'netcdf')
399
- output_dir (str): Directory for bucket files (default: current directory)
400
- callback (callable): Optional callback for data processing
401
- """
402
- # Print warning about infinite loop
403
- print(" ___________________________________________________________________")
404
- print("| WARNING \U000026A0\U0000FE0F |")
405
- print("| You are entering an endless loop. |")
406
- print("| |")
407
- print("| Press Ctrl + C anytime to exit. |")
408
- print("|___________________________________________________________________|\n\n")
409
- time.sleep(4)
410
-
411
- start_time = to_unix_timestamp(start_time)
412
-
413
- if output_format and output_format not in ['json', 'csv', 'little_r', 'netcdf']:
414
- print("Please use one of the following formats:")
415
- print(" - json\n - csv\n - little_r\n - netcdf")
416
- return
417
-
418
- if output_dir:
419
- os.makedirs(output_dir, exist_ok=True)
420
- print(f"\U0001F4C1 Files will be saved to {output_dir}")
421
- else:
422
- print(f"\U0001F4C1 Files will be saved to {os.getcwd()}")
423
-
424
- # Convert start_time to datetime
425
- start_dt = datetime.fromtimestamp(start_time, tz=timezone.utc)
426
-
427
- # Calculate first center time that's after start_time
428
- hours_since_day_start = start_dt.hour + start_dt.minute / 60
429
- bucket_number = hours_since_day_start // bucket_hours
430
- first_center = start_dt.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(hours=(bucket_number + 1) * bucket_hours)
431
-
432
- headers = [
433
- "timestamp", "id", "time", "latitude", "longitude", "altitude", "humidity",
434
- "mission_name", "pressure", "specific_humidity", "speed_u", "speed_v", "temperature"
435
- ]
436
-
437
- buckets = {} # {(bucket_center, mission_name): {'data': {}, 'last_write': timestamp, 'data_hash': str}}
438
- current_timestamp = start_time
439
- fetched_so_far = 0
440
- mission_stats = {}
441
-
442
- print(f"Starting continuous observations polling from {datetime.fromtimestamp(start_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC")
443
- print(f"Polling interval: {interval} seconds")
444
- print("-----------------------------------------------------")
445
-
446
- try:
447
- while True:
448
- observations_page = get_observations_page(
449
- since=current_timestamp,
450
- min_latitude=min_latitude,
451
- max_latitude=max_latitude,
452
- min_longitude=min_longitude,
453
- max_longitude=max_longitude,
454
- include_updated_at=include_updated_at,
455
- mission_id=mission_id,
456
- include_ids=include_ids,
457
- include_mission_name=True
458
- )
459
-
460
- if observations_page is None:
461
- print(f"\nNull response from API. Retrying in {interval} seconds ...")
462
- time.sleep(interval)
463
- continue
464
-
465
- observations = observations_page.get('observations', [])
466
-
467
- # Invoke the callback with fetched super observations
468
- if callback:
469
- print("--------\nCallback\n--------")
470
- callback(observations)
471
-
472
- if observations:
473
- fetched_so_far += len(observations)
474
- print_current_timestamp = current_timestamp if current_timestamp < 1e11 else current_timestamp / 1e9
475
- print(f"Fetched {fetched_so_far} observations")
476
- print(f"Current time: {datetime.fromtimestamp(print_current_timestamp, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
477
- print("-----------------------------------------------------")
478
-
479
- for obs in observations:
480
- if 'mission_name' not in obs:
481
- continue
482
-
483
- timestamp = obs.get('timestamp')
484
- if not timestamp:
485
- continue
486
-
487
- try:
488
- obs_time = datetime.fromtimestamp(timestamp, tz=timezone.utc)
489
- except (OSError, ValueError, TypeError, OverflowError):
490
- continue
491
-
492
- mission_name = obs.get('mission_name', 'Unknown')
493
- obs['time'] = obs_time.replace(tzinfo=timezone.utc).isoformat()
494
-
495
- processed_obs = {
496
- header: obs.get(header) if obs.get(header) not in [None, '', ' '] else 'None'
497
- for header in headers
498
- }
499
-
500
- obs_id = f"{timestamp}_{mission_name}"
501
-
502
- if obs_time >= start_dt:
503
- hours_diff = (obs_time - first_center).total_seconds() / 3600
504
- bucket_index = floor(hours_diff / bucket_hours)
505
- bucket_center = first_center + timedelta(hours=bucket_index * bucket_hours)
506
- bucket_end = bucket_center + timedelta(hours=bucket_hours)
507
-
508
- if obs_time <= bucket_end:
509
- bucket_key = (bucket_center, mission_name)
510
-
511
- # Initialize bucket if needed
512
- if bucket_key not in buckets:
513
- buckets[bucket_key] = {
514
- 'data': {},
515
- 'last_write': 0,
516
- 'data_hash': ''
517
- }
518
-
519
- # Update bucket data
520
- buckets[bucket_key]['data'][obs_id] = processed_obs
521
-
522
- # Track statistics
523
- if mission_name not in mission_stats:
524
- mission_stats[mission_name] = {'files': set(), 'observations': 0}
525
- mission_stats[mission_name]['observations'] += 1
526
-
527
- # Calculate new data hash
528
- sorted_data = sorted(buckets[bucket_key]['data'].items(), key=lambda x: int(x[1]['timestamp']))
529
- data_hash = hashlib.md5(str(sorted_data).encode()).hexdigest()
530
-
531
- # Check if we should write the bucket
532
- current_time = datetime.now(timezone.utc)
533
- time_since_last_write = current_time.timestamp() - buckets[bucket_key]['last_write']
534
- data_changed = data_hash != buckets[bucket_key]['data_hash']
535
-
536
- # Write if it's been more than interval seconds since last write OR if data has changed
537
- if (time_since_last_write >= interval or data_changed) and output_format:
538
- bucket_hour = int((bucket_center.hour + bucket_hours/2) % 24)
539
-
540
- file_name_format = {
541
- 'csv': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv",
542
- 'json': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json",
543
- 'netcdf': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.nc",
544
- 'little_r': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r"
545
- }
546
-
547
- file_name = file_name_format[output_format] % (
548
- bucket_center.year, bucket_center.month, bucket_center.day,
549
- bucket_hour, bucket_hours)
550
-
551
- output_file = os.path.join(output_dir or '.', file_name)
552
- sorted_obs = [obs for _, obs in sorted_data]
553
-
554
- # Write the file based on format
555
- try:
556
- if output_format == 'netcdf':
557
- convert_to_netcdf(sorted_obs, bucket_center.timestamp(), output_file)
558
- elif output_format == 'csv':
559
- with open(output_file, mode='w', newline='') as file:
560
- writer = csv.DictWriter(file, fieldnames=headers)
561
- writer.writeheader()
562
- writer.writerows(sorted_obs)
563
- elif output_format == 'json':
564
- sorted_obs_dict = {k: v for k, v in sorted_data}
565
- with open(output_file, 'w', encoding='utf-8') as file:
566
- json.dump(sorted_obs_dict, file, indent=4)
567
- elif output_format == 'little_r':
568
- little_r_records = format_little_r(sorted_obs)
569
- with open(output_file, 'w') as file:
570
- file.write('\n'.join(little_r_records))
571
-
572
- buckets[bucket_key]['last_write'] = current_time.timestamp()
573
- buckets[bucket_key]['data_hash'] = data_hash
574
- mission_stats[mission_name]['files'].add(output_file)
575
- except Exception as e:
576
- print(f"Error writing bucket file {file_name}: {str(e)}")
577
-
578
- # Clean up old buckets
579
- current_time = datetime.now(timezone.utc)
580
- buckets = {
581
- k: v for k, v in buckets.items()
582
- if current_time - k[0] <= timedelta(hours=bucket_hours * 2) # Keep slightly longer for potential updates
583
- }
584
-
585
- next_timestamp = observations_page.get('next_since')
586
- has_next_page = observations_page.get('has_next_page', False)
587
-
588
- if next_timestamp and next_timestamp > current_timestamp:
589
- current_timestamp = next_timestamp
590
- elif not has_next_page:
591
- print("-----------------------------------------------------")
592
- print(f"\U0001F503 Latest super observations data have been processed.\nRetrying getting new observations data in {interval} seconds...")
593
- print("-----------------------------------------------------")
594
- time.sleep(interval)
595
- continue
596
-
597
- if not observations:
598
- print(f"\U0001F503 No new super observations data available.\n Retrying getting new observations data in {interval} seconds...")
599
- print("-----------------------------------------------------")
600
- time.sleep(interval)
601
-
602
- except KeyboardInterrupt:
603
- print("\n\n\U0001F6D1 Received interrupt, stopping...")
604
- print("-----------------------------------------------------")
605
- for mission_name, stats in mission_stats.items():
606
- print(f"Mission {mission_name}: {stats['observations']} observations across {len(stats['files'])} files")
607
- except Exception as e:
608
- print(f"Error occurred: {str(e)}")
609
- exit(1001)
610
- finally:
611
- print("-----------------------------------------------------")
612
- print("Finished processing observations.")
613
-
614
- # Super Observations
615
- # ------------
616
- def get_super_observations_page(since=None, min_time=None, max_time=None, include_ids=None, include_mission_name=None, include_updated_at=None, mission_id=None, save_to_file=None):
79
+ def get_super_observations_page(since=None, min_time=None, max_time=None, include_ids=None, include_mission_name=None, include_updated_at=None, mission_id=None, output_file=None):
617
80
  """
618
81
  Retrieves super observations page based on specified filters.
619
82
 
@@ -625,7 +88,7 @@ def get_super_observations_page(since=None, min_time=None, max_time=None, includ
625
88
  include_mission_name (bool): Include mission names in response.
626
89
  include_updated_at (bool): Include update timestamps in response.
627
90
  mission_id (str): Filter observations by mission ID.
628
- save_to_file (str): Optional path to save the response data.
91
+ output_file (str): Optional path to save the response data.
629
92
  If provided, saves the data in CSV format.
630
93
 
631
94
  Returns:
@@ -633,7 +96,7 @@ def get_super_observations_page(since=None, min_time=None, max_time=None, includ
633
96
  """
634
97
 
635
98
  url = f"{DATA_API_BASE_URL}/super_observations.json"
636
-
99
+
637
100
  params = {}
638
101
  if since:
639
102
  params["since"] = to_unix_timestamp(since)
@@ -649,316 +112,358 @@ def get_super_observations_page(since=None, min_time=None, max_time=None, includ
649
112
  params["include_mission_name"] = True
650
113
  if include_updated_at:
651
114
  params["include_updated_at"] = True
652
-
115
+
653
116
  params = {k: v for k, v in params.items() if v is not None}
654
-
117
+
655
118
  response = make_api_request(url, params=params)
656
- if save_to_file:
657
- save_csv_json(save_to_file, response, csv_data_key='observations')
658
-
119
+ if output_file:
120
+ save_arbitrary_response(output_file, response, csv_data_key='observations')
121
+
659
122
  return response
660
123
 
661
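The super observations variant has the same shape; a minimal sketch (times and file name illustrative, credentials assumed configured):

from windborne.data_api import get_super_observations_page

page = get_super_observations_page(
    min_time="2024-01-01 00:00:00",
    max_time="2024-01-01 06:00:00",
    include_mission_name=True,
    output_file="super_observations.json",   # optional, handled by save_arbitrary_response
)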
- def super_observations(start_time, end_time=None, interval=60, save_to_file=None, bucket_hours=6.0, output_format=None, output_dir=None, callback=None):
124
+
125
+ def save_observations_batch(observations, output_file, output_format, output_dir, start_time=None, end_time=None, bucket_hours=6.0, csv_headers=None, custom_save=None, prevent_overwrites=False):
126
+ filtered_observations = observations
127
+ if start_time is not None:
128
+ filtered_observations = [obs for obs in observations if float(obs['timestamp']) >= start_time]
129
+
130
+ if end_time is not None:
131
+ filtered_observations = [obs for obs in observations if float(obs['timestamp']) <= end_time]
132
+
133
+ # Sort by timestamp
134
+ sorted_observations = sorted(filtered_observations, key=lambda x: float(x['timestamp']))
135
+
136
+ if output_file:
137
+ if custom_save is not None:
138
+ custom_save(sorted_observations, output_file)
139
+ else:
140
+ save_observations_to_file(sorted_observations, output_file, csv_headers=csv_headers, prevent_overwrites=prevent_overwrites)
141
+ else:
142
+ save_observations_batch_in_buckets(sorted_observations, output_format, output_dir, bucket_hours=bucket_hours, csv_headers=csv_headers, custom_save=custom_save, prevent_overwrites=prevent_overwrites)
143
+
144
+
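save_observations_batch above is the routing point: with output_file set it writes everything to that single path, otherwise it delegates to the per-mission bucketing helper defined below. A sketch with illustrative data:

from windborne.data_api import save_observations_batch

batch = [
    {"timestamp": 1700000000, "mission_id": "m-1", "mission_name": "W-100", "temperature": 271.4},
]
headers = ["timestamp", "mission_id", "mission_name", "temperature"]

# Single file: the batch is sorted by timestamp and written to one JSON file.
save_observations_batch(batch, "all_obs.json", None, None, csv_headers=headers)

# Bucketed: no output_file, so one CSV per mission per 6-hour window under ./buckets.
save_observations_batch(batch, None, "csv", "buckets", csv_headers=headers)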
145
+ def save_observations_to_file(sorted_observations, output_file, csv_headers=None, prevent_overwrites=False):
146
+ if len(sorted_observations) == 0:
147
+ print(f"Skipping empty file {output_file}")
148
+ return
149
+
150
+ directory = os.path.dirname(output_file)
151
+ if directory and not os.path.isdir(directory):
152
+ os.makedirs(directory, exist_ok=True)
153
+
154
+ if prevent_overwrites and os.path.exists(output_file):
155
+ # save to outputfile.0.ext, outputfile.1.ext, etc.
156
+ base, ext = os.path.splitext(output_file)
157
+ if ext[0] == '.':
158
+ ext = ext[1:]
159
+
160
+ # if ext is already a .0.ext, we need to split it again
161
+ i = 1
162
+ if '.' in ext and ext.split('.')[0].isdigit():
163
+ i = int(ext.split('.')[0]) + 1
164
+ ext = '.'.join(ext.split('.')[1:])
165
+
166
+ while os.path.exists(f"{base}.{i}.{ext}"):
167
+ i += 1
168
+
169
+ output_file = f"{base}.{i}.{ext}"
170
+
171
+ print(f"Saving {len(sorted_observations)} {'observation' if len(sorted_observations) == 1 else 'observations'} to {output_file}")
172
+ if len(sorted_observations) > 10_000:
173
+ print("This may take a while...")
174
+ print("-----------------------------------------------------\n")
175
+
176
+ if output_file.endswith('.nc'):
177
+ first_obs_timestamp = float(sorted_observations[0]['timestamp'])
178
+ convert_to_netcdf(sorted_observations, first_obs_timestamp, output_file)
179
+
180
+ elif output_file.endswith('.json'):
181
+ with open(output_file, 'w', encoding='utf-8') as f:
182
+ json.dump(sorted_observations, f, indent=4)
183
+
184
+ elif output_file.endswith('.csv'):
185
+ with open(output_file, mode='w', newline='') as file:
186
+ writer = csv.DictWriter(file, fieldnames=csv_headers)
187
+ writer.writeheader()
188
+ writer.writerows(sorted_observations)
189
+
190
+ elif output_file.endswith('.little_r'):
191
+ little_r_records = format_little_r(sorted_observations)
192
+ with open(output_file, 'w') as file:
193
+ file.write('\n'.join(little_r_records))
194
+
195
+ print(f"Saved {len(sorted_observations)} {'observation' if len(sorted_observations) == 1 else 'observations'} to {output_file}")
196
+
197
+
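A small sketch of how the prevent_overwrites branch above picks file names (rows and paths illustrative):

from windborne.data_api import save_observations_to_file

rows = [{"timestamp": 1700000000, "temperature": 271.4}]
headers = ["timestamp", "temperature"]

save_observations_to_file(rows, "out/obs.csv", csv_headers=headers)
# With prevent_overwrites=True an existing out/obs.csv is left in place and the new
# data goes to out/obs.1.csv, then out/obs.2.csv, and so on.
save_observations_to_file(rows, "out/obs.csv", csv_headers=headers, prevent_overwrites=True)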
198
+ def save_observations_batch_in_buckets(sorted_observations, output_format, output_dir, bucket_hours=6.0, csv_headers=None, custom_save=None, prevent_overwrites=False):
199
+ if output_dir:
200
+ os.makedirs(output_dir, exist_ok=True)
201
+ print(f"Files will be saved to {output_dir}")
202
+ else:
203
+ print(f"Files will be saved to {os.getcwd()}")
204
+
205
+
206
+ by_mission = {}
207
+ mission_names = {}
208
+ for observation in sorted_observations:
209
+ mission_id = observation['mission_id']
210
+ if mission_id not in by_mission:
211
+ by_mission[mission_id] = []
212
+ mission_names[mission_id] = observation.get('mission_name', mission_id)
213
+
214
+ by_mission[mission_id].append(observation)
215
+
216
+ for mission_id, accumulated_observations in by_mission.items():
217
+ mission_name = mission_names[mission_id]
218
+ start_index = 0
219
+ earliest_time = accumulated_observations[0]['timestamp']
220
+ curtime = earliest_time - earliest_time % (bucket_hours * 60 * 60)
221
+
222
+ for i in range(len(accumulated_observations)):
223
+ segment = None
224
+ if accumulated_observations[i]['timestamp'] - curtime > bucket_hours * 60 * 60:
225
+ segment = accumulated_observations[start_index:i]
226
+
227
+ if i == len(accumulated_observations) - 1:
228
+ segment = accumulated_observations[start_index:]
229
+
230
+ if segment is None:
231
+ continue
232
+
233
+ bucket_start = datetime.fromtimestamp(curtime, tz=timezone.utc)
234
+
235
+ file_name = f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh" % (
236
+ bucket_start.year, bucket_start.month, bucket_start.day,
237
+ bucket_start.hour, bucket_hours)
238
+
239
+ extension = f".{output_format}"
240
+ if output_format == 'netcdf':
241
+ extension = '.nc'
242
+
243
+ output_file = os.path.join(output_dir or '.', file_name + extension)
244
+ if custom_save is not None:
245
+ custom_save(segment, output_file)
246
+ else:
247
+ save_observations_to_file(segment, output_file, csv_headers=csv_headers, prevent_overwrites=prevent_overwrites)
248
+
249
+ start_index = i
250
+ curtime += timedelta(hours=bucket_hours).seconds
251
+
252
+
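Worked example of the bucket alignment used by save_observations_batch_in_buckets above (the timestamp is illustrative):

earliest_time = 1_700_000_000                               # 2023-11-14 22:13:20 UTC
bucket_hours = 6.0
curtime = earliest_time - earliest_time % (bucket_hours * 60 * 60)
# curtime == 1_699_984_800.0, i.e. 2023-11-14 18:00:00 UTC, so the first bucket spans
# 18:00-24:00 UTC and the file is named like WindBorne_<mission_name>_2023-11-14_18_6h.csv
# (with a .nc extension when output_format is 'netcdf').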
253
+ def get_observations_core(api_args, csv_headers, get_page, start_time=None, end_time=None, output_file=None, bucket_hours=6.0, output_format=None, output_dir=None, callback=None, custom_save=None, exit_at_end=True):
662
254
  """
663
- Fetches super observations between a start time and an optional end time and saves to files in specified format.
255
+ Fetches observations or super observations between a start time and an optional end time and saves to files in specified format.
664
256
  Files are broken up into time buckets, with filenames containing the time at the mid-point of the bucket.
665
257
  For example, for 6-hour buckets centered on 00 UTC, the start time should be 21 UTC of the previous day.
666
258
 
667
259
  Args:
260
+ api_args (dict): Arguments to pass to the API endpoint.
261
+ csv_headers (list): Headers for CSV files.
262
+ get_page (callable): Function to fetch a page of observations.
668
263
  start_time (str): A date string, supporting formats YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM and ISO strings,
669
264
  representing the starting time of fetching data.
670
265
  end_time (str): Optional. A date string, supporting formats YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM and ISO strings,
671
266
  representing the end time of fetching data. If not provided, current time is used as end time.
672
- interval (int): Optional. Interval in seconds between polls when a empty page is received (default: 60)
673
- save_to_file (str): Saves all data to a single file instead of bucketing.
267
+
268
+
269
+
270
+ output_file (str): Saves all data to a single file instead of bucketing.
674
271
  Supported formats are '.csv', '.json', '.little_r' and '.nc'
675
272
  bucket_hours (int): Optional. Size of time buckets in hours. Defaults to 6 hours.
676
273
  output_format (str): Optional. Format to save data in separate files. Supported formats are 'json', 'csv', 'little_r' and 'netcdf'.
677
274
  output_dir (str): Optional. Directory path where the separate files should be saved. If not provided, files will be saved in current directory.
678
275
  callback (callable): Optional callback function that receives (super observations, metadata) before saving.
679
276
  This allows custom processing or saving in custom formats.
277
+ custom_save (callable): Optional function to save observations in a custom format.
278
+ exit_at_end (bool): Whether to exit after fetching all observations or keep polling.
680
279
  """
280
+ if output_format and not custom_save:
281
+ verify_observations_output_format(output_format)
282
+
283
+ if output_file and not custom_save:
284
+ verify_observations_output_format(output_file.split('.')[-1])
681
285
 
682
- start_time = to_unix_timestamp(start_time)
286
+ # When we don't clear batches, we can safely overwrite the output files; this is nice
287
+ # However, it also holds everything in memory, so we should only do this when we're not going to run indefinitely
288
+ clear_batches = not exit_at_end
289
+ batch_size = 10_000
290
+ if not clear_batches: # save less frequently
291
+ batch_size = 100_000
683
292
 
684
- if end_time:
293
+ if start_time is not None:
294
+ start_time = to_unix_timestamp(start_time)
295
+
296
+ if end_time is not None:
685
297
  end_time = to_unix_timestamp(end_time)
686
- else:
687
- end_time = int(datetime.now().timestamp())
688
-
689
- # Supported formats for saving into separate files:
690
- # - csv (default)
691
- # - little_r
692
- # - json
693
- # - netcdf
694
- if output_format and output_format not in ['json', 'csv', 'little_r', 'netcdf']:
695
- print("Please use one of the following formats:")
696
- print(" - json")
697
- print(" - csv")
698
- print(" - little_r")
699
- print(" - netcdf")
700
- return
701
298
 
702
- # Supported formats for saving into a single file:
703
- # NOTE: for poll_super_observations we handle .csv saving within poll_super_observations and not using save_csv_json
704
- # - .csv
705
- # - .json
706
- # - .little_r
707
- # - .nc
708
- if save_to_file and not save_to_file.endswith(('.json', '.csv', '.little_r', '.nc')):
709
- print("Please use one of the following formats:")
710
- print(" - .json")
711
- print(" - .csv")
712
- print(" - .little_r")
713
- print(" - .nc")
714
- return
299
+ def save_with_context(observations_batch):
300
+ save_observations_batch(
301
+ observations_batch,
302
+ output_file=output_file,
303
+ output_format=output_format,
304
+ output_dir=output_dir,
305
+ start_time=start_time,
306
+ end_time=end_time,
307
+ bucket_hours=bucket_hours,
308
+ csv_headers=csv_headers,
309
+ custom_save=custom_save,
310
+ prevent_overwrites=clear_batches
311
+ )
715
312
 
716
- # Convert start_time to datetime
717
- start_dt = datetime.fromtimestamp(start_time, tz=timezone.utc)
313
+ result = iterate_through_observations(get_page, api_args, callback=callback, batch_callback=save_with_context, exit_at_end=exit_at_end, clear_batches=clear_batches, batch_size=batch_size)
314
+ if isinstance(result, int):
315
+ print(f"Processed {result} observations")
718
316
 
719
- # Calculate first center time that's after start_time
720
- hours_since_day_start = start_dt.hour + start_dt.minute / 60
721
- bucket_number = hours_since_day_start // bucket_hours
722
- first_center = start_dt.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(hours=(bucket_number + 1) * bucket_hours)
317
+ return result
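The custom_save hook accepted above is called as custom_save(observations, output_file), where observations is the sorted list of dicts for the batch or bucket. A sketch of a hypothetical JSON-lines saver (not part of the package):

import json

def save_as_json_lines(observations, output_file):
    # Write one JSON object per line to the path the library has already built.
    with open(output_file, "w", encoding="utf-8") as f:
        for obs in observations:
            f.write(json.dumps(obs) + "\n")

Passed as custom_save=save_as_json_lines, it replaces the built-in writers; when bucketing, output_format then only determines the file extension.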
723
318
 
724
319
 
725
- # Headers for CSV files
726
- headers = [
727
- "timestamp", "id", "time", "latitude", "longitude", "altitude", "humidity",
728
- "mission_name", "pressure", "specific_humidity", "speed_u", "speed_v", "temperature"
729
- ]
320
+ def iterate_through_observations(get_page, args, callback=None, batch_callback=None, exit_at_end=True, batch_size=10_000, clear_batches=True):
321
+ """
322
+ Repeatedly calls `get_page` with `args`
323
+ For each page fetched, it calls `callback` with the full response
324
+ Every `batch_size` observations fetched, it calls `batch_callback` with the batched observations (if provided)
325
+ Returns an array of all observations fetched if no batch_callback is provided
730
326
 
731
- if save_to_file:
732
- all_observations = {}
733
- else:
734
- buckets = {}
735
-
736
- # Initialize the polling loop
737
- current_timestamp = start_time
738
- has_next_page = True
739
- fetced_so_far = 0
740
-
741
- print(f"Starting polling super observations\nfrom {datetime.fromtimestamp(start_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC to {datetime.fromtimestamp(end_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC")
742
- print("-----------------------------------------------------")
743
-
744
- while has_next_page:
745
- try:
746
- # Fetch observations
747
- observations_page = get_super_observations_page(
748
- since=current_timestamp,
749
- min_time=start_time,
750
- max_time=end_time,
751
- include_ids=True,
752
- include_mission_name=True
753
- )
754
-
755
- if observations_page is None:
756
- print("\n----------------------------------------------------------------------")
757
- print(f"Received null response from API. Retrying in {interval} seconds ...")
758
- print("----------------------------------------------------------------------")
759
- time.sleep(interval)
760
- continue
327
+ Args:
328
+ get_page (callable): Function to fetch a page of observations
329
+ args (dict): Arguments to pass to `get_page`
330
+ callback (callable): Function to call with each page of observations
331
+ batch_callback (callable): Function to call with a batch of observations
332
+ exit_at_end (bool): Whether to exit after fetching all observations or keep polling
333
+ batch_size (int): Number of observations to accumulate before calling `batch_callback`
334
+ clear_batches (bool): Whether to clear the batched observations after calling `batch_callback`
335
+ """
761
336
 
762
- observations = observations_page.get('observations', [])
763
- fetced_so_far = fetced_so_far + len(observations)
764
- print_current_timestamp = current_timestamp if current_timestamp < 1e11 else current_timestamp / 1e9
765
- print(f"Fetched {fetced_so_far} super observations")
766
- print(f"Current time: {datetime.fromtimestamp(print_current_timestamp, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
767
- print("-----------------------------------------------------")
768
-
769
- # Invoke the callback with fetched super observations
770
- if callback:
771
- print("--------\nCallback\n--------")
772
- callback(observations)
773
-
774
- for obs in observations:
775
- if 'mission_name' not in obs:
776
- print("Warning: got an super observation without a mission name")
777
- continue
778
-
779
- timestamp = obs.get('timestamp')
780
- if not timestamp:
781
- continue
782
-
783
- try:
784
- obs_time = datetime.fromtimestamp(timestamp, tz=timezone.utc)
785
- except (OSError, ValueError, TypeError, OverflowError):
786
- continue
787
-
788
- mission_name = obs.get('mission_name', 'Unknown')
789
- obs['time'] = obs_time.replace(tzinfo=timezone.utc).isoformat()
790
-
791
- processed_obs = {}
792
- for header in headers:
793
- value = obs.get(header)
794
- if value is None or value == '' or (isinstance(value, str) and not value.strip()):
795
- processed_obs[header] = 'None'
796
- else:
797
- processed_obs[header] = value
798
-
799
- obs_id = f"{timestamp}_{mission_name}"
800
-
801
- if save_to_file:
802
- all_observations[obs_id] = processed_obs
803
- else:
804
- if obs_time >= start_dt: # Only process observations after start time
805
- hours_diff = (obs_time - first_center).total_seconds() / 3600
806
- bucket_index = floor(hours_diff / bucket_hours)
807
- bucket_center = first_center + timedelta(hours=bucket_index * bucket_hours)
808
- bucket_end = bucket_center + timedelta(hours=bucket_hours)
809
-
810
- if obs_time <= bucket_end: # Include observations up to the end of the bucket
811
- bucket_key = (bucket_center, mission_name)
812
- if bucket_key not in buckets:
813
- buckets[bucket_key] = {}
814
- buckets[bucket_key][obs_id] = processed_obs
815
-
816
- # Update pagination
817
- next_timestamp = observations_page.get('next_since')
818
- has_next_page = observations_page.get('has_next_page', False)
819
-
820
- if not has_next_page or not next_timestamp or next_timestamp <= current_timestamp:
821
- print("-----------------------------------------------------\n")
822
- print("Fetching complete.")
823
- print("\n-----------------------------------------------------")
824
- break
337
+ batched_observations = []
338
+ since = args.get('since', 0)
339
+ processed_count = 0
825
340
 
826
- current_timestamp = next_timestamp
827
-
828
- except KeyboardInterrupt:
829
- print("\n\n\U0001F6D1 Received interrupt, stopping...")
830
- print("-----------------------------------------------------")
831
- print("Requested data was not saved!\nRun again and do not interrupt the run to save data.")
832
- print("-----------------------------------------------------")
833
- exit(3)
834
- except Exception as e:
835
- print(f"Error occurred: {e}")
836
- exit(1001)
837
-
838
- # Save data to a single file
839
- if save_to_file:
840
- # Create directory path if it doesn't exist
841
- directory = os.path.dirname(save_to_file)
842
- if directory and not os.path.isdir(directory):
843
- os.makedirs(directory, exist_ok=True)
844
- filtered_observations = {obs_id: obs for obs_id, obs in all_observations.items()
845
- if float(obs['timestamp']) >= start_time}
846
- # Sort by timestamp
847
- sorted_observations = dict(sorted(filtered_observations.items(),
848
- key=lambda x: float(x[1]['timestamp'])))
849
-
850
- print(f"Saving {len(sorted_observations)} super {'observation' if len(sorted_observations) == 1 else 'observations'} to {save_to_file}")
851
- print("This may take a while...")
852
- print("-----------------------------------------------------\n")
853
-
854
- if save_to_file.endswith('.nc'):
855
- first_obs_timestamp = float(next(iter(sorted_observations.values()))['timestamp'])
856
- convert_to_netcdf(sorted_observations, first_obs_timestamp, save_to_file)
857
-
858
- elif save_to_file.endswith('.json'):
859
- with open(save_to_file, 'w', encoding='utf-8') as f:
860
- json.dump(sorted_observations, f, indent=4)
861
-
862
- elif save_to_file.endswith('.csv'):
863
- with open(save_to_file, mode='w', newline='') as file:
864
- writer = csv.DictWriter(file, fieldnames=headers)
865
- writer.writeheader()
866
- writer.writerows(sorted_observations.values())
867
-
868
- elif save_to_file.endswith('.little_r'):
869
- little_r_records = format_little_r(list(sorted_observations.items()))
870
- with open(save_to_file, 'w') as file:
871
- file.write('\n'.join(little_r_records))
872
-
873
- print(f"Saved {len(sorted_observations)} super {'observation' if len(sorted_observations) == 1 else 'observations'} to {save_to_file}")
874
-
875
- # Save data to multiple file
876
- elif output_format:
877
- # Create output directory if specified
878
- if output_dir:
879
- os.makedirs(output_dir, exist_ok=True)
880
- print(f"Files will be saved to {output_dir}")
341
+ if args.get('min_time') is not None:
342
+ args['min_time'] = to_unix_timestamp(args['min_time'])
343
+ if since == 0:
344
+ since = args['min_time']
345
+
346
+ if args.get('max_time') is not None:
347
+ args['max_time'] = to_unix_timestamp(args['max_time'])
348
+
349
+ while True:
350
+ args = {**args, 'since': since}
351
+ response = get_page(**args)
352
+ if not response:
353
+ print("Received null response from API. Retrying in 10 seconds...")
354
+ time.sleep(10)
355
+ continue
356
+
357
+ observations = response.get('observations', [])
358
+
359
+ if callback:
360
+ callback(response)
881
361
  else:
882
- print(f"Files will be saved to {os.getcwd()}")
362
+ since_timestamp = since
363
+ if since_timestamp > 4_000_000_000: # in nanoseconds rather than seconds
364
+ since_timestamp /= 1_000_000_000
365
+ since_dt = datetime.fromtimestamp(since_timestamp, timezone.utc)
366
+ print(f"Fetched page with {len(observations)} observation(s) updated {since_dt} or later")
883
367
 
884
- print(f"Processing {fetced_so_far} super {'observation' if fetced_so_far == 1 else 'observations'} and save them over multiple files.")
885
- print("This may take a while...")
886
- print("-----------------------------------------------------\n")
887
- # Track statistics per mission
888
- mission_stats = {} # {mission_name: {'files': 0, 'observations': 0}}
889
- total_observations_written = 0
890
-
891
- # Save bucketed data
892
- for (bucket_center, mission_name), observations in buckets.items():
893
- if observations:
894
- # Format hour to be the actual bucket center
895
- bucket_hour = int((bucket_center.hour + bucket_hours/2) % 24)
896
-
897
- # Generate file name based on output format
898
- file_name_format = {
899
- 'csv': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv",
900
- 'json': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json",
901
- 'netcdf': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.nc",
902
- 'little_r': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r"
903
- }
904
- file_name = file_name_format[output_format] % (
905
- bucket_center.year, bucket_center.month, bucket_center.day,
906
- bucket_hour, bucket_hours)
907
-
908
- output_file = os.path.join(output_dir or '.', file_name)
909
-
910
- # Sort observations by timestamp within each bucket
911
- sorted_obs = sorted(observations.values(), key=lambda x: int(x['timestamp']))
912
-
913
- if output_format == 'netcdf':
914
- convert_to_netcdf(sorted_obs, bucket_center.timestamp(), output_file)
915
-
916
- elif output_format == 'csv':
917
- with open(output_file, mode='w', newline='') as file:
918
- writer = csv.DictWriter(file, fieldnames=headers)
919
- writer.writeheader()
920
- writer.writerows(sorted_obs)
921
-
922
- elif output_format == 'json':
923
- sorted_obs_dict = {k: v for k, v in sorted(observations.items(), key=lambda x: int(x[1]['timestamp']))}
924
- with open(output_file, 'w', encoding='utf-8') as file:
925
- json.dump(sorted_obs_dict, file, indent=4)
926
-
927
- elif output_format == 'little_r':
928
- little_r_records = format_little_r(sorted_obs)
929
- with open(output_file, 'w') as file:
930
- file.write('\n'.join(little_r_records))
931
- total_observations_written += len(observations)
932
-
933
- # Update statistics
934
- if mission_name not in mission_stats:
935
- mission_stats[mission_name] = {'files': 0, 'observations': 0}
936
- mission_stats[mission_name]['files'] += 1
937
- mission_stats[mission_name]['observations'] += len(observations)
938
- # Print total super observations written
939
- print(f"Total super {'observation' if total_observations_written == 1 else 'observations'} written: {total_observations_written}")
940
- print("-----------------------------------------------------")
941
-
942
- # Print summary for each mission
943
- for mission_name, stats in mission_stats.items():
944
- print(f"Mission {mission_name}: Saved {stats['observations']} super {'observation' if stats['observations'] == 1 else 'observations'} across {stats['files']} {'file' if stats['files'] == 1 else 'files'}")
945
-
946
- print("-----------------------------------------------------")
947
- print("All super observations have been processed and saved.")
948
-
949
- def poll_super_observations(start_time, interval=60, bucket_hours=6.0, output_format=None, output_dir=None, callback=None):
368
+ batched_observations.extend(observations)
369
+
370
+ processed_count += len(observations)
371
+
372
+ if batch_callback and (len(batched_observations) >= batch_size or not response['has_next_page']):
373
+ batch_callback(batched_observations)
374
+ if clear_batches:
375
+ batched_observations = []
376
+
377
+ if not response['has_next_page']:
378
+ print("No more data available.")
379
+ if exit_at_end:
380
+ break
381
+
382
+ time.sleep(60)
383
+ continue
384
+
385
+ since = response['next_since']
386
+
387
+ if batch_callback and len(batched_observations) > 0:
388
+ batch_callback(batched_observations)
389
+ if clear_batches:
390
+ batched_observations = []
391
+
392
+ if batch_callback:
393
+ return processed_count
394
+ else:
395
+ return batched_observations
396
+
397
+
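For illustration, iterate_through_observations can be driven directly with one of the page functions defined earlier; the callback below is a sketch that just reports page sizes (times are illustrative, credentials assumed configured):

from windborne.data_api import iterate_through_observations, get_super_observations_page

def report_page(response):
    # Receives the full API response for every page fetched.
    print("page:", len(response.get("observations", [])), "observations")

# Without a batch_callback, the accumulated observations are returned as a list.
all_obs = iterate_through_observations(
    get_super_observations_page,
    {"min_time": "2024-01-01 00:00:00", "max_time": "2024-01-01 06:00:00"},
    callback=report_page,
)
print(len(all_obs), "total observations")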
398
+ def verify_observations_output_format(output_format):
399
+ valid_formats = ['json', 'csv', 'little_r', 'netcdf', 'nc']
400
+ if output_format in valid_formats:
401
+ return True
402
+
403
+ print("Please use one of the following formats:")
404
+ for fmt in valid_formats:
405
+ print(f" - {fmt}")
406
+
407
+ exit(1)
408
+
409
+ def get_observations(start_time, end_time=None, include_updated_at=None, mission_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None, output_file=None, bucket_hours=6.0, output_format=None, output_dir=None, callback=None, custom_save=None, exit_at_end=True):
950
410
  """
951
- Continuously polls for super observations and saves to files in specified format.
952
- Will run indefinitely until interrupted.
411
+ Fetches observations between a start time and an optional end time and saves to files in specified format.
412
+ Files are broken up into time buckets, with filenames containing the time at the mid-point of the bucket.
413
+ For example, for 6-hour buckets centered on 00 UTC, the start time should be 21 UTC of the previous day.
953
414
 
954
415
  Args:
955
- start_time (str): Starting time in YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM or ISO format
956
- interval (int): Polling interval in seconds when no data is received (default: 60)
957
- bucket_hours (float): Size of time buckets in hours (default: 6.0)
958
- output_format (str): Format for bucket files ('json', 'csv', 'little_r', 'netcdf')
959
- output_dir (str): Directory for bucket files (default: current directory)
960
- callback (callable): Optional callback for data processing
416
+ start_time (str): A date string, supporting formats YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM and ISO strings,
417
+ representing the starting time of fetching data.
418
+ end_time (str): Optional. A date string, supporting formats YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM and ISO strings,
419
+ representing the end time of fetching data. If not provided, current time is used as end time.
420
+
421
+ include_updated_at (bool): Include update timestamps in response.
422
+ mission_id (str): Filter observations by mission ID.
423
+ min_latitude (float): Minimum latitude boundary.
424
+ max_latitude (float): Maximum latitude boundary.
425
+ min_longitude (float): Minimum longitude boundary.
426
+ max_longitude (float): Maximum longitude boundary.
427
+
428
+ output_file (str): Saves all data to a single file instead of bucketing.
429
+ Supported formats are '.csv', '.json', '.little_r' and '.nc'
430
+ bucket_hours (int): Optional. Size of time buckets in hours. Defaults to 6 hours.
431
+ output_format (str): Optional. Format to save data in separate files. Supported formats are 'json', 'csv', 'little_r' and 'netcdf'.
432
+ output_dir (str): Optional. Directory path where the separate files should be saved. If not provided, files will be saved in current directory.
433
+ callback (callable): Optional callback function that receives (super observations, metadata) before saving.
434
+ This allows custom processing or saving in custom formats.
435
+ custom_save (callable): Optional function to save observations in a custom format.
436
+ exit_at_end (bool): Whether to exit after fetching all observations or keep polling.
437
+ """
438
+
439
+ # Headers for CSV files
440
+ csv_headers = [
441
+ "timestamp", "id", "time", "latitude", "longitude", "altitude", "humidity",
442
+ "pressure", "specific_humidity", "speed_u", "speed_v", "temperature", "mission_name", "mission_id"
443
+ ]
444
+
445
+ api_args = {
446
+ 'min_time': start_time,
447
+ 'max_time': end_time,
448
+ 'min_latitude': min_latitude,
449
+ 'max_latitude': max_latitude,
450
+ 'min_longitude': min_longitude,
451
+ 'max_longitude': max_longitude,
452
+ 'include_updated_at': include_updated_at,
453
+ 'mission_id': mission_id,
454
+ 'include_ids': True,
455
+ 'include_mission_name': True
456
+ }
457
+
458
+ return get_observations_core(api_args, csv_headers, get_page=get_observations_page, start_time=start_time, end_time=end_time, output_file=output_file, bucket_hours=bucket_hours, output_format=output_format, output_dir=output_dir, callback=callback, custom_save=custom_save, exit_at_end=exit_at_end)
459
+
460
+ def poll_observations(**kwargs):
461
+ """
462
+ Continuously polls for observations and saves them to files in the specified format.
463
+ Will run indefinitely until interrupted.
464
+ Same as get_observations, but runs in an infinite loop.
961
465
  """
466
+
962
467
  # Print warning about infinite loop
963
468
  print(" ___________________________________________________________________")
964
469
  print("| WARNING \U000026A0\U0000FE0F |")
@@ -966,217 +471,78 @@ def poll_super_observations(start_time, interval=60, bucket_hours=6.0, output_fo
966
471
  print("| |")
967
472
  print("| Press Ctrl + C anytime to exit. |")
968
473
  print("|___________________________________________________________________|\n\n")
969
- time.sleep(4)
970
-
971
- start_time = to_unix_timestamp(start_time)
972
474
 
973
- if output_format and output_format not in ['json', 'csv', 'little_r', 'netcdf']:
974
- print("Please use one of the following formats:")
975
- print(" - json\n - csv\n - little_r\n - netcdf")
976
- return
475
+ get_observations(**kwargs, exit_at_end=False)
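A corresponding polling sketch: poll_observations simply forwards its keyword arguments to get_observations with exit_at_end=False, so the same parameters apply. Times and paths below are placeholders.

    from windborne.data_api import poll_observations

    # Poll indefinitely, writing new observations into 6-hour CSV buckets.
    poll_observations(
        start_time="2024-01-01 00:00:00",
        output_format="csv",
        output_dir="output",
    )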
977
476
 
978
- if output_dir:
979
- os.makedirs(output_dir, exist_ok=True)
980
- print(f"\U0001F4C1 Files will be saved to {output_dir}")
981
- else:
982
- print(f"\U0001F4C1 Files will be saved to {os.getcwd()}")
983
-
984
- # Convert start_time to datetime
985
- start_dt = datetime.fromtimestamp(start_time, tz=timezone.utc)
986
-
987
- # Calculate first center time that's after start_time
988
- hours_since_day_start = start_dt.hour + start_dt.minute / 60
989
- bucket_number = hours_since_day_start // bucket_hours
990
- first_center = start_dt.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(hours=(bucket_number + 1) * bucket_hours)
477
+ def get_super_observations(start_time, end_time=None, mission_id=None, include_updated_at=True, output_file=None, bucket_hours=6.0, output_format=None, output_dir=None, callback=None, custom_save=None, exit_at_end=True):
478
+ """
479
+ Fetches super observations between a start time and an optional end time and saves to files in specified format.
480
+ Files are broken up into time buckets, with filenames containing the time at the mid-point of the bucket.
481
+ For example, for 6-hour buckets centered on 00 UTC, the start time should be 21 UTC of the previous day.
991
482
 
992
- headers = [
483
+ Args:
484
+ start_time (str): A date string, supporting formats YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM and ISO strings,
485
+ representing the starting time of fetching data.
486
+ end_time (str): Optional. A date string, supporting formats YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM and ISO strings,
487
+ representing the end time of fetching data. If not provided, current time is used as end time.
488
+ mission_id (str): Filter observations by mission ID.
489
+ include_updated_at (bool): Include update timestamps in response.
490
+ output_file (str): Saves all data to a single file instead of bucketing.
491
+ Supported formats are '.csv', '.json', '.little_r' and '.nc'
492
+ bucket_hours (float): Optional. Size of time buckets in hours. Defaults to 6 hours.
493
+ output_format (str): Optional. Format to save data in separate files. Supported formats are 'json', 'csv', 'little_r' and 'netcdf'.
494
+ output_dir (str): Optional. Directory path where the separate files should be saved. If not provided, files will be saved in current directory.
495
+ callback (callable): Optional callback function that receives (super observations, metadata) before saving.
496
+ This allows custom processing or saving in custom formats.
497
+ custom_save (callable): Optional function to save observations in a custom format.
498
+ exit_at_end (bool): Whether to exit after fetching all observations or keep polling.
499
+ """
500
+ csv_headers = [
993
501
  "timestamp", "id", "time", "latitude", "longitude", "altitude", "humidity",
994
502
  "mission_name", "pressure", "specific_humidity", "speed_u", "speed_v", "temperature"
995
503
  ]
996
504
 
997
- buckets = {} # {(bucket_center, mission_name): {'data': {}, 'last_write': timestamp, 'data_hash': str}}
998
- current_timestamp = start_time
999
- fetched_so_far = 0
1000
- mission_stats = {}
1001
-
1002
- print(f"Starting continuous super observations polling from {datetime.fromtimestamp(start_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC")
1003
- print(f"Polling interval: {interval} seconds")
1004
- print("-----------------------------------------------------")
1005
-
1006
- try:
1007
- while True:
1008
- observations_page = get_super_observations_page(
1009
- since=current_timestamp,
1010
- min_time=start_time,
1011
- include_ids=True,
1012
- include_mission_name=True
1013
- )
1014
-
1015
- if observations_page is None:
1016
- print(f"\nNull response from API. Retrying in {interval} seconds ...")
1017
- time.sleep(interval)
1018
- continue
505
+ api_args = {
506
+ 'min_time': start_time,
507
+ 'max_time': end_time,
508
+ 'mission_id': mission_id,
509
+ 'include_updated_at': include_updated_at,
510
+ 'include_ids': True,
511
+ 'include_mission_name': True
512
+ }
1019
513
 
1020
- observations = observations_page.get('observations', [])
1021
-
1022
- # Invoke the callback with fetched super observations
1023
- if callback:
1024
- print("--------\nCallback\n--------")
1025
- callback(observations)
1026
-
1027
- if observations:
1028
- fetched_so_far += len(observations)
1029
- print_current_timestamp = current_timestamp if current_timestamp < 1e11 else current_timestamp / 1e9
1030
- print(f"Fetched {fetched_so_far} super observations")
1031
- print(f"Current time: {datetime.fromtimestamp(print_current_timestamp, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
1032
- print("-----------------------------------------------------")
1033
-
1034
- for obs in observations:
1035
- if 'mission_name' not in obs:
1036
- continue
1037
-
1038
- timestamp = obs.get('timestamp')
1039
- if not timestamp:
1040
- continue
1041
-
1042
- try:
1043
- obs_time = datetime.fromtimestamp(timestamp, tz=timezone.utc)
1044
- except (OSError, ValueError, TypeError, OverflowError):
1045
- continue
1046
-
1047
- mission_name = obs.get('mission_name', 'Unknown')
1048
- obs['time'] = obs_time.replace(tzinfo=timezone.utc).isoformat()
1049
-
1050
- processed_obs = {
1051
- header: obs.get(header) if obs.get(header) not in [None, '', ' '] else 'None'
1052
- for header in headers
1053
- }
1054
-
1055
- obs_id = f"{timestamp}_{mission_name}"
1056
-
1057
- if obs_time >= start_dt:
1058
- hours_diff = (obs_time - first_center).total_seconds() / 3600
1059
- bucket_index = floor(hours_diff / bucket_hours)
1060
- bucket_center = first_center + timedelta(hours=bucket_index * bucket_hours)
1061
- bucket_end = bucket_center + timedelta(hours=bucket_hours)
1062
-
1063
- if obs_time <= bucket_end:
1064
- bucket_key = (bucket_center, mission_name)
1065
-
1066
- # Initialize bucket if needed
1067
- if bucket_key not in buckets:
1068
- buckets[bucket_key] = {
1069
- 'data': {},
1070
- 'last_write': 0,
1071
- 'data_hash': ''
1072
- }
1073
-
1074
- # Update bucket data
1075
- buckets[bucket_key]['data'][obs_id] = processed_obs
1076
-
1077
- # Track statistics
1078
- if mission_name not in mission_stats:
1079
- mission_stats[mission_name] = {'files': set(), 'observations': 0}
1080
- mission_stats[mission_name]['observations'] += 1
1081
-
1082
- # Calculate new data hash
1083
- sorted_data = sorted(buckets[bucket_key]['data'].items(), key=lambda x: int(x[1]['timestamp']))
1084
- data_hash = hashlib.md5(str(sorted_data).encode()).hexdigest()
1085
-
1086
- # Check if we should write the bucket
1087
- current_time = datetime.now(timezone.utc)
1088
- time_since_last_write = current_time.timestamp() - buckets[bucket_key]['last_write']
1089
- data_changed = data_hash != buckets[bucket_key]['data_hash']
1090
-
1091
- # Write if it's been more than interval seconds since last write OR if data has changed
1092
- if (time_since_last_write >= interval or data_changed) and output_format:
1093
- bucket_hour = int((bucket_center.hour + bucket_hours/2) % 24)
1094
-
1095
- file_name_format = {
1096
- 'csv': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv",
1097
- 'json': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json",
1098
- 'netcdf': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.nc",
1099
- 'little_r': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r"
1100
- }
1101
-
1102
- file_name = file_name_format[output_format] % (
1103
- bucket_center.year, bucket_center.month, bucket_center.day,
1104
- bucket_hour, bucket_hours)
1105
-
1106
- output_file = os.path.join(output_dir or '.', file_name)
1107
- sorted_obs = [obs for _, obs in sorted_data]
1108
-
1109
- # Write the file based on format
1110
- try:
1111
- if output_format == 'netcdf':
1112
- convert_to_netcdf(sorted_obs, bucket_center.timestamp(), output_file)
1113
- elif output_format == 'csv':
1114
- with open(output_file, mode='w', newline='') as file:
1115
- writer = csv.DictWriter(file, fieldnames=headers)
1116
- writer.writeheader()
1117
- writer.writerows(sorted_obs)
1118
- elif output_format == 'json':
1119
- sorted_obs_dict = {k: v for k, v in sorted_data}
1120
- with open(output_file, 'w', encoding='utf-8') as file:
1121
- json.dump(sorted_obs_dict, file, indent=4)
1122
- elif output_format == 'little_r':
1123
- little_r_records = format_little_r(sorted_obs)
1124
- with open(output_file, 'w') as file:
1125
- file.write('\n'.join(little_r_records))
1126
-
1127
- buckets[bucket_key]['last_write'] = current_time.timestamp()
1128
- buckets[bucket_key]['data_hash'] = data_hash
1129
- mission_stats[mission_name]['files'].add(output_file)
1130
- except Exception as e:
1131
- print(f"Error writing bucket file {file_name}: {str(e)}")
1132
-
1133
- # Clean up old buckets
1134
- current_time = datetime.now(timezone.utc)
1135
- buckets = {
1136
- k: v for k, v in buckets.items()
1137
- if current_time - k[0] <= timedelta(hours=bucket_hours * 2) # Keep slightly longer for potential updates
1138
- }
1139
-
1140
- next_timestamp = observations_page.get('next_since')
1141
- has_next_page = observations_page.get('has_next_page', False)
1142
-
1143
- if next_timestamp and next_timestamp > current_timestamp:
1144
- current_timestamp = next_timestamp
1145
- elif not has_next_page:
1146
- print("-----------------------------------------------------")
1147
- print(f"\U0001F503 Latest super observations data have been processed.\nRetrying getting new super observations data in {interval} seconds...")
1148
- print("-----------------------------------------------------")
1149
- time.sleep(interval)
1150
- continue
514
+ return get_observations_core(api_args, csv_headers, get_page=get_super_observations_page, start_time=start_time, end_time=end_time, output_file=output_file, bucket_hours=bucket_hours, output_format=output_format, output_dir=output_dir, callback=callback, custom_save=custom_save, exit_at_end=exit_at_end)
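As an illustration of the callback hook documented above, a sketch of get_super_observations with a custom callback follows. The two-argument callback signature is taken from the docstring; the times are placeholders and this is not the canonical usage, just one plausible call under those assumptions.

    from windborne.data_api import get_super_observations

    def report_batch(observations, metadata):
        # Invoked with each batch before it is saved (signature per the docstring above).
        print(f"received {len(observations)} super observations")

    get_super_observations(
        start_time="2024-01-01 00:00:00",
        end_time="2024-01-01 06:00:00",
        output_format="json",
        callback=report_batch,
    )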
515
+
516
+ def poll_super_observations(**kwargs):
517
+ """
518
+ Continuously polls for super observations and saves them to files in the specified format.
519
+ Will run indefinitely until interrupted.
520
+ Same as get_super_observations, but runs in an infinite loop.
521
+ """
522
+
523
+ # Print warning about infinite loop
524
+ print(" ___________________________________________________________________")
525
+ print("| WARNING \U000026A0\U0000FE0F |")
526
+ print("| You are entering an endless loop. |")
527
+ print("| |")
528
+ print("| Press Ctrl + C anytime to exit. |")
529
+ print("|___________________________________________________________________|\n\n")
530
+
531
+ get_super_observations(**kwargs, exit_at_end=False)
1151
532
 
1152
- if not observations:
1153
- print(f"\U0001F503 No new super observations data available.\n Retrying getting new super observations data in {interval} seconds...")
1154
- print("-----------------------------------------------------")
1155
- time.sleep(interval)
1156
-
1157
- except KeyboardInterrupt:
1158
- print("\n\U0001F6D1 Received interrupt, stopping...")
1159
- print("-----------------------------------------------------")
1160
- for mission_name, stats in mission_stats.items():
1161
- print(f"Mission {mission_name}: {stats['observations']} super observations across {len(stats['files'])} files")
1162
- except Exception as e:
1163
- print(f"Error occurred: {str(e)}")
1164
- exit(1001)
1165
- finally:
1166
- print("-----------------------------------------------------")
1167
- print("Finished processing super observations.")
1168
533
 
1169
534
  # ------------
1170
535
  # METADATA
1171
536
  # ------------
1172
- def get_flying_missions(cli=None, save_to_file=None):
537
+ def get_flying_missions(output_file=None, print_results=False):
1173
538
  """
1174
539
  Retrieves a list of currently flying missions.
1175
540
  In CLI mode, displays missions in a formatted table.
1176
541
 
1177
542
  Args:
1178
- save_to_file (str): Optional path to save the response data.
543
+ output_file (str): Optional path to save the response data.
1179
544
  If provided, saves the data in CSV or JSON format.
545
+ print_results (bool): Whether to print the results in the CLI.
1180
546
 
1181
547
  Returns:
1182
548
  dict: The API response containing list of flying missions.
@@ -1187,34 +553,47 @@ def get_flying_missions(cli=None, save_to_file=None):
1187
553
  flying_missions = flying_missions_response.get("missions", [])
1188
554
 
1189
555
  # Display currently flying missions only if we are in cli and we don't save info in file
1190
- if flying_missions and cli and not save_to_file:
1191
- print("Currently flying missions:\n")
1192
-
1193
- # Define headers and data
1194
- headers = ["Index", "Mission ID", "Mission Name"]
1195
- rows = [
1196
- [str(i), mission.get("id", "N/A"), mission.get("name", "Unnamed Mission")]
1197
- for i, mission in enumerate(flying_missions, start=1)
1198
- ]
1199
-
1200
- # Kinda overkill | but it's a good practice if we ever change missions naming convention
1201
- # Calculate column widths
1202
- col_widths = [max(len(cell) for cell in col) + 2 for col in zip(headers, *rows)]
1203
-
1204
- # Display table
1205
- print("".join(f"{headers[i]:<{col_widths[i]}}" for i in range(len(headers))))
1206
- print("".join("-" * col_width for col_width in col_widths))
1207
- for row in rows:
1208
- print("".join(f"{row[i]:<{col_widths[i]}}" for i in range(len(row))))
1209
-
1210
- if save_to_file:
1211
- save_csv_json(save_to_file, flying_missions_response, csv_data_key='missions')
556
+ if print_results:
557
+ if flying_missions:
558
+ print("Currently flying missions:\n")
559
+
560
+ # Define headers and data
561
+ headers = ["Index", "Mission ID", "Mission Name"]
562
+ rows = [
563
+ [str(i), mission.get("id", "N/A"), mission.get("name", "Unnamed Mission")]
564
+ for i, mission in enumerate(flying_missions, start=1)
565
+ ]
566
+
567
+ # Kinda overkill | but it's a good practice if we ever change missions naming convention
568
+ # Calculate column widths
569
+ col_widths = [max(len(cell) for cell in col) + 2 for col in zip(headers, *rows)]
570
+
571
+ # Display table
572
+ print("".join(f"{headers[i]:<{col_widths[i]}}" for i in range(len(headers))))
573
+ print("".join("-" * col_width for col_width in col_widths))
574
+ for row in rows:
575
+ print("".join(f"{row[i]:<{col_widths[i]}}" for i in range(len(row))))
576
+ else:
577
+ print("No missions are currently flying.")
578
+
579
+ if output_file:
580
+ save_arbitrary_response(output_file, flying_missions_response, csv_data_key='missions')
1212
581
 
1213
- return flying_missions_response
582
+ return flying_missions
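A short usage sketch of the updated signature; note that the function now returns the list of missions rather than the raw response. The output file name is a placeholder.

    from windborne.data_api import get_flying_missions

    # Print the table of flying missions and also save them to a JSON file.
    missions = get_flying_missions(output_file="missions.json", print_results=True)
    for mission in missions:
        print(mission.get("id"), mission.get("name"))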
583
+
1214
584
 
1215
- def get_mission_launch_site(mission_id=None, save_to_file=None):
585
+ def get_mission_launch_site(mission_id=None, output_file=None, print_result=False):
1216
586
  """
1217
587
  Retrieves launch site information for a specified mission.
588
+
589
+ Args:
590
+ mission_id (str): The ID of the mission to fetch the launch site for.
591
+ output_file (str): Optional path to save the response data.
592
+ If provided, saves the data in CSV format.
593
+ print_result (bool): Whether to print the results in the CLI.
594
+
595
+ Returns:
596
+ dict: The API response containing the launch site information.
1218
597
  """
1219
598
  if not mission_id:
1220
599
  print("Must provide mission ID")
@@ -1223,34 +602,33 @@ def get_mission_launch_site(mission_id=None, save_to_file=None):
1223
602
  url = f"{DATA_API_BASE_URL}/missions/{mission_id}/launch_site.json"
1224
603
  response = make_api_request(url)
1225
604
 
1226
- if response and not save_to_file:
605
+ if response and print_result:
1227
606
  launch_site = response.get('launch_site')
1228
607
  if isinstance(launch_site, dict):
1229
- site_name = LAUNCH_SITES.get(launch_site.get('id'), 'N/A')
1230
608
  print("Mission launch site\n")
1231
- print(f"{'Location':<12} {site_name}")
609
+ print(f"{'ID':<12} {launch_site.get('id')}")
1232
610
  print(f"{'Latitude':<12} {launch_site.get('latitude', 'N/A')}")
1233
611
  print(f"{'Longitude':<12} {launch_site.get('longitude', 'N/A')}")
1234
612
  else:
1235
613
  print("Unable to display launch site details - unexpected format")
1236
614
 
1237
- if save_to_file:
1238
- save_csv_json(save_to_file, response, csv_data_key='launch_site')
615
+ if output_file:
616
+ save_arbitrary_response(output_file, response, csv_data_key='launch_site')
1239
617
 
1240
- return response
618
+ return response.get('launch_site')
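An illustrative call for the launch-site lookup; the mission ID is a placeholder, and per the change above the function now returns just the launch_site dict.

    from windborne.data_api import get_mission_launch_site

    # Look up and print the launch site for a mission (placeholder ID).
    site = get_mission_launch_site(mission_id="example-mission-id", print_result=True)
    if site:
        print(site.get("latitude"), site.get("longitude"))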
1241
619
 
1242
- def get_predicted_path(mission_id=None, save_to_file=None):
620
+ def get_predicted_path(mission_id=None, output_file=None):
1243
621
  """
1244
622
  Fetches the predicted flight path for a given mission.
1245
623
  Displays currently flying missions if the provided mission ID is invalid.
1246
624
 
1247
625
  Args:
1248
626
  mission_id (str): The ID of the mission to fetch the prediction for.
1249
- save_to_file (str): Optional path to save the response data.
627
+ output_file (str): Optional path to save the response data.
1250
628
  If provided, saves the data in CSV format.
1251
629
 
1252
630
  Returns:
1253
- dict: The API response containing the predicted flight path data.
631
+ list: The API response containing the predicted flight path data.
1254
632
  """
1255
633
  if not mission_id:
1256
634
  print("To get the predicted flight path for a given mission you must provide a mission ID.")
@@ -1290,7 +668,7 @@ def get_predicted_path(mission_id=None, save_to_file=None):
1290
668
  url = f"{DATA_API_BASE_URL}/missions/{mission_id}/prediction.json"
1291
669
  response = make_api_request(url)
1292
670
 
1293
- if save_to_file:
1294
- save_csv_json(save_to_file, response, csv_data_key='prediction')
671
+ if output_file:
672
+ save_arbitrary_response(output_file, response, csv_data_key='prediction')
1295
673
 
1296
- return response
674
+ return response.get('prediction')
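Finally, a sketch for the prediction endpoint; the mission ID and output file are placeholders, and per the updated docstring the function now returns the prediction list.

    from windborne.data_api import get_predicted_path

    # Fetch the predicted flight path and also save it as CSV (placeholder ID).
    path = get_predicted_path(mission_id="example-mission-id", output_file="prediction.csv")
    if path:
        print(f"{len(path)} predicted points, first: {path[0]}")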