windborne 1.0.5-py3-none-any.whl → 1.0.7-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
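Version 1.0.7 reorganizes windborne/data_api.py: the single-page fetchers are renamed (get_observations becomes get_observations_page, get_super_observations becomes get_super_observations_page), the old poll_* bulk fetchers are renamed to observations() and super_observations(), new endless pollers take over the poll_* names, and the bulk savers gain an output_dir parameter. As a rough illustration of the renamed pagination API, here is a minimal sketch based only on the signatures and response keys (observations, has_next_page, next_since) visible in the diff below; the import path and starting timestamp are assumptions, not taken from the package docs:

    # Sketch: page through observations with the 1.0.7 page fetcher.
    # Assumes windborne >= 1.0.7 and configured API credentials.
    from windborne.data_api import get_observations_page

    since = 1735689600  # assumed example: 2025-01-01 00:00:00 UTC
    while True:
        page = get_observations_page(since=since, include_mission_name=True)
        if page is None:
            break  # null response from the API
        for obs in page.get('observations', []):
            print(obs.get('mission_name'), obs.get('timestamp'))
        if not page.get('has_next_page') or not page.get('next_since'):
            break
        since = page['next_since']  # cursor for the next page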
windborne/data_api.py CHANGED
@@ -7,10 +7,19 @@ from math import floor
 from datetime import datetime, timezone, timedelta
 import csv
 import json
+import hashlib
 
-def get_observations(since=None, min_time=None, max_time=None, include_ids=None, include_mission_name=True, include_updated_at=None, mission_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None, save_to_file=None):
+# UTC should be used across the lib
+
+# ------------
+# CORE RESOURCES
+# ------------
+
+# Observations
+# ------------
+def get_observations_page(since=None, min_time=None, max_time=None, include_ids=None, include_mission_name=True, include_updated_at=None, mission_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None, save_to_file=None):
     """
-    Retrieves observations based on specified filters including geographical bounds.
+    Retrieves observations page based on specified filters including geographical bounds.
 
     Args:
         since (str): Filter observations after this timestamp.
@@ -70,52 +79,7 @@ def get_observations(since=None, min_time=None, max_time=None, include_ids=None,
 
     return response
 
-def get_super_observations(since=None, min_time=None, max_time=None, include_ids=None, include_mission_name=None, include_updated_at=None, mission_id=None, save_to_file=None):
-    """
-    Retrieves super observations based on specified filters.
-
-    Args:
-        since (str): Filter observations after this timestamp.
-        min_time (str): Minimum timestamp for observations.
-        max_time (str): Maximum timestamp for observations.
-        include_ids (bool): Include observation IDs in response.
-        include_mission_name (bool): Include mission names in response.
-        include_updated_at (bool): Include update timestamps in response.
-        mission_id (str): Filter observations by mission ID.
-        save_to_file (str): Optional path to save the response data.
-            If provided, saves the data in CSV format.
-
-    Returns:
-        dict: The API response containing filtered super observations.
-    """
-
-    url = f"{DATA_API_BASE_URL}/super_observations.json"
-
-    params = {}
-    if since:
-        params["since"] = to_unix_timestamp(since)
-    if min_time:
-        params["min_time"] = to_unix_timestamp(min_time)
-    if max_time:
-        params["max_time"] = to_unix_timestamp(max_time)
-    if mission_id:
-        params["mission_id"] = mission_id
-    if include_ids:
-        params["include_ids"] = True
-    if include_mission_name:
-        params["include_mission_name"] = True
-    if include_updated_at:
-        params["include_updated_at"] = True
-
-    params = {k: v for k, v in params.items() if v is not None}
-
-    response = make_api_request(url, params=params)
-    if save_to_file:
-        save_csv_json(save_to_file, response, csv_data_key='observations')
-
-    return response
-
-def poll_observations(start_time, end_time=None, include_ids=None, include_updated_at=None, mission_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None, interval=60, save_to_file=None, bucket_hours=6.0, output_format=None, callback=None):
+def observations(start_time, end_time=None, include_ids=None, include_updated_at=None, mission_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None, interval=60, save_to_file=None, bucket_hours=6.0, output_format=None, output_dir=None, callback=None):
     """
     Fetches observations between a start time and an optional end time and saves to files in specified format.
     Files are broken up into time buckets, with filenames containing the time at the mid-point of the bucket.
@@ -140,6 +104,7 @@ def poll_observations(start_time, end_time=None, include_ids=None, include_updat
             Supported formats are '.csv', '.json', '.little_r' and '.nc'
         bucket_hours (int): Optional. Size of time buckets in hours. Defaults to 6 hours.
         output_format (str): Optional. Format to save data in separate files. Supported formats are 'json, 'csv', 'little_r' and 'netcdf'.
+        output_dir (str): Optional. Directory path where the separate files should be saved. If not provided, files will be saved in current directory.
         callback (callable): Optional callback function that receives (super observations, metadata) before saving.
             This allows custom processing or saving in custom formats.
     """
@@ -165,7 +130,7 @@ def poll_observations(start_time, end_time=None, include_ids=None, include_updat
         return
 
     # Supported formats for saving into a single file:
-    # NOTE: for poll_observations we handle .csv saving within poll_observations and not using save_csv_json
+    # NOTE: for observations we handle .csv saving within observations and not using save_csv_json
     # - .csv
     # - .json
     # - .little_r
@@ -201,12 +166,16 @@ def poll_observations(start_time, end_time=None, include_ids=None, include_updat
     # Initialize the polling loop
     current_timestamp = start_time
     has_next_page = True
+    fetced_so_far = 0
+
+    print(f"Starting polling observations\nfrom {datetime.fromtimestamp(start_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC to {datetime.fromtimestamp(end_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC")
+    print("-----------------------------------------------------")
 
 
     while has_next_page:
         try:
             # Fetch observations
-            observations_page = get_observations(
+            observations_page = get_observations_page(
                 since=current_timestamp,
                 min_latitude=min_latitude,
                 max_latitude=max_latitude,
@@ -226,11 +195,15 @@ def poll_observations(start_time, end_time=None, include_ids=None, include_updat
                 continue
 
             observations = observations_page.get('observations', [])
-            print(f"Fetched {len(observations)} observation(s)")
+            fetced_so_far = fetced_so_far + len(observations)
+            print_current_timestamp = current_timestamp if current_timestamp < 1e11 else current_timestamp / 1e9
+            print(f"Fetched {fetced_so_far} observations")
+            print(f"Current time: {datetime.fromtimestamp(print_current_timestamp, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
+            print("-----------------------------------------------------")
 
             # Invoke the callback with fetched observations
             if callback:
-                print("/nCallback/n")
+                print("--------\nCallback\n--------")
                 callback(observations)
 
             for obs in observations:
@@ -281,27 +254,41 @@ def poll_observations(start_time, end_time=None, include_ids=None, include_updat
 
             if not has_next_page or not next_timestamp or next_timestamp <= current_timestamp:
                 print("-----------------------------------------------------\n")
-                print("No more pages available or reached end of time range.")
+                print("Fetching complete.")
                 print("\n-----------------------------------------------------")
                 break
 
             current_timestamp = next_timestamp
 
+        except KeyboardInterrupt:
+            print("\n\n\U0001F6D1 Received interrupt, stopping...")
+            print("-----------------------------------------------------")
+            print("Requested data was not saved!\nRun again and do not interrupt the run to save data.")
+            print("-----------------------------------------------------")
+            exit(3)
         except Exception as e:
            print(f"Error occurred: {e}")
            exit(1001)
 
    # Save data to a single file
    if save_to_file:
+        # Create directory path if it doesn't exist
+        directory = os.path.dirname(save_to_file)
+        if directory and not os.path.isdir(directory):
+            os.makedirs(directory, exist_ok=True)
        filtered_observations = {obs_id: obs for obs_id, obs in all_observations.items()
                                 if float(obs['timestamp']) >= start_time}
        # Sort by timestamp
        sorted_observations = dict(sorted(filtered_observations.items(),
                                          key=lambda x: float(x[1]['timestamp'])))
 
+        print(f"Saving {len(sorted_observations)} {'observation' if len(sorted_observations) == 1 else 'observations'} to {save_to_file}")
+        print("This may take a while...")
+        print("-----------------------------------------------------\n")
+
        if save_to_file.endswith('.nc'):
            first_obs_timestamp = float(next(iter(sorted_observations.values()))['timestamp'])
-            convert_to_netcdf(sorted_observations, first_obs_timestamp, output_filename=save_to_file)
+            convert_to_netcdf(sorted_observations, first_obs_timestamp, save_to_file)
        elif save_to_file.endswith('.json'):
            with open(save_to_file, 'w', encoding='utf-8') as f:
                json.dump(sorted_observations, f, indent=4)
@@ -321,6 +308,15 @@ def poll_observations(start_time, end_time=None, include_ids=None, include_updat
 
    # Save data to multiple file
    elif output_format:
+        # Create output directory if specified
+        if output_dir:
+            os.makedirs(output_dir, exist_ok=True)
+            print(f"Files will be saved to {output_dir}")
+        else:
+            print(f"Files will be saved to {os.getcwd()}")
+        print(f"Processing {fetced_so_far} {'observation' if fetced_so_far == 1 else 'observations'} and save them over multiple files.")
+        print("This may take a while...")
+        print("-----------------------------------------------------\n")
        # Track statistics per mission
        mission_stats = {}  # {mission_name: {'files': 0, 'observations': 0}}
        total_observations_written = 0
@@ -329,48 +325,39 @@ def poll_observations(start_time, end_time=None, include_ids=None, include_updat
        for (bucket_center, mission_name), observations in buckets.items():
            if observations:
                # Format hour to be the actual bucket center
-                bucket_hour = int((bucket_center.hour + bucket_hours/2) % 24)
+                bucket_hour = int((bucket_center.hour + bucket_hours / 2) % 24)
 
-                if output_format == 'netcdf':
-                    convert_to_netcdf(observations, bucket_center.timestamp())
+                # Generate file name based on output format
+                file_name_format = {
+                    'csv': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv",
+                    'json': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json",
+                    'netcdf': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.nc",
+                    'little_r': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r"
+                }
+                file_name = file_name_format[output_format] % (
+                    bucket_center.year, bucket_center.month, bucket_center.day,
+                    bucket_hour, bucket_hours)
 
-                if output_format == 'csv':
-                    output_file = (f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv" %
-                                   (bucket_center.year, bucket_center.month, bucket_center.day,
-                                    bucket_hour, bucket_hours))
+                output_file = os.path.join(output_dir or '.', file_name)
 
-                    os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)
+                # Sort observations by timestamp within each bucket
+                sorted_obs = sorted(observations.values(), key=lambda x: int(x['timestamp']))
 
-                    # Sort observations by timestamp within each bucket
-                    sorted_obs = sorted(observations.values(), key=lambda x: int(x['timestamp']))
+                if output_format == 'netcdf':
+                    convert_to_netcdf(sorted_obs, bucket_center.timestamp(), output_file)
 
+                elif output_format == 'csv':
                    with open(output_file, mode='w', newline='') as file:
                        writer = csv.DictWriter(file, fieldnames=headers)
                        writer.writeheader()
                        writer.writerows(sorted_obs)
 
                elif output_format == 'json':
-                    output_file = (f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json" %
-                                   (bucket_center.year, bucket_center.month, bucket_center.day,
-                                    bucket_hour, bucket_hours))
-
-                    os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)
-
-                    # Sort observations by timestamp within each bucket
-                    sorted_obs = dict(sorted(observations.items(), key=lambda x: int(x[1]['timestamp'])))
-
+                    sorted_obs_dict = {k: v for k, v in sorted(observations.items(), key=lambda x: int(x[1]['timestamp']))}
                    with open(output_file, 'w', encoding='utf-8') as file:
-                        json.dump(sorted_obs, file, indent=4)
+                        json.dump(sorted_obs_dict, file, indent=4)
 
                elif output_format == 'little_r':
-                    output_file = (f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r" %
-                                   (bucket_center.year, bucket_center.month, bucket_center.day,
-                                    bucket_hour, bucket_hours))
-
-                    os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)
-
-                    sorted_obs = sorted(observations.items(), key=lambda x: int(x[1]['timestamp']))
-
                    little_r_records = format_little_r(sorted_obs)
                    with open(output_file, 'w') as file:
                        file.write('\n'.join(little_r_records))
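The hunk above collapses three per-format filename branches into a single file_name_format table. A worked sketch of how one template resolves, using the same %-formatting as the diff (mission and bucket values are made-up examples):

    import os

    mission_name = "W-1234"             # example mission
    output_dir = "wb_data"              # the code falls back to '.' when unset
    year, month, day = 2024, 11, 1      # bucket_center date
    bucket_hour, bucket_hours = 3, 6.0  # bucket center hour and width

    file_name = f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv" % (
        year, month, day, bucket_hour, bucket_hours)
    print(os.path.join(output_dir or '.', file_name))
    # wb_data/WindBorne_W-1234_2024-11-01_03_6h.csv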
@@ -382,7 +369,7 @@ def poll_observations(start_time, end_time=None, include_ids=None, include_updat
                mission_stats[mission_name]['files'] += 1
                mission_stats[mission_name]['observations'] += len(observations)
        # Print total observations written
-        print(f"Total {'observation' if total_observations_written == 1 else 'observations'} written: {total_observations_written}")
+        print(f"Saved {total_observations_written} {'observation.' if total_observations_written == 1 else 'observations.'}")
        print("-----------------------------------------------------")
 
        # Print summary for each mission
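The endless pollers added in the next hunk align incoming data to fixed time buckets; the first bucket center after the start time is found with floor division over the bucket width. A worked example of that arithmetic, with an assumed start time:

    from datetime import datetime, timezone, timedelta

    bucket_hours = 6.0
    start_dt = datetime(2024, 11, 1, 14, 30, tzinfo=timezone.utc)  # example

    hours_since_day_start = start_dt.hour + start_dt.minute / 60  # 14.5
    bucket_number = hours_since_day_start // bucket_hours         # 2.0
    first_center = start_dt.replace(hour=0, minute=0, second=0, microsecond=0) \
        + timedelta(hours=(bucket_number + 1) * bucket_hours)
    print(first_center)  # 2024-11-01 18:00:00+00:00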
@@ -392,7 +379,286 @@ def poll_observations(start_time, end_time=None, include_ids=None, include_updat
    print("-----------------------------------------------------")
    print("All observations have been processed and saved.")
 
-def poll_super_observations(start_time, end_time=None, interval=60, save_to_file=None, bucket_hours=6.0, output_format=None, callback=None):
+def poll_observations(start_time, include_ids=None, include_updated_at=None, mission_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None, interval=60, bucket_hours=6.0, output_format=None, output_dir=None, callback=None):
+    """
+    Continuously polls for observations and saves to files in specified format.
+    Will run indefinitely until interrupted.
+
+    Args:
+        start_time (str): Starting time in YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM or ISO format
+        include_ids (bool): Include observation IDs in response.
+        include_updated_at (bool): Include update timestamps in response.
+        mission_id (str): Filter observations by mission ID.
+        min_latitude (float): Minimum latitude boundary.
+        max_latitude (float): Maximum latitude boundary.
+        min_longitude (float): Minimum longitude boundary.
+        max_longitude (float): Maximum longitude boundary.
+        interval (int): Polling interval in seconds when no data is received (default: 60)
+        bucket_hours (float): Size of time buckets in hours (default: 6.0)
+        output_format (str): Format for bucket files ('json', 'csv', 'little_r', 'netcdf')
+        output_dir (str): Directory for bucket files (default: current directory)
+        callback (callable): Optional callback for data processing
+    """
+    # Print warning about infinite loop
+    print(" ___________________________________________________________________")
+    print("| WARNING \U000026A0\U0000FE0F |")
+    print("| You are entering an endless loop. |")
+    print("| |")
+    print("| Press Ctrl + C anytime to exit. |")
+    print("|___________________________________________________________________|\n\n")
+    time.sleep(4)
+
+    start_time = to_unix_timestamp(start_time)
+
+    if output_format and output_format not in ['json', 'csv', 'little_r', 'netcdf']:
+        print("Please use one of the following formats:")
+        print(" - json\n - csv\n - little_r\n - netcdf")
+        return
+
+    if output_dir:
+        os.makedirs(output_dir, exist_ok=True)
+        print(f"\U0001F4C1 Files will be saved to {output_dir}")
+    else:
+        print(f"\U0001F4C1 Files will be saved to {os.getcwd()}")
+
+    # Convert start_time to datetime
+    start_dt = datetime.fromtimestamp(start_time, tz=timezone.utc)
+
+    # Calculate first center time that's after start_time
+    hours_since_day_start = start_dt.hour + start_dt.minute / 60
+    bucket_number = hours_since_day_start // bucket_hours
+    first_center = start_dt.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(hours=(bucket_number + 1) * bucket_hours)
+
+    headers = [
+        "timestamp", "id", "time", "latitude", "longitude", "altitude", "humidity",
+        "mission_name", "pressure", "specific_humidity", "speed_u", "speed_v", "temperature"
+    ]
+
+    buckets = {}  # {(bucket_center, mission_name): {'data': {}, 'last_write': timestamp, 'data_hash': str}}
+    current_timestamp = start_time
+    fetched_so_far = 0
+    mission_stats = {}
+
+    print(f"Starting continuous polling from {datetime.fromtimestamp(start_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
+    print(f"Polling interval: {interval} seconds")
+    print("-----------------------------------------------------")
+
+    try:
+        while True:
+            observations_page = get_observations_page(
+                since=current_timestamp,
+                min_latitude=min_latitude,
+                max_latitude=max_latitude,
+                min_longitude=min_longitude,
+                max_longitude=max_longitude,
+                include_updated_at=include_updated_at,
+                mission_id=mission_id,
+                include_ids=include_ids,
+                include_mission_name=True
+            )
+
+            if observations_page is None:
+                print(f"\nNull response from API. Retrying in {interval} seconds ...")
+                time.sleep(interval)
+                continue
+
+            observations = observations_page.get('observations', [])
+
+            # Invoke the callback with fetched super observations
+            if callback:
+                print("--------\nCallback\n--------")
+                callback(observations)
+
+            if observations:
+                fetched_so_far += len(observations)
+                print_current_timestamp = current_timestamp if current_timestamp < 1e11 else current_timestamp / 1e9
+                print(f"Fetched {fetched_so_far} observations")
+                print(f"Current time: {datetime.fromtimestamp(print_current_timestamp, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
+                print("-----------------------------------------------------")
+
+            for obs in observations:
+                if 'mission_name' not in obs:
+                    continue
+
+                timestamp = obs.get('timestamp')
+                if not timestamp:
+                    continue
+
+                try:
+                    obs_time = datetime.fromtimestamp(timestamp, tz=timezone.utc)
+                except (OSError, ValueError, TypeError, OverflowError):
+                    continue
+
+                mission_name = obs.get('mission_name', 'Unknown')
+                obs['time'] = obs_time.replace(tzinfo=timezone.utc).isoformat()
+
+                processed_obs = {
+                    header: obs.get(header) if obs.get(header) not in [None, '', ' '] else 'None'
+                    for header in headers
+                }
+
+                obs_id = f"{timestamp}_{mission_name}"
+
+                if obs_time >= start_dt:
+                    hours_diff = (obs_time - first_center).total_seconds() / 3600
+                    bucket_index = floor(hours_diff / bucket_hours)
+                    bucket_center = first_center + timedelta(hours=bucket_index * bucket_hours)
+                    bucket_end = bucket_center + timedelta(hours=bucket_hours)
+
+                    if obs_time <= bucket_end:
+                        bucket_key = (bucket_center, mission_name)
+
+                        # Initialize bucket if needed
+                        if bucket_key not in buckets:
+                            buckets[bucket_key] = {
+                                'data': {},
+                                'last_write': 0,
+                                'data_hash': ''
+                            }
+
+                        # Update bucket data
+                        buckets[bucket_key]['data'][obs_id] = processed_obs
+
+                        # Track statistics
+                        if mission_name not in mission_stats:
+                            mission_stats[mission_name] = {'files': set(), 'observations': 0}
+                        mission_stats[mission_name]['observations'] += 1
+
+                        # Calculate new data hash
+                        sorted_data = sorted(buckets[bucket_key]['data'].items(), key=lambda x: int(x[1]['timestamp']))
+                        data_hash = hashlib.md5(str(sorted_data).encode()).hexdigest()
+
+                        # Check if we should write the bucket
+                        current_time = datetime.now(timezone.utc)
+                        time_since_last_write = current_time.timestamp() - buckets[bucket_key]['last_write']
+                        data_changed = data_hash != buckets[bucket_key]['data_hash']
+
+                        # Write if it's been more than interval seconds since last write OR if data has changed
+                        if (time_since_last_write >= interval or data_changed) and output_format:
+                            bucket_hour = int((bucket_center.hour + bucket_hours/2) % 24)
+
+                            file_name_format = {
+                                'csv': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv",
+                                'json': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json",
+                                'netcdf': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.nc",
+                                'little_r': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r"
+                            }
+
+                            file_name = file_name_format[output_format] % (
+                                bucket_center.year, bucket_center.month, bucket_center.day,
+                                bucket_hour, bucket_hours)
+
+                            output_file = os.path.join(output_dir or '.', file_name)
+                            sorted_obs = [obs for _, obs in sorted_data]
+
+                            # Write the file based on format
+                            try:
+                                if output_format == 'netcdf':
+                                    convert_to_netcdf(sorted_obs, bucket_center.timestamp(), output_file)
+                                elif output_format == 'csv':
+                                    with open(output_file, mode='w', newline='') as file:
+                                        writer = csv.DictWriter(file, fieldnames=headers)
+                                        writer.writeheader()
+                                        writer.writerows(sorted_obs)
+                                elif output_format == 'json':
+                                    sorted_obs_dict = {k: v for k, v in sorted_data}
+                                    with open(output_file, 'w', encoding='utf-8') as file:
+                                        json.dump(sorted_obs_dict, file, indent=4)
+                                elif output_format == 'little_r':
+                                    little_r_records = format_little_r(sorted_obs)
+                                    with open(output_file, 'w') as file:
+                                        file.write('\n'.join(little_r_records))
+
+                                buckets[bucket_key]['last_write'] = current_time.timestamp()
+                                buckets[bucket_key]['data_hash'] = data_hash
+                                mission_stats[mission_name]['files'].add(output_file)
+                            except Exception as e:
+                                print(f"Error writing bucket file {file_name}: {str(e)}")
+
+            # Clean up old buckets
+            current_time = datetime.now(timezone.utc)
+            buckets = {
+                k: v for k, v in buckets.items()
+                if current_time - k[0] <= timedelta(hours=bucket_hours * 2)  # Keep slightly longer for potential updates
+            }
+
+            next_timestamp = observations_page.get('next_since')
+            has_next_page = observations_page.get('has_next_page', False)
+
+            if next_timestamp and next_timestamp > current_timestamp:
+                current_timestamp = next_timestamp
+            elif not has_next_page:
+                print("-----------------------------------------------------")
+                print(f"\U0001F503 Latest super observations data have been processed.\nRetrying getting new observations data in {interval} seconds...")
+                print("-----------------------------------------------------")
+                time.sleep(interval)
+                continue
+
+            if not observations:
+                print(f"\U0001F503 No new super observations data available.\n Retrying getting new observations data in {interval} seconds...")
+                print("-----------------------------------------------------")
+                time.sleep(interval)
+
+    except KeyboardInterrupt:
+        print("\n\n\U0001F6D1 Received interrupt, stopping...")
+        print("-----------------------------------------------------")
+        for mission_name, stats in mission_stats.items():
+            print(f"Mission {mission_name}: {stats['observations']} observations across {len(stats['files'])} files")
+    except Exception as e:
+        print(f"Error occurred: {str(e)}")
+        exit(1001)
+    finally:
+        print("-----------------------------------------------------")
+        print("Finished processing observations.")
+
+# Super Observations
+# ------------
+def get_super_observations_page(since=None, min_time=None, max_time=None, include_ids=None, include_mission_name=None, include_updated_at=None, mission_id=None, save_to_file=None):
+    """
+    Retrieves super observations page based on specified filters.
+
+    Args:
+        since (str): Filter observations after this timestamp.
+        min_time (str): Minimum timestamp for observations.
+        max_time (str): Maximum timestamp for observations.
+        include_ids (bool): Include observation IDs in response.
+        include_mission_name (bool): Include mission names in response.
+        include_updated_at (bool): Include update timestamps in response.
+        mission_id (str): Filter observations by mission ID.
+        save_to_file (str): Optional path to save the response data.
+            If provided, saves the data in CSV format.
+
+    Returns:
+        dict: The API response containing filtered super observations.
+    """
+
+    url = f"{DATA_API_BASE_URL}/super_observations.json"
+
+    params = {}
+    if since:
+        params["since"] = to_unix_timestamp(since)
+    if min_time:
+        params["min_time"] = to_unix_timestamp(min_time)
+    if max_time:
+        params["max_time"] = to_unix_timestamp(max_time)
+    if mission_id:
+        params["mission_id"] = mission_id
+    if include_ids:
+        params["include_ids"] = True
+    if include_mission_name:
+        params["include_mission_name"] = True
+    if include_updated_at:
+        params["include_updated_at"] = True
+
+    params = {k: v for k, v in params.items() if v is not None}
+
+    response = make_api_request(url, params=params)
+    if save_to_file:
+        save_csv_json(save_to_file, response, csv_data_key='observations')
+
+    return response
+
+def super_observations(start_time, end_time=None, interval=60, save_to_file=None, bucket_hours=6.0, output_format=None, output_dir=None, callback=None):
     """
     Fetches super observations between a start time and an optional end time and saves to files in specified format.
     Files are broken up into time buckets, with filenames containing the time at the mid-point of the bucket.
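The new pollers (poll_observations above, and poll_super_observations later in this diff) decide when to rewrite a bucket file by hashing the bucket's timestamp-sorted contents with hashlib.md5 (hence the new import at the top of this diff) and comparing against the hash recorded at the last write. A minimal sketch of that change-detection step, with made-up data:

    import hashlib

    bucket = {'data': {}, 'last_write': 0, 'data_hash': ''}
    bucket['data']['1730463600_W-1234'] = {'timestamp': 1730463600,
                                           'temperature': -42.0}  # example

    # Hash the contents sorted by timestamp, as the pollers do
    sorted_data = sorted(bucket['data'].items(),
                         key=lambda x: int(x[1]['timestamp']))
    data_hash = hashlib.md5(str(sorted_data).encode()).hexdigest()

    if data_hash != bucket['data_hash']:
        # Contents changed since the last write: rewrite the file,
        # then record the new state.
        bucket['data_hash'] = data_hash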
@@ -408,6 +674,7 @@ def poll_super_observations(start_time, end_time=None, interval=60, save_to_file
            Supported formats are '.csv', '.json', '.little_r' and '.nc'
        bucket_hours (int): Optional. Size of time buckets in hours. Defaults to 6 hours.
        output_format (str): Optional. Format to save data in separate files. Supported formats are 'json, 'csv', 'little_r' and 'netcdf'.
+        output_dir (str): Optional. Directory path where the separate files should be saved. If not provided, files will be saved in current directory.
        callback (callable): Optional callback function that receives (super observations, metadata) before saving.
            This allows custom processing or saving in custom formats.
    """
@@ -469,12 +736,15 @@ def poll_super_observations(start_time, end_time=None, interval=60, save_to_file
    # Initialize the polling loop
    current_timestamp = start_time
    has_next_page = True
+    fetced_so_far = 0
 
+    print(f"Starting polling super observations\nfrom {datetime.fromtimestamp(start_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} to {datetime.fromtimestamp(end_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
+    print("-----------------------------------------------------")
 
    while has_next_page:
        try:
            # Fetch observations
-            observations_page = get_super_observations(
+            observations_page = get_super_observations_page(
                since=current_timestamp,
                min_time=start_time,
                max_time=end_time,
@@ -490,13 +760,15 @@ def poll_super_observations(start_time, end_time=None, interval=60, save_to_file
                continue
 
            observations = observations_page.get('observations', [])
-            print(f"Fetched {len(observations)} super observation(s)")
+            fetced_so_far = fetced_so_far + len(observations)
+            print_current_timestamp = current_timestamp if current_timestamp < 1e11 else current_timestamp / 1e9
+            print(f"Fetched {fetced_so_far} super observations")
+            print(f"Current time: {datetime.fromtimestamp(print_current_timestamp, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
+            print("-----------------------------------------------------")
 
-            # Invoke the callback with fetched observations
+            # Invoke the callback with fetched super observations
            if callback:
-                print("--------")
-                print("Callback")
-                print("--------")
+                print("--------\nCallback\n--------")
                callback(observations)
 
            for obs in observations:
@@ -547,27 +819,41 @@ def poll_super_observations(start_time, end_time=None, interval=60, save_to_file
 
            if not has_next_page or not next_timestamp or next_timestamp <= current_timestamp:
                print("-----------------------------------------------------\n")
-                print("No more pages available or reached end of time range.")
+                print("Fetching complete.")
                print("\n-----------------------------------------------------")
                break
 
            current_timestamp = next_timestamp
 
+        except KeyboardInterrupt:
+            print("\n\n\U0001F6D1 Received interrupt, stopping...")
+            print("-----------------------------------------------------")
+            print("Requested data was not saved!\nRun again and do not interrupt the run to save data.")
+            print("-----------------------------------------------------")
+            exit(3)
        except Exception as e:
            print(f"Error occurred: {e}")
            exit(1001)
 
    # Save data to a single file
    if save_to_file:
+        # Create directory path if it doesn't exist
+        directory = os.path.dirname(save_to_file)
+        if directory and not os.path.isdir(directory):
+            os.makedirs(directory, exist_ok=True)
        filtered_observations = {obs_id: obs for obs_id, obs in all_observations.items()
                                 if float(obs['timestamp']) >= start_time}
        # Sort by timestamp
        sorted_observations = dict(sorted(filtered_observations.items(),
                                          key=lambda x: float(x[1]['timestamp'])))
 
+        print(f"Saving {len(sorted_observations)} super {'observation' if len(sorted_observations) == 1 else 'observations'} to {save_to_file}")
+        print("This may take a while...")
+        print("-----------------------------------------------------\n")
+
        if save_to_file.endswith('.nc'):
            first_obs_timestamp = float(next(iter(sorted_observations.values()))['timestamp'])
-            convert_to_netcdf(sorted_observations, first_obs_timestamp, output_filename=save_to_file)
+            convert_to_netcdf(sorted_observations, first_obs_timestamp, save_to_file)
 
        elif save_to_file.endswith('.json'):
            with open(save_to_file, 'w', encoding='utf-8') as f:
@@ -588,6 +874,16 @@ def poll_super_observations(start_time, end_time=None, interval=60, save_to_file
 
    # Save data to multiple file
    elif output_format:
+        # Create output directory if specified
+        if output_dir:
+            os.makedirs(output_dir, exist_ok=True)
+            print(f"Files will be saved to {output_dir}")
+        else:
+            print(f"Files will be saved to {os.getcwd()}")
+
+        print(f"Processing {fetced_so_far} super {'observation' if fetced_so_far == 1 else 'observations'} and save them over multiple files.")
+        print("This may take a while...")
+        print("-----------------------------------------------------\n")
        # Track statistics per mission
        mission_stats = {}  # {mission_name: {'files': 0, 'observations': 0}}
        total_observations_written = 0
@@ -598,46 +894,37 @@ def poll_super_observations(start_time, end_time=None, interval=60, save_to_file
                # Format hour to be the actual bucket center
                bucket_hour = int((bucket_center.hour + bucket_hours/2) % 24)
 
-                if output_format == 'netcdf':
-                    convert_to_netcdf(observations, bucket_center.timestamp())
+                # Generate file name based on output format
+                file_name_format = {
+                    'csv': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv",
+                    'json': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json",
+                    'netcdf': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.nc",
+                    'little_r': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r"
+                }
+                file_name = file_name_format[output_format] % (
+                    bucket_center.year, bucket_center.month, bucket_center.day,
+                    bucket_hour, bucket_hours)
 
-                if output_format == 'csv':
-                    output_file = (f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv" %
-                                   (bucket_center.year, bucket_center.month, bucket_center.day,
-                                    bucket_hour, bucket_hours))
+                output_file = os.path.join(output_dir or '.', file_name)
 
-                    os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)
+                # Sort observations by timestamp within each bucket
+                sorted_obs = sorted(observations.values(), key=lambda x: int(x['timestamp']))
 
-                    # Sort observations by timestamp within each bucket
-                    sorted_obs = sorted(observations.values(), key=lambda x: int(x['timestamp']))
+                if output_format == 'netcdf':
+                    convert_to_netcdf(sorted_obs, bucket_center.timestamp(), output_file)
 
+                elif output_format == 'csv':
                    with open(output_file, mode='w', newline='') as file:
                        writer = csv.DictWriter(file, fieldnames=headers)
                        writer.writeheader()
                        writer.writerows(sorted_obs)
 
                elif output_format == 'json':
-                    output_file = (f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json" %
-                                   (bucket_center.year, bucket_center.month, bucket_center.day,
-                                    bucket_hour, bucket_hours))
-
-                    os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)
-
-                    # Sort observations by timestamp within each bucket
-                    sorted_obs = dict(sorted(observations.items(), key=lambda x: int(x[1]['timestamp'])))
-
+                    sorted_obs_dict = {k: v for k, v in sorted(observations.items(), key=lambda x: int(x[1]['timestamp']))}
                    with open(output_file, 'w', encoding='utf-8') as file:
-                        json.dump(sorted_obs, file, indent=4)
+                        json.dump(sorted_obs_dict, file, indent=4)
 
                elif output_format == 'little_r':
-                    output_file = (f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r" %
-                                   (bucket_center.year, bucket_center.month, bucket_center.day,
-                                    bucket_hour, bucket_hours))
-
-                    os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)
-
-                    sorted_obs = sorted(observations.items(), key=lambda x: int(x[1]['timestamp']))
-
                    little_r_records = format_little_r(sorted_obs)
                    with open(output_file, 'w') as file:
                        file.write('\n'.join(little_r_records))
@@ -659,6 +946,229 @@ def poll_super_observations(start_time, end_time=None, interval=60, save_to_file
    print("-----------------------------------------------------")
    print("All super observations have been processed and saved.")
 
+def poll_super_observations(start_time, interval=60, bucket_hours=6.0, output_format=None, output_dir=None, callback=None):
+    """
+    Continuously polls for super observations and saves to files in specified format.
+    Will run indefinitely until interrupted.
+
+    Args:
+        start_time (str): Starting time in YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM or ISO format
+        interval (int): Polling interval in seconds when no data is received (default: 60)
+        bucket_hours (float): Size of time buckets in hours (default: 6.0)
+        output_format (str): Format for bucket files ('json', 'csv', 'little_r', 'netcdf')
+        output_dir (str): Directory for bucket files (default: current directory)
+        callback (callable): Optional callback for data processing
+    """
+    # Print warning about infinite loop
+    print(" ___________________________________________________________________")
+    print("| WARNING \U000026A0\U0000FE0F |")
+    print("| You are entering an endless loop. |")
+    print("| |")
+    print("| Press Ctrl + C anytime to exit. |")
+    print("|___________________________________________________________________|\n\n")
+    time.sleep(4)
+
+    start_time = to_unix_timestamp(start_time)
+
+    if output_format and output_format not in ['json', 'csv', 'little_r', 'netcdf']:
+        print("Please use one of the following formats:")
+        print(" - json\n - csv\n - little_r\n - netcdf")
+        return
+
+    if output_dir:
+        os.makedirs(output_dir, exist_ok=True)
+        print(f"\U0001F4C1 Files will be saved to {output_dir}")
+    else:
+        print(f"\U0001F4C1 Files will be saved to {os.getcwd()}")
+
+    # Convert start_time to datetime
+    start_dt = datetime.fromtimestamp(start_time, tz=timezone.utc)
+
+    # Calculate first center time that's after start_time
+    hours_since_day_start = start_dt.hour + start_dt.minute / 60
+    bucket_number = hours_since_day_start // bucket_hours
+    first_center = start_dt.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(hours=(bucket_number + 1) * bucket_hours)
+
+    headers = [
+        "timestamp", "id", "time", "latitude", "longitude", "altitude", "humidity",
+        "mission_name", "pressure", "specific_humidity", "speed_u", "speed_v", "temperature"
+    ]
+
+    buckets = {}  # {(bucket_center, mission_name): {'data': {}, 'last_write': timestamp, 'data_hash': str}}
+    current_timestamp = start_time
+    fetched_so_far = 0
+    mission_stats = {}
+
+    print(f"Starting continuous polling from {datetime.fromtimestamp(start_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC")
+    print(f"Polling interval: {interval} seconds")
+    print("-----------------------------------------------------")
+
+    try:
+        while True:
+            observations_page = get_super_observations_page(
+                since=current_timestamp,
+                min_time=start_time,
+                include_ids=True,
+                include_mission_name=True
+            )
+
+            if observations_page is None:
+                print(f"\nNull response from API. Retrying in {interval} seconds ...")
+                time.sleep(interval)
+                continue
+
+            observations = observations_page.get('observations', [])
+
+            # Invoke the callback with fetched super observations
+            if callback:
+                print("--------\nCallback\n--------")
+                callback(observations)
+
+            if observations:
+                fetched_so_far += len(observations)
+                print_current_timestamp = current_timestamp if current_timestamp < 1e11 else current_timestamp / 1e9
+                print(f"Fetched {fetched_so_far} super observations")
+                print(f"Current time: {datetime.fromtimestamp(print_current_timestamp, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
+                print("-----------------------------------------------------")
+
+            for obs in observations:
+                if 'mission_name' not in obs:
+                    continue
+
+                timestamp = obs.get('timestamp')
+                if not timestamp:
+                    continue
+
+                try:
+                    obs_time = datetime.fromtimestamp(timestamp, tz=timezone.utc)
+                except (OSError, ValueError, TypeError, OverflowError):
+                    continue
+
+                mission_name = obs.get('mission_name', 'Unknown')
+                obs['time'] = obs_time.replace(tzinfo=timezone.utc).isoformat()
+
+                processed_obs = {
+                    header: obs.get(header) if obs.get(header) not in [None, '', ' '] else 'None'
+                    for header in headers
+                }
+
+                obs_id = f"{timestamp}_{mission_name}"
+
+                if obs_time >= start_dt:
+                    hours_diff = (obs_time - first_center).total_seconds() / 3600
+                    bucket_index = floor(hours_diff / bucket_hours)
+                    bucket_center = first_center + timedelta(hours=bucket_index * bucket_hours)
+                    bucket_end = bucket_center + timedelta(hours=bucket_hours)
+
+                    if obs_time <= bucket_end:
+                        bucket_key = (bucket_center, mission_name)
+
+                        # Initialize bucket if needed
+                        if bucket_key not in buckets:
+                            buckets[bucket_key] = {
+                                'data': {},
+                                'last_write': 0,
+                                'data_hash': ''
+                            }
+
+                        # Update bucket data
+                        buckets[bucket_key]['data'][obs_id] = processed_obs
+
+                        # Track statistics
+                        if mission_name not in mission_stats:
+                            mission_stats[mission_name] = {'files': set(), 'observations': 0}
+                        mission_stats[mission_name]['observations'] += 1
+
+                        # Calculate new data hash
+                        sorted_data = sorted(buckets[bucket_key]['data'].items(), key=lambda x: int(x[1]['timestamp']))
+                        data_hash = hashlib.md5(str(sorted_data).encode()).hexdigest()
+
+                        # Check if we should write the bucket
+                        current_time = datetime.now(timezone.utc)
+                        time_since_last_write = current_time.timestamp() - buckets[bucket_key]['last_write']
+                        data_changed = data_hash != buckets[bucket_key]['data_hash']
+
+                        # Write if it's been more than interval seconds since last write OR if data has changed
+                        if (time_since_last_write >= interval or data_changed) and output_format:
+                            bucket_hour = int((bucket_center.hour + bucket_hours/2) % 24)
+
+                            file_name_format = {
+                                'csv': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv",
+                                'json': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json",
+                                'netcdf': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.nc",
+                                'little_r': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r"
+                            }
+
+                            file_name = file_name_format[output_format] % (
+                                bucket_center.year, bucket_center.month, bucket_center.day,
+                                bucket_hour, bucket_hours)
+
+                            output_file = os.path.join(output_dir or '.', file_name)
+                            sorted_obs = [obs for _, obs in sorted_data]
+
+                            # Write the file based on format
+                            try:
+                                if output_format == 'netcdf':
+                                    convert_to_netcdf(sorted_obs, bucket_center.timestamp(), output_file)
+                                elif output_format == 'csv':
+                                    with open(output_file, mode='w', newline='') as file:
+                                        writer = csv.DictWriter(file, fieldnames=headers)
+                                        writer.writeheader()
+                                        writer.writerows(sorted_obs)
+                                elif output_format == 'json':
+                                    sorted_obs_dict = {k: v for k, v in sorted_data}
+                                    with open(output_file, 'w', encoding='utf-8') as file:
+                                        json.dump(sorted_obs_dict, file, indent=4)
+                                elif output_format == 'little_r':
+                                    little_r_records = format_little_r(sorted_obs)
+                                    with open(output_file, 'w') as file:
+                                        file.write('\n'.join(little_r_records))
+
+                                buckets[bucket_key]['last_write'] = current_time.timestamp()
+                                buckets[bucket_key]['data_hash'] = data_hash
+                                mission_stats[mission_name]['files'].add(output_file)
+                            except Exception as e:
+                                print(f"Error writing bucket file {file_name}: {str(e)}")
+
+            # Clean up old buckets
+            current_time = datetime.now(timezone.utc)
+            buckets = {
+                k: v for k, v in buckets.items()
+                if current_time - k[0] <= timedelta(hours=bucket_hours * 2)  # Keep slightly longer for potential updates
+            }
+
+            next_timestamp = observations_page.get('next_since')
+            has_next_page = observations_page.get('has_next_page', False)
+
+            if next_timestamp and next_timestamp > current_timestamp:
+                current_timestamp = next_timestamp
+            elif not has_next_page:
+                print("-----------------------------------------------------")
+                print(f"\U0001F503 Latest super observations data have been processed.\nRetrying getting new super observations data in {interval} seconds...")
+                print("-----------------------------------------------------")
+                time.sleep(interval)
+                continue
+
+            if not observations:
+                print(f"\U0001F503 No new super observations data available.\n Retrying getting new super observations data in {interval} seconds...")
+                print("-----------------------------------------------------")
+                time.sleep(interval)
+
+    except KeyboardInterrupt:
+        print("\n\U0001F6D1 Received interrupt, stopping...")
+        print("-----------------------------------------------------")
+        for mission_name, stats in mission_stats.items():
+            print(f"Mission {mission_name}: {stats['observations']} super observations across {len(stats['files'])} files")
+    except Exception as e:
+        print(f"Error occurred: {str(e)}")
+        exit(1001)
+    finally:
+        print("-----------------------------------------------------")
+        print("Finished processing super observations.")
+
+# ------------
+# METADATA
+# ------------
 def get_flying_missions(cli=None, save_to_file=None):
     """
     Retrieves a list of currently flying missions.