windborne 1.0.6__py3-none-any.whl → 1.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
windborne/__init__.py CHANGED
@@ -14,6 +14,9 @@ from .data_api import (
     get_super_observations_page,
     super_observations,
 
+    poll_super_observations,
+    poll_observations,
+
     get_flying_missions,
     get_mission_launch_site,
     get_predicted_path,
@@ -50,6 +53,9 @@ __all__ = [
    "get_super_observations_page",
    "super_observations",
 
+   "poll_super_observations",
+   "poll_observations",
+
    "get_flying_missions",
    "get_mission_launch_site",
    "get_predicted_path",
windborne/cli.py CHANGED
@@ -3,8 +3,13 @@ import argparse
 from . import (
     super_observations,
     observations,
+
     get_observations_page,
     get_super_observations_page,
+
+    poll_super_observations,
+    poll_observations,
+
     get_flying_missions,
     get_mission_launch_site,
     get_predicted_path,
@@ -42,7 +47,7 @@ def main():
     super_obs_parser.add_argument('-i', '--interval', type=int, default=60, help='Polling interval in seconds')
     super_obs_parser.add_argument('-b', '--bucket-hours', type=float, default=6.0, help='Hours per bucket')
     super_obs_parser.add_argument('-d', '--output-dir', help='Directory path where the separate files should be saved. If not provided, files will be saved in current directory.')
-    super_obs_parser.add_argument('output', help='Save output to a single file (filename.csv, filename.json or filename.little_r) or to multiple files (csv or little_r)')
+    super_obs_parser.add_argument('output', help='Save output to a single file (filename.csv, filename.json or filename.little_r) or to multiple files (csv, json, netcdf or little_r)')
 
     # Observations Command
     obs_parser = subparsers.add_parser('observations', help='Poll observations within a time range')
@@ -58,7 +63,7 @@ def main():
     obs_parser.add_argument('-i', '--interval', type=int, default=60, help='Polling interval in seconds')
     obs_parser.add_argument('-b', '--bucket-hours', type=float, default=6.0, help='Hours per bucket')
     obs_parser.add_argument('-d', '--output-dir', help='Directory path where the separate files should be saved. If not provided, files will be saved in current directory.')
-    obs_parser.add_argument('output', help='Save output to a single file (filename.csv, filename.json or filename.little_r) or to multiple files (csv or little_r)')
+    obs_parser.add_argument('output', help='Save output to a single file (filename.csv, filename.json or filename.little_r) or to multiple files (csv, json, netcdf or little_r)')
 
 
     # Get Observations Page Command
@@ -87,6 +92,29 @@ def main():
     super_obs_page_parser.add_argument('-u', '--include-updated-at', action='store_true', help='Include update timestamps')
     super_obs_page_parser.add_argument('output', nargs='?', help='Output file')
 
+    # Poll Super Observations Command
+    poll_super_obs_parser = subparsers.add_parser('poll-super-observations', help='Continuously polls for super observations and saves to files in the specified format.')
+    poll_super_obs_parser.add_argument('start_time', help='Starting time (YYYY-MM-DD_HH:MM, "YYYY-MM-DD HH:MM:SS" or YYYY-MM-DDTHH:MM:SS.fffZ)')
+    poll_super_obs_parser.add_argument('-i', '--interval', type=int, default=60, help='Polling interval in seconds')
+    poll_super_obs_parser.add_argument('-b', '--bucket-hours', type=float, default=6.0, help='Hours per bucket')
+    poll_super_obs_parser.add_argument('-d', '--output-dir', help='Directory path where the separate files should be saved. If not provided, files will be saved in current directory.')
+    poll_super_obs_parser.add_argument('output', help='Save output to multiple files (csv, json, netcdf or little_r)')
+
+    # Poll Observations Command
+    poll_obs_parser = subparsers.add_parser('poll-observations', help='Continuously polls for observations and saves to files in the specified format.')
+    poll_obs_parser.add_argument('start_time', help='Starting time (YYYY-MM-DD_HH:MM, "YYYY-MM-DD HH:MM:SS" or YYYY-MM-DDTHH:MM:SS.fffZ)')
+    poll_obs_parser.add_argument('-m', '--mission-id', help='Filter observations by mission ID')
+    poll_obs_parser.add_argument('-ml', '--min-latitude', type=float, help='Minimum latitude filter')
+    poll_obs_parser.add_argument('-xl', '--max-latitude', type=float, help='Maximum latitude filter')
+    poll_obs_parser.add_argument('-mg', '--min-longitude', type=float, help='Minimum longitude filter')
+    poll_obs_parser.add_argument('-xg', '--max-longitude', type=float, help='Maximum longitude filter')
+    poll_obs_parser.add_argument('-id', '--include-ids', action='store_true', help='Include observation IDs')
+    poll_obs_parser.add_argument('-u', '--include-updated-at', action='store_true', help='Include update timestamps')
+    poll_obs_parser.add_argument('-i', '--interval', type=int, default=60, help='Polling interval in seconds')
+    poll_obs_parser.add_argument('-b', '--bucket-hours', type=float, default=6.0, help='Hours per bucket')
+    poll_obs_parser.add_argument('-d', '--output-dir', help='Directory path where the separate files should be saved. If not provided, files will be saved in current directory.')
+    poll_obs_parser.add_argument('output', help='Save output to multiple files (csv, json, netcdf or little_r)')
+
     # Get Flying Missions Command
     flying_parser = subparsers.add_parser('flying-missions', help='Get currently flying missions')
     flying_parser.add_argument('output', nargs='?', help='Output file')
@@ -226,6 +254,37 @@ def main():
             output_format=output_format
         )
 
+    elif args.command == 'poll-super-observations':
+        output_format = args.output
+        output_dir = args.output_dir
+
+        poll_super_observations(
+            start_time=args.start_time,
+            interval=args.interval,
+            bucket_hours=args.bucket_hours,
+            output_dir=output_dir,
+            output_format=output_format
+        )
+
+    elif args.command == 'poll-observations':
+        output_format = args.output
+        output_dir = args.output_dir
+
+        poll_observations(
+            start_time=args.start_time,
+            include_ids=args.include_ids,
+            include_updated_at=args.include_updated_at,
+            mission_id=args.mission_id,
+            min_latitude=args.min_latitude,
+            max_latitude=args.max_latitude,
+            min_longitude=args.min_longitude,
+            max_longitude=args.max_longitude,
+            interval=args.interval,
+            bucket_hours=args.bucket_hours,
+            output_dir=output_dir,
+            output_format=output_format
+        )
+
     elif args.command == 'observations':
         # Error handling is performed within observations
         # and we display the appropriate error messages
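The dispatch above maps each new subcommand straight onto the library function with the parsed arguments. A hedged sketch of what `windborne poll-observations 2024-01-01_00:00 csv` resolves to after parsing, assuming the defaults declared in the parsers above (the date is a hypothetical example):

```python
from windborne import poll_observations

poll_observations(
    start_time="2024-01-01_00:00",  # positional start_time (hypothetical value)
    interval=60,                    # -i / --interval default
    bucket_hours=6.0,               # -b / --bucket-hours default
    output_dir=None,                # -d / --output-dir: current directory when omitted
    output_format="csv",            # positional output argument
)
```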
windborne/data_api.py CHANGED
@@ -7,6 +7,9 @@ from math import floor
 from datetime import datetime, timezone, timedelta
 import csv
 import json
+import hashlib
+
+# UTC should be used across the lib
 
 # ------------
 # CORE RESOURCES
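The new hashlib import supports the change detection used by the pollers added below: each bucket's sorted contents are fingerprinted, and a file is rewritten only when the fingerprint changes. The idiom, extracted as a standalone sketch (the helper name is ours, not the library's):

```python
import hashlib

def bucket_fingerprint(bucket_data):
    # bucket_data maps an observation id to its processed dict;
    # sorting by timestamp makes the fingerprint order-independent.
    sorted_data = sorted(bucket_data.items(), key=lambda x: int(x[1]['timestamp']))
    return hashlib.md5(str(sorted_data).encode()).hexdigest()
```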
@@ -165,6 +168,9 @@ def observations(start_time, end_time=None, include_ids=None, include_updated_at
     has_next_page = True
     fetced_so_far = 0
 
+    print(f"Starting polling observations\nfrom {datetime.fromtimestamp(start_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC to {datetime.fromtimestamp(end_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC")
+    print("-----------------------------------------------------")
+
 
     while has_next_page:
         try:
@@ -191,13 +197,13 @@ def observations(start_time, end_time=None, include_ids=None, include_updated_at
             observations = observations_page.get('observations', [])
             fetced_so_far = fetced_so_far + len(observations)
             print_current_timestamp = current_timestamp if current_timestamp < 1e11 else current_timestamp / 1e9
-            print(f"Fetched {fetced_so_far} observation(s)")
-            print(f"Current time: {datetime.fromtimestamp(print_current_timestamp).strftime('%Y-%m-%d %H:%M:%S')}")
+            print(f"Fetched {fetced_so_far} observations")
+            print(f"Current time: {datetime.fromtimestamp(print_current_timestamp, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
             print("-----------------------------------------------------")
 
             # Invoke the callback with fetched observations
             if callback:
-                print("\nCallback\n")
+                print("--------\nCallback\n--------")
                 callback(observations)
 
             for obs in observations:
@@ -254,6 +260,12 @@ def observations(start_time, end_time=None, include_ids=None, include_updated_at
 
             current_timestamp = next_timestamp
 
+        except KeyboardInterrupt:
+            print("\n\n\U0001F6D1 Received interrupt, stopping...")
+            print("-----------------------------------------------------")
+            print("Requested data was not saved!\nRun again and do not interrupt the run to save data.")
+            print("-----------------------------------------------------")
+            exit(3)
         except Exception as e:
             print(f"Error occurred: {e}")
             exit(1001)
@@ -276,7 +288,7 @@ def observations(start_time, end_time=None, include_ids=None, include_updated_at
 
     if save_to_file.endswith('.nc'):
         first_obs_timestamp = float(next(iter(sorted_observations.values()))['timestamp'])
-        convert_to_netcdf(sorted_observations, first_obs_timestamp, output_filename=save_to_file)
+        convert_to_netcdf(sorted_observations, first_obs_timestamp, save_to_file)
     elif save_to_file.endswith('.json'):
         with open(save_to_file, 'w', encoding='utf-8') as f:
             json.dump(sorted_observations, f, indent=4)
@@ -319,6 +331,7 @@ def observations(start_time, end_time=None, include_ids=None, include_updated_at
         file_name_format = {
             'csv': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv",
             'json': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json",
+            'netcdf': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.nc",
             'little_r': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r"
         }
         file_name = file_name_format[output_format] % (
  file_name = file_name_format[output_format] % (
@@ -331,7 +344,7 @@ def observations(start_time, end_time=None, include_ids=None, include_updated_at
331
344
  sorted_obs = sorted(observations.values(), key=lambda x: int(x['timestamp']))
332
345
 
333
346
  if output_format == 'netcdf':
334
- convert_to_netcdf(sorted_obs, bucket_center.timestamp())
347
+ convert_to_netcdf(sorted_obs, bucket_center.timestamp(), output_file)
335
348
 
336
349
  elif output_format == 'csv':
337
350
  with open(output_file, mode='w', newline='') as file:
@@ -366,6 +379,237 @@ def observations(start_time, end_time=None, include_ids=None, include_updated_at
     print("-----------------------------------------------------")
     print("All observations have been processed and saved.")
 
+def poll_observations(start_time, include_ids=None, include_updated_at=None, mission_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None, interval=60, bucket_hours=6.0, output_format=None, output_dir=None, callback=None):
+    """
+    Continuously polls for observations and saves to files in the specified format.
+    Will run indefinitely until interrupted.
+
+    Args:
+        start_time (str): Starting time in YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM or ISO format
+        include_ids (bool): Include observation IDs in response.
+        include_updated_at (bool): Include update timestamps in response.
+        mission_id (str): Filter observations by mission ID.
+        min_latitude (float): Minimum latitude boundary.
+        max_latitude (float): Maximum latitude boundary.
+        min_longitude (float): Minimum longitude boundary.
+        max_longitude (float): Maximum longitude boundary.
+        interval (int): Polling interval in seconds when no data is received (default: 60)
+        bucket_hours (float): Size of time buckets in hours (default: 6.0)
+        output_format (str): Format for bucket files ('json', 'csv', 'little_r', 'netcdf')
+        output_dir (str): Directory for bucket files (default: current directory)
+        callback (callable): Optional callback for data processing
+    """
+    # Print warning about infinite loop
+    print(" ___________________________________________________________________")
+    print("| WARNING \U000026A0\U0000FE0F                                                        |")
+    print("| You are entering an endless loop.                                 |")
+    print("|                                                                   |")
+    print("| Press Ctrl + C anytime to exit.                                   |")
+    print("|___________________________________________________________________|\n\n")
+    time.sleep(4)
+
+    start_time = to_unix_timestamp(start_time)
+
+    if output_format and output_format not in ['json', 'csv', 'little_r', 'netcdf']:
+        print("Please use one of the following formats:")
+        print(" - json\n - csv\n - little_r\n - netcdf")
+        return
+
+    if output_dir:
+        os.makedirs(output_dir, exist_ok=True)
+        print(f"\U0001F4C1 Files will be saved to {output_dir}")
+    else:
+        print(f"\U0001F4C1 Files will be saved to {os.getcwd()}")
+
+    # Convert start_time to datetime
+    start_dt = datetime.fromtimestamp(start_time, tz=timezone.utc)
+
+    # Calculate first center time that's after start_time
+    hours_since_day_start = start_dt.hour + start_dt.minute / 60
+    bucket_number = hours_since_day_start // bucket_hours
+    first_center = start_dt.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(hours=(bucket_number + 1) * bucket_hours)
+
+    headers = [
+        "timestamp", "id", "time", "latitude", "longitude", "altitude", "humidity",
+        "mission_name", "pressure", "specific_humidity", "speed_u", "speed_v", "temperature"
+    ]
+
+    buckets = {}  # {(bucket_center, mission_name): {'data': {}, 'last_write': timestamp, 'data_hash': str}}
+    current_timestamp = start_time
+    fetched_so_far = 0
+    mission_stats = {}
+
+    print(f"Starting continuous polling from {datetime.fromtimestamp(start_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC")
+    print(f"Polling interval: {interval} seconds")
+    print("-----------------------------------------------------")
+
+    try:
+        while True:
+            observations_page = get_observations_page(
+                since=current_timestamp,
+                min_latitude=min_latitude,
+                max_latitude=max_latitude,
+                min_longitude=min_longitude,
+                max_longitude=max_longitude,
+                include_updated_at=include_updated_at,
+                mission_id=mission_id,
+                include_ids=include_ids,
+                include_mission_name=True
+            )
+
+            if observations_page is None:
+                print(f"\nNull response from API. Retrying in {interval} seconds ...")
+                time.sleep(interval)
+                continue
+
+            observations = observations_page.get('observations', [])
+
+            # Invoke the callback with fetched observations
+            if callback:
+                print("--------\nCallback\n--------")
+                callback(observations)
+
+            if observations:
+                fetched_so_far += len(observations)
+                print_current_timestamp = current_timestamp if current_timestamp < 1e11 else current_timestamp / 1e9
+                print(f"Fetched {fetched_so_far} observations")
+                print(f"Current time: {datetime.fromtimestamp(print_current_timestamp, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
+                print("-----------------------------------------------------")
+
+            for obs in observations:
+                if 'mission_name' not in obs:
+                    continue
+
+                timestamp = obs.get('timestamp')
+                if not timestamp:
+                    continue
+
+                try:
+                    obs_time = datetime.fromtimestamp(timestamp, tz=timezone.utc)
+                except (OSError, ValueError, TypeError, OverflowError):
+                    continue
+
+                mission_name = obs.get('mission_name', 'Unknown')
+                obs['time'] = obs_time.replace(tzinfo=timezone.utc).isoformat()
+
+                processed_obs = {
+                    header: obs.get(header) if obs.get(header) not in [None, '', ' '] else 'None'
+                    for header in headers
+                }
+
+                obs_id = f"{timestamp}_{mission_name}"
+
+                if obs_time >= start_dt:
+                    hours_diff = (obs_time - first_center).total_seconds() / 3600
+                    bucket_index = floor(hours_diff / bucket_hours)
+                    bucket_center = first_center + timedelta(hours=bucket_index * bucket_hours)
+                    bucket_end = bucket_center + timedelta(hours=bucket_hours)
+
+                    if obs_time <= bucket_end:
+                        bucket_key = (bucket_center, mission_name)
+
+                        # Initialize bucket if needed
+                        if bucket_key not in buckets:
+                            buckets[bucket_key] = {
+                                'data': {},
+                                'last_write': 0,
+                                'data_hash': ''
+                            }
+
+                        # Update bucket data
+                        buckets[bucket_key]['data'][obs_id] = processed_obs
+
+                        # Track statistics
+                        if mission_name not in mission_stats:
+                            mission_stats[mission_name] = {'files': set(), 'observations': 0}
+                        mission_stats[mission_name]['observations'] += 1
+
+                        # Calculate new data hash
+                        sorted_data = sorted(buckets[bucket_key]['data'].items(), key=lambda x: int(x[1]['timestamp']))
+                        data_hash = hashlib.md5(str(sorted_data).encode()).hexdigest()
+
+                        # Check if we should write the bucket
+                        current_time = datetime.now(timezone.utc)
+                        time_since_last_write = current_time.timestamp() - buckets[bucket_key]['last_write']
+                        data_changed = data_hash != buckets[bucket_key]['data_hash']
+
+                        # Write if it's been more than interval seconds since last write OR if data has changed
+                        if (time_since_last_write >= interval or data_changed) and output_format:
+                            bucket_hour = int((bucket_center.hour + bucket_hours/2) % 24)
+
+                            file_name_format = {
+                                'csv': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv",
+                                'json': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json",
+                                'netcdf': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.nc",
+                                'little_r': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r"
+                            }
+
+                            file_name = file_name_format[output_format] % (
+                                bucket_center.year, bucket_center.month, bucket_center.day,
+                                bucket_hour, bucket_hours)
+
+                            output_file = os.path.join(output_dir or '.', file_name)
+                            sorted_obs = [obs for _, obs in sorted_data]
+
+                            # Write the file based on format
+                            try:
+                                if output_format == 'netcdf':
+                                    convert_to_netcdf(sorted_obs, bucket_center.timestamp(), output_file)
+                                elif output_format == 'csv':
+                                    with open(output_file, mode='w', newline='') as file:
+                                        writer = csv.DictWriter(file, fieldnames=headers)
+                                        writer.writeheader()
+                                        writer.writerows(sorted_obs)
+                                elif output_format == 'json':
+                                    sorted_obs_dict = {k: v for k, v in sorted_data}
+                                    with open(output_file, 'w', encoding='utf-8') as file:
+                                        json.dump(sorted_obs_dict, file, indent=4)
+                                elif output_format == 'little_r':
+                                    little_r_records = format_little_r(sorted_obs)
+                                    with open(output_file, 'w') as file:
+                                        file.write('\n'.join(little_r_records))
+
+                                buckets[bucket_key]['last_write'] = current_time.timestamp()
+                                buckets[bucket_key]['data_hash'] = data_hash
+                                mission_stats[mission_name]['files'].add(output_file)
+                            except Exception as e:
+                                print(f"Error writing bucket file {file_name}: {str(e)}")
+
+            # Clean up old buckets
+            current_time = datetime.now(timezone.utc)
+            buckets = {
+                k: v for k, v in buckets.items()
+                if current_time - k[0] <= timedelta(hours=bucket_hours * 2)  # Keep slightly longer for potential updates
+            }
+
+            next_timestamp = observations_page.get('next_since')
+            has_next_page = observations_page.get('has_next_page', False)
+
+            if next_timestamp and next_timestamp > current_timestamp:
+                current_timestamp = next_timestamp
+            elif not has_next_page:
+                print("-----------------------------------------------------")
+                print(f"\U0001F503 Latest observations data have been processed.\nRetrying getting new observations data in {interval} seconds...")
+                print("-----------------------------------------------------")
+                time.sleep(interval)
+                continue
+
+            if not observations:
+                print(f"\U0001F503 No new observations data available.\nRetrying getting new observations data in {interval} seconds...")
+                print("-----------------------------------------------------")
+                time.sleep(interval)
+
+    except KeyboardInterrupt:
+        print("\n\n\U0001F6D1 Received interrupt, stopping...")
+        print("-----------------------------------------------------")
+        for mission_name, stats in mission_stats.items():
+            print(f"Mission {mission_name}: {stats['observations']} observations across {len(stats['files'])} files")
+    except Exception as e:
+        print(f"Error occurred: {str(e)}")
+        exit(1001)
+    finally:
+        print("-----------------------------------------------------")
+        print("Finished processing observations.")
 
 # Super Observations
 # ------------
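Because poll_observations invokes the callback on every fetched page before any bucketing, it can also serve as a pure stream consumer: with output_format omitted, nothing is written to disk. A minimal sketch (handler name and start time are hypothetical):

```python
from windborne import poll_observations

def handle_page(observations):
    # Called once per fetched page; each item is a dict carrying the
    # fields listed in the headers above (timestamp, latitude, ...).
    print(f"received {len(observations)} observations")

poll_observations(
    start_time="2024-01-01 00:00:00",
    interval=120,          # wait 2 minutes when no new data arrives
    callback=handle_page,  # no output_format, so no files are written
)
```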
@@ -494,6 +738,8 @@ def super_observations(start_time, end_time=None, interval=60, save_to_file=None
     has_next_page = True
     fetced_so_far = 0
 
+    print(f"Starting polling super observations\nfrom {datetime.fromtimestamp(start_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} to {datetime.fromtimestamp(end_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
+    print("-----------------------------------------------------")
 
     while has_next_page:
         try:
@@ -516,15 +762,13 @@ def super_observations(start_time, end_time=None, interval=60, save_to_file=None
             observations = observations_page.get('observations', [])
             fetced_so_far = fetced_so_far + len(observations)
             print_current_timestamp = current_timestamp if current_timestamp < 1e11 else current_timestamp / 1e9
-            print(f"Fetched {fetced_so_far} super observation(s)")
-            print(f"Current time: {datetime.fromtimestamp(print_current_timestamp).strftime('%Y-%m-%d %H:%M:%S')}")
+            print(f"Fetched {fetced_so_far} super observations")
+            print(f"Current time: {datetime.fromtimestamp(print_current_timestamp, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
             print("-----------------------------------------------------")
 
-            # Invoke the callback with fetched observations
+            # Invoke the callback with fetched super observations
             if callback:
-                print("--------")
-                print("Callback")
-                print("--------")
+                print("--------\nCallback\n--------")
                 callback(observations)
 
             for obs in observations:
@@ -581,6 +825,12 @@ def super_observations(start_time, end_time=None, interval=60, save_to_file=None
 
             current_timestamp = next_timestamp
 
+        except KeyboardInterrupt:
+            print("\n\n\U0001F6D1 Received interrupt, stopping...")
+            print("-----------------------------------------------------")
+            print("Requested data was not saved!\nRun again and do not interrupt the run to save data.")
+            print("-----------------------------------------------------")
+            exit(3)
         except Exception as e:
             print(f"Error occurred: {e}")
             exit(1001)
@@ -603,7 +853,7 @@ def super_observations(start_time, end_time=None, interval=60, save_to_file=None
 
     if save_to_file.endswith('.nc'):
         first_obs_timestamp = float(next(iter(sorted_observations.values()))['timestamp'])
-        convert_to_netcdf(sorted_observations, first_obs_timestamp, output_filename=save_to_file)
+        convert_to_netcdf(sorted_observations, first_obs_timestamp, save_to_file)
 
     elif save_to_file.endswith('.json'):
         with open(save_to_file, 'w', encoding='utf-8') as f:
@@ -648,6 +898,7 @@ def super_observations(start_time, end_time=None, interval=60, save_to_file=None
         file_name_format = {
             'csv': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv",
             'json': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json",
+            'netcdf': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.nc",
             'little_r': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r"
         }
         file_name = file_name_format[output_format] % (
@@ -660,7 +911,7 @@ def super_observations(start_time, end_time=None, interval=60, save_to_file=None
         sorted_obs = sorted(observations.values(), key=lambda x: int(x['timestamp']))
 
         if output_format == 'netcdf':
-            convert_to_netcdf(sorted_obs, bucket_center.timestamp())
+            convert_to_netcdf(sorted_obs, bucket_center.timestamp(), output_file)
 
         elif output_format == 'csv':
             with open(output_file, mode='w', newline='') as file:
@@ -695,6 +946,225 @@ def super_observations(start_time, end_time=None, interval=60, save_to_file=None
     print("-----------------------------------------------------")
     print("All super observations have been processed and saved.")
 
+def poll_super_observations(start_time, interval=60, bucket_hours=6.0, output_format=None, output_dir=None, callback=None):
+    """
+    Continuously polls for super observations and saves to files in the specified format.
+    Will run indefinitely until interrupted.
+
+    Args:
+        start_time (str): Starting time in YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM or ISO format
+        interval (int): Polling interval in seconds when no data is received (default: 60)
+        bucket_hours (float): Size of time buckets in hours (default: 6.0)
+        output_format (str): Format for bucket files ('json', 'csv', 'little_r', 'netcdf')
+        output_dir (str): Directory for bucket files (default: current directory)
+        callback (callable): Optional callback for data processing
+    """
+    # Print warning about infinite loop
+    print(" ___________________________________________________________________")
+    print("| WARNING \U000026A0\U0000FE0F                                                        |")
+    print("| You are entering an endless loop.                                 |")
+    print("|                                                                   |")
+    print("| Press Ctrl + C anytime to exit.                                   |")
+    print("|___________________________________________________________________|\n\n")
+    time.sleep(4)
+
+    start_time = to_unix_timestamp(start_time)
+
+    if output_format and output_format not in ['json', 'csv', 'little_r', 'netcdf']:
+        print("Please use one of the following formats:")
+        print(" - json\n - csv\n - little_r\n - netcdf")
+        return
+
+    if output_dir:
+        os.makedirs(output_dir, exist_ok=True)
+        print(f"\U0001F4C1 Files will be saved to {output_dir}")
+    else:
+        print(f"\U0001F4C1 Files will be saved to {os.getcwd()}")
+
+    # Convert start_time to datetime
+    start_dt = datetime.fromtimestamp(start_time, tz=timezone.utc)
+
+    # Calculate first center time that's after start_time
+    hours_since_day_start = start_dt.hour + start_dt.minute / 60
+    bucket_number = hours_since_day_start // bucket_hours
+    first_center = start_dt.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(hours=(bucket_number + 1) * bucket_hours)
+
+    headers = [
+        "timestamp", "id", "time", "latitude", "longitude", "altitude", "humidity",
+        "mission_name", "pressure", "specific_humidity", "speed_u", "speed_v", "temperature"
+    ]
+
+    buckets = {}  # {(bucket_center, mission_name): {'data': {}, 'last_write': timestamp, 'data_hash': str}}
+    current_timestamp = start_time
+    fetched_so_far = 0
+    mission_stats = {}
+
+    print(f"Starting continuous polling from {datetime.fromtimestamp(start_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC")
+    print(f"Polling interval: {interval} seconds")
+    print("-----------------------------------------------------")
+
+    try:
+        while True:
+            observations_page = get_super_observations_page(
+                since=current_timestamp,
+                min_time=start_time,
+                include_ids=True,
+                include_mission_name=True
+            )
+
+            if observations_page is None:
+                print(f"\nNull response from API. Retrying in {interval} seconds ...")
+                time.sleep(interval)
+                continue
+
+            observations = observations_page.get('observations', [])
+
+            # Invoke the callback with fetched super observations
+            if callback:
+                print("--------\nCallback\n--------")
+                callback(observations)
+
+            if observations:
+                fetched_so_far += len(observations)
+                print_current_timestamp = current_timestamp if current_timestamp < 1e11 else current_timestamp / 1e9
+                print(f"Fetched {fetched_so_far} super observations")
+                print(f"Current time: {datetime.fromtimestamp(print_current_timestamp, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
+                print("-----------------------------------------------------")
+
+            for obs in observations:
+                if 'mission_name' not in obs:
+                    continue
+
+                timestamp = obs.get('timestamp')
+                if not timestamp:
+                    continue
+
+                try:
+                    obs_time = datetime.fromtimestamp(timestamp, tz=timezone.utc)
+                except (OSError, ValueError, TypeError, OverflowError):
+                    continue
+
+                mission_name = obs.get('mission_name', 'Unknown')
+                obs['time'] = obs_time.replace(tzinfo=timezone.utc).isoformat()
+
+                processed_obs = {
+                    header: obs.get(header) if obs.get(header) not in [None, '', ' '] else 'None'
+                    for header in headers
+                }
+
+                obs_id = f"{timestamp}_{mission_name}"
+
+                if obs_time >= start_dt:
+                    hours_diff = (obs_time - first_center).total_seconds() / 3600
+                    bucket_index = floor(hours_diff / bucket_hours)
+                    bucket_center = first_center + timedelta(hours=bucket_index * bucket_hours)
+                    bucket_end = bucket_center + timedelta(hours=bucket_hours)
+
+                    if obs_time <= bucket_end:
+                        bucket_key = (bucket_center, mission_name)
+
+                        # Initialize bucket if needed
+                        if bucket_key not in buckets:
+                            buckets[bucket_key] = {
+                                'data': {},
+                                'last_write': 0,
+                                'data_hash': ''
+                            }
+
+                        # Update bucket data
+                        buckets[bucket_key]['data'][obs_id] = processed_obs
+
+                        # Track statistics
+                        if mission_name not in mission_stats:
+                            mission_stats[mission_name] = {'files': set(), 'observations': 0}
+                        mission_stats[mission_name]['observations'] += 1
+
+                        # Calculate new data hash
+                        sorted_data = sorted(buckets[bucket_key]['data'].items(), key=lambda x: int(x[1]['timestamp']))
+                        data_hash = hashlib.md5(str(sorted_data).encode()).hexdigest()
+
+                        # Check if we should write the bucket
+                        current_time = datetime.now(timezone.utc)
+                        time_since_last_write = current_time.timestamp() - buckets[bucket_key]['last_write']
+                        data_changed = data_hash != buckets[bucket_key]['data_hash']
+
+                        # Write if it's been more than interval seconds since last write OR if data has changed
+                        if (time_since_last_write >= interval or data_changed) and output_format:
+                            bucket_hour = int((bucket_center.hour + bucket_hours/2) % 24)
+
+                            file_name_format = {
+                                'csv': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv",
+                                'json': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json",
+                                'netcdf': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.nc",
+                                'little_r': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r"
+                            }
+
+                            file_name = file_name_format[output_format] % (
+                                bucket_center.year, bucket_center.month, bucket_center.day,
+                                bucket_hour, bucket_hours)
+
+                            output_file = os.path.join(output_dir or '.', file_name)
+                            sorted_obs = [obs for _, obs in sorted_data]
+
+                            # Write the file based on format
+                            try:
+                                if output_format == 'netcdf':
+                                    convert_to_netcdf(sorted_obs, bucket_center.timestamp(), output_file)
+                                elif output_format == 'csv':
+                                    with open(output_file, mode='w', newline='') as file:
+                                        writer = csv.DictWriter(file, fieldnames=headers)
+                                        writer.writeheader()
+                                        writer.writerows(sorted_obs)
+                                elif output_format == 'json':
+                                    sorted_obs_dict = {k: v for k, v in sorted_data}
+                                    with open(output_file, 'w', encoding='utf-8') as file:
+                                        json.dump(sorted_obs_dict, file, indent=4)
+                                elif output_format == 'little_r':
+                                    little_r_records = format_little_r(sorted_obs)
+                                    with open(output_file, 'w') as file:
+                                        file.write('\n'.join(little_r_records))
+
+                                buckets[bucket_key]['last_write'] = current_time.timestamp()
+                                buckets[bucket_key]['data_hash'] = data_hash
+                                mission_stats[mission_name]['files'].add(output_file)
+                            except Exception as e:
+                                print(f"Error writing bucket file {file_name}: {str(e)}")
+
+            # Clean up old buckets
+            current_time = datetime.now(timezone.utc)
+            buckets = {
+                k: v for k, v in buckets.items()
+                if current_time - k[0] <= timedelta(hours=bucket_hours * 2)  # Keep slightly longer for potential updates
+            }
+
+            next_timestamp = observations_page.get('next_since')
+            has_next_page = observations_page.get('has_next_page', False)
+
+            if next_timestamp and next_timestamp > current_timestamp:
+                current_timestamp = next_timestamp
+            elif not has_next_page:
+                print("-----------------------------------------------------")
+                print(f"\U0001F503 Latest super observations data have been processed.\nRetrying getting new super observations data in {interval} seconds...")
+                print("-----------------------------------------------------")
+                time.sleep(interval)
+                continue
+
+            if not observations:
+                print(f"\U0001F503 No new super observations data available.\nRetrying getting new super observations data in {interval} seconds...")
+                print("-----------------------------------------------------")
+                time.sleep(interval)
+
+    except KeyboardInterrupt:
+        print("\n\U0001F6D1 Received interrupt, stopping...")
+        print("-----------------------------------------------------")
+        for mission_name, stats in mission_stats.items():
+            print(f"Mission {mission_name}: {stats['observations']} super observations across {len(stats['files'])} files")
+    except Exception as e:
+        print(f"Error occurred: {str(e)}")
+        exit(1001)
+    finally:
+        print("-----------------------------------------------------")
+        print("Finished processing super observations.")
 
 # ------------
 # METADATA
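Both pollers share the same time-bucketing math. A worked example of how first_center is derived, assuming the default bucket_hours of 6.0 and a hypothetical start time of 04:30 UTC:

```python
from datetime import datetime, timedelta, timezone

start_dt = datetime(2024, 1, 1, 4, 30, tzinfo=timezone.utc)  # hypothetical start
bucket_hours = 6.0

hours_since_day_start = start_dt.hour + start_dt.minute / 60   # 4.5
bucket_number = hours_since_day_start // bucket_hours          # 0.0: first bucket of the day
first_center = start_dt.replace(hour=0, minute=0, second=0, microsecond=0) \
    + timedelta(hours=(bucket_number + 1) * bucket_hours)

# The first center lands at the start of the next bucket, 06:00 UTC.
assert first_center == datetime(2024, 1, 1, 6, 0, tzinfo=timezone.utc)
```

Each incoming observation is then assigned to a bucket via floor((obs_time - first_center) / bucket_hours), exactly as in the loops above.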
windborne/utils.py CHANGED
@@ -337,7 +337,7 @@ def save_csv_json(save_to_file, response, csv_data_key=None):
     print("Unsupported file format. Please use either .json or .csv.")
     exit(4)
 
-def convert_to_netcdf(data, curtime, output_filename=None):
+def convert_to_netcdf(data, curtime, output_filename):
     # This module outputs data in netcdf format for the WMO ISARRA program. The output format is netcdf
     # and the style (variable names, file names, etc.) are described here:
     # https://github.com/synoptic/wmo-uasdc/tree/main/raw_uas_to_netCDF
@@ -362,10 +362,16 @@ def convert_to_netcdf(data, curtime, output_filename=None):
 
     # Convert dictionary to list for DataFrame
     data_list = []
-    for obs_id, obs_data in data.items():
-        # Convert 'None' strings to None type
-        clean_data = {k: None if v == 'None' else v for k, v in obs_data.items()}
-        data_list.append(clean_data)
+    if isinstance(data, dict):
+        # If input is dictionary, convert to list
+        for obs_id, obs_data in data.items():
+            clean_data = {k: None if v == 'None' else v for k, v in obs_data.items()}
+            data_list.append(clean_data)
+    else:
+        # If input is already a list
+        for obs_data in data:
+            clean_data = {k: None if v == 'None' else v for k, v in obs_data.items()}
+            data_list.append(clean_data)
 
     # Put the data in a pandas DataFrame in order to easily push to xarray then netcdf output
     df = pd.DataFrame(data_list)
@@ -381,16 +387,16 @@ def convert_to_netcdf(data, curtime, output_filename=None):
 
     # Build the filename and save some variables for use later
     mt = datetime.fromtimestamp(curtime, tz=timezone.utc)
+
+    is_multi_mission = True
+
     # Handle dropsondes
     mission_name = str(df['mission_name'].iloc[0]) if (not df.empty and not pd.isna(df['mission_name'].iloc[0])) else ' '
+    # Dropsondes have a blank mission name
+    if mission_name == ' ':
+        is_multi_mission = False
 
-    is_multi_mission = False
-
-    if output_filename:
-        output_file = output_filename
-        is_multi_mission = True  # we should calculate this directly, rather than relying on the filename
-    else:
-        output_file = f"WindBorne_{mission_name}_{mt.year:04d}-{mt.month:02d}-{mt.day:02d}_{mt.hour:02d}.nc"
+    output_file = output_filename
 
     # Derived quantities calculated here:
 
@@ -495,8 +501,7 @@ def convert_to_netcdf(data, curtime, output_filename=None):
     }
     ds['mission_name'].attrs = {
         'long_name': 'Mission name',
-        'description': 'Which balloon collected the data',
-        '_FillValue': ''
+        'description': 'Which balloon collected the data'
     }
 
     # Add Global Attributes synonymous across all UASDC providers
@@ -572,7 +577,7 @@ def format_little_r(observations):
     """
     little_r_records = []
 
-    for obs_id, point in observations:
+    for point in observations:
        # Observation time
        observation_time = datetime.fromtimestamp(point['timestamp'], tz=timezone.utc)
 
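convert_to_netcdf now requires the output filename and accepts either input shape; the normalization it applies is small enough to restate as a runnable sketch (the helper name is ours, not the library's):

```python
def normalize_observations(data):
    # Mirrors the branch added to convert_to_netcdf: dict-keyed input is
    # flattened to a list, and the placeholder string 'None' becomes None.
    rows = data.values() if isinstance(data, dict) else data
    return [{k: None if v == 'None' else v for k, v in row.items()} for row in rows]

print(normalize_observations({'1_W-1': {'temperature': 'None', 'pressure': 200.0}}))
# -> [{'temperature': None, 'pressure': 200.0}]
```

format_little_r changes in step: it now iterates plain observation dicts rather than (obs_id, point) pairs, matching the sorted lists the pollers pass in.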
{windborne-1.0.6.dist-info → windborne-1.0.7.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: windborne
-Version: 1.0.6
+Version: 1.0.7
 Summary: A Python library for interacting with WindBorne Data and Forecasts API
 Author-email: WindBorne Systems <data@windbornesystems.com>
 Classifier: Programming Language :: Python :: 3
windborne-1.0.7.dist-info/RECORD ADDED
@@ -0,0 +1,11 @@
+windborne/__init__.py,sha256=aDFnZEPGmulZ-VVAVD-0maK3UFeLl9PxUyxp_qZ85Gk,1894
+windborne/cli.py,sha256=Qp6wu3ZbXwnpmHa3odr0sjIJ3DOhtraQblUGwKWEKWc,36416
+windborne/config.py,sha256=FYIBRiIuii5igAFQlOsHUa6u2i1kKnO1yZE7QfQJvUg,1688
+windborne/data_api.py,sha256=TtOgzD-ONRFswPbAIiMaCANp_IaP4g8OvNExqZ_81iA,63414
+windborne/forecasts_api.py,sha256=AYuhFRls_XvzuNB55NF0w3y-_ocYwPxmI6C1lIyFkgM,16865
+windborne/utils.py,sha256=cpQZ79EB8T0Cy5ygwHsbTEE4XjQ0xYX_sN-Ags8AYJw,39718
+windborne-1.0.7.dist-info/METADATA,sha256=R6AQZUik0LbU1ofYCGFYL8dWWLyzoIoLhyW3TVrP_Ng,1264
+windborne-1.0.7.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+windborne-1.0.7.dist-info/entry_points.txt,sha256=j_YrqdCDrCd7p5MIwQ2BYwNXEi95VNANzLRJmcXEg1U,49
+windborne-1.0.7.dist-info/top_level.txt,sha256=PE9Lauriu5S5REf7JKhXprufZ_V5RiZ_TnfnrLGJrmE,10
+windborne-1.0.7.dist-info/RECORD,,
windborne-1.0.6.dist-info/RECORD DELETED
@@ -1,11 +0,0 @@
-windborne/__init__.py,sha256=tNnX9BrdgFNjy9NY6ucobCVAgV3KYwUydEAuwxdaiqQ,1784
-windborne/cli.py,sha256=YlIz9KgsJumaCxz64uwAxfUUTJzc2rPEwzwGajCT9Jw,32838
-windborne/config.py,sha256=FYIBRiIuii5igAFQlOsHUa6u2i1kKnO1yZE7QfQJvUg,1688
-windborne/data_api.py,sha256=uggo2Y5U36ptvpSYgCUHNFwbNlczsCx7OUrf_OwlvtE,37629
-windborne/forecasts_api.py,sha256=AYuhFRls_XvzuNB55NF0w3y-_ocYwPxmI6C1lIyFkgM,16865
-windborne/utils.py,sha256=Zp9oTWfbISmJ9nO893RrW6MkqOwCDaFgsszmYaRgJSg,39670
-windborne-1.0.6.dist-info/METADATA,sha256=Fx7tFudf2QWvgLmkzfmTr4BNy2id_rKoOj-J5W4ZqVQ,1264
-windborne-1.0.6.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-windborne-1.0.6.dist-info/entry_points.txt,sha256=j_YrqdCDrCd7p5MIwQ2BYwNXEi95VNANzLRJmcXEg1U,49
-windborne-1.0.6.dist-info/top_level.txt,sha256=PE9Lauriu5S5REf7JKhXprufZ_V5RiZ_TnfnrLGJrmE,10
-windborne-1.0.6.dist-info/RECORD,,