windborne 1.0.6__py3-none-any.whl → 1.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
windborne/__init__.py CHANGED
@@ -14,6 +14,9 @@ from .data_api import (
     get_super_observations_page,
     super_observations,
 
+    poll_super_observations,
+    poll_observations,
+
     get_flying_missions,
     get_mission_launch_site,
     get_predicted_path,
@@ -50,6 +53,9 @@ __all__ = [
    "get_super_observations_page",
    "super_observations",
 
+   "poll_super_observations",
+   "poll_observations",
+
    "get_flying_missions",
    "get_mission_launch_site",
    "get_predicted_path",
windborne/cli.py CHANGED
@@ -3,8 +3,13 @@ import argparse
 from . import (
     super_observations,
     observations,
+
     get_observations_page,
     get_super_observations_page,
+
+    poll_super_observations,
+    poll_observations,
+
     get_flying_missions,
     get_mission_launch_site,
     get_predicted_path,
@@ -42,7 +47,7 @@ def main():
     super_obs_parser.add_argument('-i', '--interval', type=int, default=60, help='Polling interval in seconds')
     super_obs_parser.add_argument('-b', '--bucket-hours', type=float, default=6.0, help='Hours per bucket')
     super_obs_parser.add_argument('-d', '--output-dir', help='Directory path where the separate files should be saved. If not provided, files will be saved in current directory.')
-    super_obs_parser.add_argument('output', help='Save output to a single file (filename.csv, filename.json or filename.little_r) or to multiple files (csv or little_r)')
+    super_obs_parser.add_argument('output', help='Save output to a single file (filename.csv, filename.json or filename.little_r) or to multiple files (csv, json, netcdf or little_r)')
 
     # Observations Command
     obs_parser = subparsers.add_parser('observations', help='Poll observations within a time range')
@@ -58,7 +63,7 @@ def main():
     obs_parser.add_argument('-i', '--interval', type=int, default=60, help='Polling interval in seconds')
     obs_parser.add_argument('-b', '--bucket-hours', type=float, default=6.0, help='Hours per bucket')
     obs_parser.add_argument('-d', '--output-dir', help='Directory path where the separate files should be saved. If not provided, files will be saved in current directory.')
-    obs_parser.add_argument('output', help='Save output to a single file (filename.csv, filename.json or filename.little_r) or to multiple files (csv or little_r)')
+    obs_parser.add_argument('output', help='Save output to a single file (filename.csv, filename.json or filename.little_r) or to multiple files (csv, json, netcdf or little_r)')
 
 
     # Get Observations Page Command
@@ -87,6 +92,29 @@ def main():
     super_obs_page_parser.add_argument('-u', '--include-updated-at', action='store_true', help='Include update timestamps')
     super_obs_page_parser.add_argument('output', nargs='?', help='Output file')
 
+    # Poll Super Observations Command
+    poll_super_obs_parser = subparsers.add_parser('poll-super-observations', help='Continuously polls for super observations and saves to files in the specified format.')
+    poll_super_obs_parser.add_argument('start_time', help='Starting time (YYYY-MM-DD_HH:MM, "YYYY-MM-DD HH:MM:SS" or YYYY-MM-DDTHH:MM:SS.fffZ)')
+    poll_super_obs_parser.add_argument('-i', '--interval', type=int, default=60, help='Polling interval in seconds')
+    poll_super_obs_parser.add_argument('-b', '--bucket-hours', type=float, default=6.0, help='Hours per bucket')
+    poll_super_obs_parser.add_argument('-d', '--output-dir', help='Directory path where the separate files should be saved. If not provided, files will be saved in current directory.')
+    poll_super_obs_parser.add_argument('output', help='Save output to multiple files (csv, json, netcdf or little_r)')
+
+    # Poll Observations Command
+    poll_obs_parser = subparsers.add_parser('poll-observations', help='Continuously polls for observations and saves to files in the specified format.')
+    poll_obs_parser.add_argument('start_time', help='Starting time (YYYY-MM-DD_HH:MM, "YYYY-MM-DD HH:MM:SS" or YYYY-MM-DDTHH:MM:SS.fffZ)')
+    poll_obs_parser.add_argument('-m', '--mission-id', help='Filter observations by mission ID')
+    poll_obs_parser.add_argument('-ml', '--min-latitude', type=float, help='Minimum latitude filter')
+    poll_obs_parser.add_argument('-xl', '--max-latitude', type=float, help='Maximum latitude filter')
+    poll_obs_parser.add_argument('-mg', '--min-longitude', type=float, help='Minimum longitude filter')
+    poll_obs_parser.add_argument('-xg', '--max-longitude', type=float, help='Maximum longitude filter')
+    poll_obs_parser.add_argument('-id', '--include-ids', action='store_true', help='Include observation IDs')
+    poll_obs_parser.add_argument('-u', '--include-updated-at', action='store_true', help='Include update timestamps')
+    poll_obs_parser.add_argument('-i', '--interval', type=int, default=60, help='Polling interval in seconds')
+    poll_obs_parser.add_argument('-b', '--bucket-hours', type=float, default=6.0, help='Hours per bucket')
+    poll_obs_parser.add_argument('-d', '--output-dir', help='Directory path where the separate files should be saved. If not provided, files will be saved in current directory.')
+    poll_obs_parser.add_argument('output', help='Save output to multiple files (csv, json, netcdf or little_r)')
+
     # Get Flying Missions Command
     flying_parser = subparsers.add_parser('flying-missions', help='Get currently flying missions')
     flying_parser.add_argument('output', nargs='?', help='Output file')
@@ -226,6 +254,37 @@ def main():
             output_format=output_format
         )
 
+    elif args.command == 'poll-super-observations':
+        output_format = args.output
+        output_dir = args.output_dir
+
+        poll_super_observations(
+            start_time=args.start_time,
+            interval=args.interval,
+            bucket_hours=args.bucket_hours,
+            output_dir=output_dir,
+            output_format=output_format
+        )
+
+    elif args.command == 'poll-observations':
+        output_format = args.output
+        output_dir = args.output_dir
+
+        poll_observations(
+            start_time=args.start_time,
+            include_ids=args.include_ids,
+            include_updated_at=args.include_updated_at,
+            mission_id=args.mission_id,
+            min_latitude=args.min_latitude,
+            max_latitude=args.max_latitude,
+            min_longitude=args.min_longitude,
+            max_longitude=args.max_longitude,
+            interval=args.interval,
+            bucket_hours=args.bucket_hours,
+            output_dir=output_dir,
+            output_format=output_format
+        )
+
     elif args.command == 'observations':
         # Error handling is performed within observations
         # and we display the appropriate error messages
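The dispatch above maps each new subcommand straight onto the library function with the parsed arguments. A hedged sketch of what `windborne poll-observations 2024-01-01_00:00 csv` resolves to after parsing, assuming the defaults declared in the parsers above (the date is a hypothetical example):

```python
from windborne import poll_observations

poll_observations(
    start_time="2024-01-01_00:00",  # positional start_time (hypothetical value)
    interval=60,                    # -i / --interval default
    bucket_hours=6.0,               # -b / --bucket-hours default
    output_dir=None,                # -d / --output-dir: current directory when omitted
    output_format="csv",            # positional output argument
)
```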
windborne/data_api.py CHANGED
@@ -7,6 +7,9 @@ from math import floor
 from datetime import datetime, timezone, timedelta
 import csv
 import json
+import hashlib
+
+# UTC should be used across the lib
 
 # ------------
 # CORE RESOURCES
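The new hashlib import supports the change detection used by the pollers added below: each bucket's sorted contents are fingerprinted, and a file is rewritten only when the fingerprint changes. The idiom, extracted as a standalone sketch (the helper name is ours, not the library's):

```python
import hashlib

def bucket_fingerprint(bucket_data):
    # bucket_data maps an observation id to its processed dict;
    # sorting by timestamp makes the fingerprint order-independent.
    sorted_data = sorted(bucket_data.items(), key=lambda x: int(x[1]['timestamp']))
    return hashlib.md5(str(sorted_data).encode()).hexdigest()
```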
@@ -165,6 +168,9 @@ def observations(start_time, end_time=None, include_ids=None, include_updated_at
     has_next_page = True
     fetced_so_far = 0
 
+    print(f"Starting polling observations\nfrom {datetime.fromtimestamp(start_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC to {datetime.fromtimestamp(end_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC")
+    print("-----------------------------------------------------")
+
 
     while has_next_page:
         try:
@@ -191,13 +197,13 @@ def observations(start_time, end_time=None, include_ids=None, include_updated_at
             observations = observations_page.get('observations', [])
             fetced_so_far = fetced_so_far + len(observations)
             print_current_timestamp = current_timestamp if current_timestamp < 1e11 else current_timestamp / 1e9
-            print(f"Fetched {fetced_so_far} observation(s)")
-            print(f"Current time: {datetime.fromtimestamp(print_current_timestamp).strftime('%Y-%m-%d %H:%M:%S')}")
+            print(f"Fetched {fetced_so_far} observations")
+            print(f"Current time: {datetime.fromtimestamp(print_current_timestamp, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
             print("-----------------------------------------------------")
 
             # Invoke the callback with fetched observations
             if callback:
-                print("\nCallback\n")
+                print("--------\nCallback\n--------")
                 callback(observations)
 
             for obs in observations:
@@ -254,6 +260,12 @@ def observations(start_time, end_time=None, include_ids=None, include_updated_at
 
             current_timestamp = next_timestamp
 
+        except KeyboardInterrupt:
+            print("\n\n\U0001F6D1 Received interrupt, stopping...")
+            print("-----------------------------------------------------")
+            print("Requested data was not saved!\nRun again and do not interrupt the run to save data.")
+            print("-----------------------------------------------------")
+            exit(3)
         except Exception as e:
             print(f"Error occurred: {e}")
             exit(1001)
@@ -276,7 +288,7 @@ def observations(start_time, end_time=None, include_ids=None, include_updated_at
 
     if save_to_file.endswith('.nc'):
         first_obs_timestamp = float(next(iter(sorted_observations.values()))['timestamp'])
-        convert_to_netcdf(sorted_observations, first_obs_timestamp, output_filename=save_to_file)
+        convert_to_netcdf(sorted_observations, first_obs_timestamp, save_to_file)
     elif save_to_file.endswith('.json'):
         with open(save_to_file, 'w', encoding='utf-8') as f:
             json.dump(sorted_observations, f, indent=4)
@@ -319,6 +331,7 @@ def observations(start_time, end_time=None, include_ids=None, include_updated_at
         file_name_format = {
             'csv': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv",
             'json': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json",
+            'netcdf': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.nc",
             'little_r': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r"
         }
         file_name = file_name_format[output_format] % (
  file_name = file_name_format[output_format] % (
@@ -331,7 +344,7 @@ def observations(start_time, end_time=None, include_ids=None, include_updated_at
331
344
  sorted_obs = sorted(observations.values(), key=lambda x: int(x['timestamp']))
332
345
 
333
346
  if output_format == 'netcdf':
334
- convert_to_netcdf(sorted_obs, bucket_center.timestamp())
347
+ convert_to_netcdf(sorted_obs, bucket_center.timestamp(), output_file)
335
348
 
336
349
  elif output_format == 'csv':
337
350
  with open(output_file, mode='w', newline='') as file:
@@ -366,6 +379,237 @@ def observations(start_time, end_time=None, include_ids=None, include_updated_at
     print("-----------------------------------------------------")
     print("All observations have been processed and saved.")
 
+def poll_observations(start_time, include_ids=None, include_updated_at=None, mission_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None, interval=60, bucket_hours=6.0, output_format=None, output_dir=None, callback=None):
+    """
+    Continuously polls for observations and saves to files in the specified format.
+    Will run indefinitely until interrupted.
+
+    Args:
+        start_time (str): Starting time in YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM or ISO format
+        include_ids (bool): Include observation IDs in response.
+        include_updated_at (bool): Include update timestamps in response.
+        mission_id (str): Filter observations by mission ID.
+        min_latitude (float): Minimum latitude boundary.
+        max_latitude (float): Maximum latitude boundary.
+        min_longitude (float): Minimum longitude boundary.
+        max_longitude (float): Maximum longitude boundary.
+        interval (int): Polling interval in seconds when no data is received (default: 60)
+        bucket_hours (float): Size of time buckets in hours (default: 6.0)
+        output_format (str): Format for bucket files ('json', 'csv', 'little_r', 'netcdf')
+        output_dir (str): Directory for bucket files (default: current directory)
+        callback (callable): Optional callback for data processing
+    """
+    # Print warning about infinite loop
+    print(" ___________________________________________________________________")
+    print("| WARNING \U000026A0\U0000FE0F                                                        |")
+    print("| You are entering an endless loop.                                 |")
+    print("|                                                                   |")
+    print("| Press Ctrl + C anytime to exit.                                   |")
+    print("|___________________________________________________________________|\n\n")
+    time.sleep(4)
+
+    start_time = to_unix_timestamp(start_time)
+
+    if output_format and output_format not in ['json', 'csv', 'little_r', 'netcdf']:
+        print("Please use one of the following formats:")
+        print(" - json\n - csv\n - little_r\n - netcdf")
+        return
+
+    if output_dir:
+        os.makedirs(output_dir, exist_ok=True)
+        print(f"\U0001F4C1 Files will be saved to {output_dir}")
+    else:
+        print(f"\U0001F4C1 Files will be saved to {os.getcwd()}")
+
+    # Convert start_time to datetime
+    start_dt = datetime.fromtimestamp(start_time, tz=timezone.utc)
+
+    # Calculate first center time that's after start_time
+    hours_since_day_start = start_dt.hour + start_dt.minute / 60
+    bucket_number = hours_since_day_start // bucket_hours
+    first_center = start_dt.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(hours=(bucket_number + 1) * bucket_hours)
+
+    headers = [
+        "timestamp", "id", "time", "latitude", "longitude", "altitude", "humidity",
+        "mission_name", "pressure", "specific_humidity", "speed_u", "speed_v", "temperature"
+    ]
+
+    buckets = {}  # {(bucket_center, mission_name): {'data': {}, 'last_write': timestamp, 'data_hash': str}}
+    current_timestamp = start_time
+    fetched_so_far = 0
+    mission_stats = {}
+
+    print(f"Starting continuous polling from {datetime.fromtimestamp(start_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC")
+    print(f"Polling interval: {interval} seconds")
+    print("-----------------------------------------------------")
+
+    try:
+        while True:
+            observations_page = get_observations_page(
+                since=current_timestamp,
+                min_latitude=min_latitude,
+                max_latitude=max_latitude,
+                min_longitude=min_longitude,
+                max_longitude=max_longitude,
+                include_updated_at=include_updated_at,
+                mission_id=mission_id,
+                include_ids=include_ids,
+                include_mission_name=True
+            )
+
+            if observations_page is None:
+                print(f"\nNull response from API. Retrying in {interval} seconds ...")
+                time.sleep(interval)
+                continue
+
+            observations = observations_page.get('observations', [])
+
+            # Invoke the callback with fetched observations
+            if callback:
+                print("--------\nCallback\n--------")
+                callback(observations)
+
+            if observations:
+                fetched_so_far += len(observations)
+                print_current_timestamp = current_timestamp if current_timestamp < 1e11 else current_timestamp / 1e9
+                print(f"Fetched {fetched_so_far} observations")
+                print(f"Current time: {datetime.fromtimestamp(print_current_timestamp, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
+                print("-----------------------------------------------------")
+
+            for obs in observations:
+                if 'mission_name' not in obs:
+                    continue
+
+                timestamp = obs.get('timestamp')
+                if not timestamp:
+                    continue
+
+                try:
+                    obs_time = datetime.fromtimestamp(timestamp, tz=timezone.utc)
+                except (OSError, ValueError, TypeError, OverflowError):
+                    continue
+
+                mission_name = obs.get('mission_name', 'Unknown')
+                obs['time'] = obs_time.replace(tzinfo=timezone.utc).isoformat()
+
+                processed_obs = {
+                    header: obs.get(header) if obs.get(header) not in [None, '', ' '] else 'None'
+                    for header in headers
+                }
+
+                obs_id = f"{timestamp}_{mission_name}"
+
+                if obs_time >= start_dt:
+                    hours_diff = (obs_time - first_center).total_seconds() / 3600
+                    bucket_index = floor(hours_diff / bucket_hours)
+                    bucket_center = first_center + timedelta(hours=bucket_index * bucket_hours)
+                    bucket_end = bucket_center + timedelta(hours=bucket_hours)
+
+                    if obs_time <= bucket_end:
+                        bucket_key = (bucket_center, mission_name)
+
+                        # Initialize bucket if needed
+                        if bucket_key not in buckets:
+                            buckets[bucket_key] = {
+                                'data': {},
+                                'last_write': 0,
+                                'data_hash': ''
+                            }
+
+                        # Update bucket data
+                        buckets[bucket_key]['data'][obs_id] = processed_obs
+
+                        # Track statistics
+                        if mission_name not in mission_stats:
+                            mission_stats[mission_name] = {'files': set(), 'observations': 0}
+                        mission_stats[mission_name]['observations'] += 1
+
+                        # Calculate new data hash
+                        sorted_data = sorted(buckets[bucket_key]['data'].items(), key=lambda x: int(x[1]['timestamp']))
+                        data_hash = hashlib.md5(str(sorted_data).encode()).hexdigest()
+
+                        # Check if we should write the bucket
+                        current_time = datetime.now(timezone.utc)
+                        time_since_last_write = current_time.timestamp() - buckets[bucket_key]['last_write']
+                        data_changed = data_hash != buckets[bucket_key]['data_hash']
+
+                        # Write if it's been more than interval seconds since last write OR if data has changed
+                        if (time_since_last_write >= interval or data_changed) and output_format:
+                            bucket_hour = int((bucket_center.hour + bucket_hours/2) % 24)
+
+                            file_name_format = {
+                                'csv': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv",
+                                'json': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json",
+                                'netcdf': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.nc",
+                                'little_r': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r"
+                            }
+
+                            file_name = file_name_format[output_format] % (
+                                bucket_center.year, bucket_center.month, bucket_center.day,
+                                bucket_hour, bucket_hours)
+
+                            output_file = os.path.join(output_dir or '.', file_name)
+                            sorted_obs = [obs for _, obs in sorted_data]
+
+                            # Write the file based on format
+                            try:
+                                if output_format == 'netcdf':
+                                    convert_to_netcdf(sorted_obs, bucket_center.timestamp(), output_file)
+                                elif output_format == 'csv':
+                                    with open(output_file, mode='w', newline='') as file:
+                                        writer = csv.DictWriter(file, fieldnames=headers)
+                                        writer.writeheader()
+                                        writer.writerows(sorted_obs)
+                                elif output_format == 'json':
+                                    sorted_obs_dict = {k: v for k, v in sorted_data}
+                                    with open(output_file, 'w', encoding='utf-8') as file:
+                                        json.dump(sorted_obs_dict, file, indent=4)
+                                elif output_format == 'little_r':
+                                    little_r_records = format_little_r(sorted_obs)
+                                    with open(output_file, 'w') as file:
+                                        file.write('\n'.join(little_r_records))
+
+                                buckets[bucket_key]['last_write'] = current_time.timestamp()
+                                buckets[bucket_key]['data_hash'] = data_hash
+                                mission_stats[mission_name]['files'].add(output_file)
+                            except Exception as e:
+                                print(f"Error writing bucket file {file_name}: {str(e)}")
+
+            # Clean up old buckets
+            current_time = datetime.now(timezone.utc)
+            buckets = {
+                k: v for k, v in buckets.items()
+                if current_time - k[0] <= timedelta(hours=bucket_hours * 2)  # Keep slightly longer for potential updates
+            }
+
+            next_timestamp = observations_page.get('next_since')
+            has_next_page = observations_page.get('has_next_page', False)
+
+            if next_timestamp and next_timestamp > current_timestamp:
+                current_timestamp = next_timestamp
+            elif not has_next_page:
+                print("-----------------------------------------------------")
+                print(f"\U0001F503 Latest observations data have been processed.\nRetrying getting new observations data in {interval} seconds...")
+                print("-----------------------------------------------------")
+                time.sleep(interval)
+                continue
+
+            if not observations:
+                print(f"\U0001F503 No new observations data available.\nRetrying getting new observations data in {interval} seconds...")
+                print("-----------------------------------------------------")
+                time.sleep(interval)
+
+    except KeyboardInterrupt:
+        print("\n\n\U0001F6D1 Received interrupt, stopping...")
+        print("-----------------------------------------------------")
+        for mission_name, stats in mission_stats.items():
+            print(f"Mission {mission_name}: {stats['observations']} observations across {len(stats['files'])} files")
+    except Exception as e:
+        print(f"Error occurred: {str(e)}")
+        exit(1001)
+    finally:
+        print("-----------------------------------------------------")
+        print("Finished processing observations.")
 
 # Super Observations
 # ------------
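Because poll_observations invokes the callback on every fetched page before any bucketing, it can also serve as a pure stream consumer: with output_format omitted, nothing is written to disk. A minimal sketch (handler name and start time are hypothetical):

```python
from windborne import poll_observations

def handle_page(observations):
    # Called once per fetched page; each item is a dict carrying the
    # fields listed in the headers above (timestamp, latitude, ...).
    print(f"received {len(observations)} observations")

poll_observations(
    start_time="2024-01-01 00:00:00",
    interval=120,          # wait 2 minutes when no new data arrives
    callback=handle_page,  # no output_format, so no files are written
)
```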
@@ -494,6 +738,8 @@ def super_observations(start_time, end_time=None, interval=60, save_to_file=None
     has_next_page = True
     fetced_so_far = 0
 
+    print(f"Starting polling super observations\nfrom {datetime.fromtimestamp(start_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} to {datetime.fromtimestamp(end_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
+    print("-----------------------------------------------------")
 
     while has_next_page:
         try:
@@ -516,15 +762,13 @@ def super_observations(start_time, end_time=None, interval=60, save_to_file=None
             observations = observations_page.get('observations', [])
             fetced_so_far = fetced_so_far + len(observations)
             print_current_timestamp = current_timestamp if current_timestamp < 1e11 else current_timestamp / 1e9
-            print(f"Fetched {fetced_so_far} super observation(s)")
-            print(f"Current time: {datetime.fromtimestamp(print_current_timestamp).strftime('%Y-%m-%d %H:%M:%S')}")
+            print(f"Fetched {fetced_so_far} super observations")
+            print(f"Current time: {datetime.fromtimestamp(print_current_timestamp, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
             print("-----------------------------------------------------")
 
-            # Invoke the callback with fetched observations
+            # Invoke the callback with fetched super observations
             if callback:
-                print("--------")
-                print("Callback")
-                print("--------")
+                print("--------\nCallback\n--------")
                 callback(observations)
 
             for obs in observations:
@@ -581,6 +825,12 @@ def super_observations(start_time, end_time=None, interval=60, save_to_file=None
 
             current_timestamp = next_timestamp
 
+        except KeyboardInterrupt:
+            print("\n\n\U0001F6D1 Received interrupt, stopping...")
+            print("-----------------------------------------------------")
+            print("Requested data was not saved!\nRun again and do not interrupt the run to save data.")
+            print("-----------------------------------------------------")
+            exit(3)
         except Exception as e:
             print(f"Error occurred: {e}")
             exit(1001)
@@ -603,7 +853,7 @@ def super_observations(start_time, end_time=None, interval=60, save_to_file=None
 
     if save_to_file.endswith('.nc'):
         first_obs_timestamp = float(next(iter(sorted_observations.values()))['timestamp'])
-        convert_to_netcdf(sorted_observations, first_obs_timestamp, output_filename=save_to_file)
+        convert_to_netcdf(sorted_observations, first_obs_timestamp, save_to_file)
 
     elif save_to_file.endswith('.json'):
         with open(save_to_file, 'w', encoding='utf-8') as f:
@@ -648,6 +898,7 @@ def super_observations(start_time, end_time=None, interval=60, save_to_file=None
         file_name_format = {
             'csv': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv",
             'json': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json",
+            'netcdf': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.nc",
             'little_r': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r"
         }
         file_name = file_name_format[output_format] % (
@@ -660,7 +911,7 @@ def super_observations(start_time, end_time=None, interval=60, save_to_file=None
         sorted_obs = sorted(observations.values(), key=lambda x: int(x['timestamp']))
 
         if output_format == 'netcdf':
-            convert_to_netcdf(sorted_obs, bucket_center.timestamp())
+            convert_to_netcdf(sorted_obs, bucket_center.timestamp(), output_file)
 
         elif output_format == 'csv':
             with open(output_file, mode='w', newline='') as file:
@@ -695,6 +946,225 @@ def super_observations(start_time, end_time=None, interval=60, save_to_file=None
     print("-----------------------------------------------------")
     print("All super observations have been processed and saved.")
 
+def poll_super_observations(start_time, interval=60, bucket_hours=6.0, output_format=None, output_dir=None, callback=None):
+    """
+    Continuously polls for super observations and saves to files in the specified format.
+    Will run indefinitely until interrupted.
+
+    Args:
+        start_time (str): Starting time in YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM or ISO format
+        interval (int): Polling interval in seconds when no data is received (default: 60)
+        bucket_hours (float): Size of time buckets in hours (default: 6.0)
+        output_format (str): Format for bucket files ('json', 'csv', 'little_r', 'netcdf')
+        output_dir (str): Directory for bucket files (default: current directory)
+        callback (callable): Optional callback for data processing
+    """
+    # Print warning about infinite loop
+    print(" ___________________________________________________________________")
+    print("| WARNING \U000026A0\U0000FE0F                                                        |")
+    print("| You are entering an endless loop.                                 |")
+    print("|                                                                   |")
+    print("| Press Ctrl + C anytime to exit.                                   |")
+    print("|___________________________________________________________________|\n\n")
+    time.sleep(4)
+
+    start_time = to_unix_timestamp(start_time)
+
+    if output_format and output_format not in ['json', 'csv', 'little_r', 'netcdf']:
+        print("Please use one of the following formats:")
+        print(" - json\n - csv\n - little_r\n - netcdf")
+        return
+
+    if output_dir:
+        os.makedirs(output_dir, exist_ok=True)
+        print(f"\U0001F4C1 Files will be saved to {output_dir}")
+    else:
+        print(f"\U0001F4C1 Files will be saved to {os.getcwd()}")
+
+    # Convert start_time to datetime
+    start_dt = datetime.fromtimestamp(start_time, tz=timezone.utc)
+
+    # Calculate first center time that's after start_time
+    hours_since_day_start = start_dt.hour + start_dt.minute / 60
+    bucket_number = hours_since_day_start // bucket_hours
+    first_center = start_dt.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(hours=(bucket_number + 1) * bucket_hours)
+
+    headers = [
+        "timestamp", "id", "time", "latitude", "longitude", "altitude", "humidity",
+        "mission_name", "pressure", "specific_humidity", "speed_u", "speed_v", "temperature"
+    ]
+
+    buckets = {}  # {(bucket_center, mission_name): {'data': {}, 'last_write': timestamp, 'data_hash': str}}
+    current_timestamp = start_time
+    fetched_so_far = 0
+    mission_stats = {}
+
+    print(f"Starting continuous polling from {datetime.fromtimestamp(start_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC")
+    print(f"Polling interval: {interval} seconds")
+    print("-----------------------------------------------------")
+
+    try:
+        while True:
+            observations_page = get_super_observations_page(
+                since=current_timestamp,
+                min_time=start_time,
+                include_ids=True,
+                include_mission_name=True
+            )
+
+            if observations_page is None:
+                print(f"\nNull response from API. Retrying in {interval} seconds ...")
+                time.sleep(interval)
+                continue
+
+            observations = observations_page.get('observations', [])
+
+            # Invoke the callback with fetched super observations
+            if callback:
+                print("--------\nCallback\n--------")
+                callback(observations)
+
+            if observations:
+                fetched_so_far += len(observations)
+                print_current_timestamp = current_timestamp if current_timestamp < 1e11 else current_timestamp / 1e9
+                print(f"Fetched {fetched_so_far} super observations")
+                print(f"Current time: {datetime.fromtimestamp(print_current_timestamp, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
+                print("-----------------------------------------------------")
+
+            for obs in observations:
+                if 'mission_name' not in obs:
+                    continue
+
+                timestamp = obs.get('timestamp')
+                if not timestamp:
+                    continue
+
+                try:
+                    obs_time = datetime.fromtimestamp(timestamp, tz=timezone.utc)
+                except (OSError, ValueError, TypeError, OverflowError):
+                    continue
+
+                mission_name = obs.get('mission_name', 'Unknown')
+                obs['time'] = obs_time.replace(tzinfo=timezone.utc).isoformat()
+
+                processed_obs = {
+                    header: obs.get(header) if obs.get(header) not in [None, '', ' '] else 'None'
+                    for header in headers
+                }
+
+                obs_id = f"{timestamp}_{mission_name}"
+
+                if obs_time >= start_dt:
+                    hours_diff = (obs_time - first_center).total_seconds() / 3600
+                    bucket_index = floor(hours_diff / bucket_hours)
+                    bucket_center = first_center + timedelta(hours=bucket_index * bucket_hours)
+                    bucket_end = bucket_center + timedelta(hours=bucket_hours)
+
+                    if obs_time <= bucket_end:
+                        bucket_key = (bucket_center, mission_name)
+
+                        # Initialize bucket if needed
+                        if bucket_key not in buckets:
+                            buckets[bucket_key] = {
+                                'data': {},
+                                'last_write': 0,
+                                'data_hash': ''
+                            }
+
+                        # Update bucket data
+                        buckets[bucket_key]['data'][obs_id] = processed_obs
+
+                        # Track statistics
+                        if mission_name not in mission_stats:
+                            mission_stats[mission_name] = {'files': set(), 'observations': 0}
+                        mission_stats[mission_name]['observations'] += 1
+
+                        # Calculate new data hash
+                        sorted_data = sorted(buckets[bucket_key]['data'].items(), key=lambda x: int(x[1]['timestamp']))
+                        data_hash = hashlib.md5(str(sorted_data).encode()).hexdigest()
+
+                        # Check if we should write the bucket
+                        current_time = datetime.now(timezone.utc)
+                        time_since_last_write = current_time.timestamp() - buckets[bucket_key]['last_write']
+                        data_changed = data_hash != buckets[bucket_key]['data_hash']
+
+                        # Write if it's been more than interval seconds since last write OR if data has changed
+                        if (time_since_last_write >= interval or data_changed) and output_format:
+                            bucket_hour = int((bucket_center.hour + bucket_hours/2) % 24)
+
+                            file_name_format = {
+                                'csv': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv",
+                                'json': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json",
+                                'netcdf': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.nc",
+                                'little_r': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r"
+                            }
+
+                            file_name = file_name_format[output_format] % (
+                                bucket_center.year, bucket_center.month, bucket_center.day,
+                                bucket_hour, bucket_hours)
+
+                            output_file = os.path.join(output_dir or '.', file_name)
+                            sorted_obs = [obs for _, obs in sorted_data]
+
+                            # Write the file based on format
+                            try:
+                                if output_format == 'netcdf':
+                                    convert_to_netcdf(sorted_obs, bucket_center.timestamp(), output_file)
+                                elif output_format == 'csv':
+                                    with open(output_file, mode='w', newline='') as file:
+                                        writer = csv.DictWriter(file, fieldnames=headers)
+                                        writer.writeheader()
+                                        writer.writerows(sorted_obs)
+                                elif output_format == 'json':
+                                    sorted_obs_dict = {k: v for k, v in sorted_data}
+                                    with open(output_file, 'w', encoding='utf-8') as file:
+                                        json.dump(sorted_obs_dict, file, indent=4)
+                                elif output_format == 'little_r':
+                                    little_r_records = format_little_r(sorted_obs)
+                                    with open(output_file, 'w') as file:
+                                        file.write('\n'.join(little_r_records))
+
+                                buckets[bucket_key]['last_write'] = current_time.timestamp()
+                                buckets[bucket_key]['data_hash'] = data_hash
+                                mission_stats[mission_name]['files'].add(output_file)
+                            except Exception as e:
+                                print(f"Error writing bucket file {file_name}: {str(e)}")
+
+            # Clean up old buckets
+            current_time = datetime.now(timezone.utc)
+            buckets = {
+                k: v for k, v in buckets.items()
+                if current_time - k[0] <= timedelta(hours=bucket_hours * 2)  # Keep slightly longer for potential updates
+            }
+
+            next_timestamp = observations_page.get('next_since')
+            has_next_page = observations_page.get('has_next_page', False)
+
+            if next_timestamp and next_timestamp > current_timestamp:
+                current_timestamp = next_timestamp
+            elif not has_next_page:
+                print("-----------------------------------------------------")
+                print(f"\U0001F503 Latest super observations data have been processed.\nRetrying getting new super observations data in {interval} seconds...")
+                print("-----------------------------------------------------")
+                time.sleep(interval)
+                continue
+
+            if not observations:
+                print(f"\U0001F503 No new super observations data available.\nRetrying getting new super observations data in {interval} seconds...")
+                print("-----------------------------------------------------")
+                time.sleep(interval)
+
+    except KeyboardInterrupt:
+        print("\n\U0001F6D1 Received interrupt, stopping...")
+        print("-----------------------------------------------------")
+        for mission_name, stats in mission_stats.items():
+            print(f"Mission {mission_name}: {stats['observations']} super observations across {len(stats['files'])} files")
+    except Exception as e:
+        print(f"Error occurred: {str(e)}")
+        exit(1001)
+    finally:
+        print("-----------------------------------------------------")
+        print("Finished processing super observations.")
 
 # ------------
 # METADATA
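Both pollers share the same time-bucketing math. A worked example of how first_center is derived, assuming the default bucket_hours of 6.0 and a hypothetical start time of 04:30 UTC:

```python
from datetime import datetime, timedelta, timezone

start_dt = datetime(2024, 1, 1, 4, 30, tzinfo=timezone.utc)  # hypothetical start
bucket_hours = 6.0

hours_since_day_start = start_dt.hour + start_dt.minute / 60   # 4.5
bucket_number = hours_since_day_start // bucket_hours          # 0.0: first bucket of the day
first_center = start_dt.replace(hour=0, minute=0, second=0, microsecond=0) \
    + timedelta(hours=(bucket_number + 1) * bucket_hours)

# The first center lands at the start of the next bucket, 06:00 UTC.
assert first_center == datetime(2024, 1, 1, 6, 0, tzinfo=timezone.utc)
```

Each incoming observation is then assigned to a bucket via floor((obs_time - first_center) / bucket_hours), exactly as in the loops above.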
windborne/utils.py CHANGED
@@ -337,7 +337,7 @@ def save_csv_json(save_to_file, response, csv_data_key=None):
     print("Unsupported file format. Please use either .json or .csv.")
     exit(4)
 
-def convert_to_netcdf(data, curtime, output_filename=None):
+def convert_to_netcdf(data, curtime, output_filename):
     # This module outputs data in netcdf format for the WMO ISARRA program. The output format is netcdf
     # and the style (variable names, file names, etc.) are described here:
     # https://github.com/synoptic/wmo-uasdc/tree/main/raw_uas_to_netCDF
@@ -362,10 +362,16 @@ def convert_to_netcdf(data, curtime, output_filename=None):
 
     # Convert dictionary to list for DataFrame
     data_list = []
-    for obs_id, obs_data in data.items():
-        # Convert 'None' strings to None type
-        clean_data = {k: None if v == 'None' else v for k, v in obs_data.items()}
-        data_list.append(clean_data)
+    if isinstance(data, dict):
+        # If input is dictionary, convert to list
+        for obs_id, obs_data in data.items():
+            clean_data = {k: None if v == 'None' else v for k, v in obs_data.items()}
+            data_list.append(clean_data)
+    else:
+        # If input is already a list
+        for obs_data in data:
+            clean_data = {k: None if v == 'None' else v for k, v in obs_data.items()}
+            data_list.append(clean_data)
 
     # Put the data in a pandas DataFrame in order to easily push to xarray then netcdf output
     df = pd.DataFrame(data_list)
@@ -381,16 +387,16 @@ def convert_to_netcdf(data, curtime, output_filename=None):
 
     # Build the filename and save some variables for use later
     mt = datetime.fromtimestamp(curtime, tz=timezone.utc)
+
+    is_multi_mission = True
+
     # Handle dropsondes
     mission_name = str(df['mission_name'].iloc[0]) if (not df.empty and not pd.isna(df['mission_name'].iloc[0])) else ' '
+    # Dropsondes have a blank mission name
+    if mission_name == ' ':
+        is_multi_mission = False
 
-    is_multi_mission = False
-
-    if output_filename:
-        output_file = output_filename
-        is_multi_mission = True  # we should calculate this directly, rather than relying on the filename
-    else:
-        output_file = f"WindBorne_{mission_name}_{mt.year:04d}-{mt.month:02d}-{mt.day:02d}_{mt.hour:02d}.nc"
+    output_file = output_filename
 
     # Derived quantities calculated here:
 
@@ -495,8 +501,7 @@ def convert_to_netcdf(data, curtime, output_filename=None):
     }
     ds['mission_name'].attrs = {
         'long_name': 'Mission name',
-        'description': 'Which balloon collected the data',
-        '_FillValue': ''
+        'description': 'Which balloon collected the data'
     }
 
     # Add Global Attributes synonymous across all UASDC providers
@@ -572,7 +577,7 @@ def format_little_r(observations):
     """
     little_r_records = []
 
-    for obs_id, point in observations:
+    for point in observations:
        # Observation time
        observation_time = datetime.fromtimestamp(point['timestamp'], tz=timezone.utc)
 
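convert_to_netcdf now requires the output filename and accepts either input shape; the normalization it applies is small enough to restate as a runnable sketch (the helper name is ours, not the library's):

```python
def normalize_observations(data):
    # Mirrors the branch added to convert_to_netcdf: dict-keyed input is
    # flattened to a list, and the placeholder string 'None' becomes None.
    rows = data.values() if isinstance(data, dict) else data
    return [{k: None if v == 'None' else v for k, v in row.items()} for row in rows]

print(normalize_observations({'1_W-1': {'temperature': 'None', 'pressure': 200.0}}))
# -> [{'temperature': None, 'pressure': 200.0}]
```

format_little_r changes in step: it now iterates plain observation dicts rather than (obs_id, point) pairs, matching the sorted lists the pollers pass in.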
{windborne-1.0.6.dist-info → windborne-1.0.7.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: windborne
-Version: 1.0.6
+Version: 1.0.7
 Summary: A Python library for interacting with WindBorne Data and Forecasts API
 Author-email: WindBorne Systems <data@windbornesystems.com>
 Classifier: Programming Language :: Python :: 3
windborne-1.0.7.dist-info/RECORD ADDED
@@ -0,0 +1,11 @@
+windborne/__init__.py,sha256=aDFnZEPGmulZ-VVAVD-0maK3UFeLl9PxUyxp_qZ85Gk,1894
+windborne/cli.py,sha256=Qp6wu3ZbXwnpmHa3odr0sjIJ3DOhtraQblUGwKWEKWc,36416
+windborne/config.py,sha256=FYIBRiIuii5igAFQlOsHUa6u2i1kKnO1yZE7QfQJvUg,1688
+windborne/data_api.py,sha256=TtOgzD-ONRFswPbAIiMaCANp_IaP4g8OvNExqZ_81iA,63414
+windborne/forecasts_api.py,sha256=AYuhFRls_XvzuNB55NF0w3y-_ocYwPxmI6C1lIyFkgM,16865
+windborne/utils.py,sha256=cpQZ79EB8T0Cy5ygwHsbTEE4XjQ0xYX_sN-Ags8AYJw,39718
+windborne-1.0.7.dist-info/METADATA,sha256=R6AQZUik0LbU1ofYCGFYL8dWWLyzoIoLhyW3TVrP_Ng,1264
+windborne-1.0.7.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+windborne-1.0.7.dist-info/entry_points.txt,sha256=j_YrqdCDrCd7p5MIwQ2BYwNXEi95VNANzLRJmcXEg1U,49
+windborne-1.0.7.dist-info/top_level.txt,sha256=PE9Lauriu5S5REf7JKhXprufZ_V5RiZ_TnfnrLGJrmE,10
+windborne-1.0.7.dist-info/RECORD,,
windborne-1.0.6.dist-info/RECORD DELETED
@@ -1,11 +0,0 @@
-windborne/__init__.py,sha256=tNnX9BrdgFNjy9NY6ucobCVAgV3KYwUydEAuwxdaiqQ,1784
-windborne/cli.py,sha256=YlIz9KgsJumaCxz64uwAxfUUTJzc2rPEwzwGajCT9Jw,32838
-windborne/config.py,sha256=FYIBRiIuii5igAFQlOsHUa6u2i1kKnO1yZE7QfQJvUg,1688
-windborne/data_api.py,sha256=uggo2Y5U36ptvpSYgCUHNFwbNlczsCx7OUrf_OwlvtE,37629
-windborne/forecasts_api.py,sha256=AYuhFRls_XvzuNB55NF0w3y-_ocYwPxmI6C1lIyFkgM,16865
-windborne/utils.py,sha256=Zp9oTWfbISmJ9nO893RrW6MkqOwCDaFgsszmYaRgJSg,39670
-windborne-1.0.6.dist-info/METADATA,sha256=Fx7tFudf2QWvgLmkzfmTr4BNy2id_rKoOj-J5W4ZqVQ,1264
-windborne-1.0.6.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-windborne-1.0.6.dist-info/entry_points.txt,sha256=j_YrqdCDrCd7p5MIwQ2BYwNXEi95VNANzLRJmcXEg1U,49
-windborne-1.0.6.dist-info/top_level.txt,sha256=PE9Lauriu5S5REf7JKhXprufZ_V5RiZ_TnfnrLGJrmE,10
-windborne-1.0.6.dist-info/RECORD,,