windborne-1.0.5-py3-none-any.whl → windborne-1.0.7-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- windborne/__init__.py +11 -4
- windborne/cli.py +168 -75
- windborne/data_api.py +629 -119
- windborne/utils.py +40 -16
- {windborne-1.0.5.dist-info → windborne-1.0.7.dist-info}/METADATA +1 -1
- windborne-1.0.7.dist-info/RECORD +11 -0
- windborne-1.0.5.dist-info/RECORD +0 -11
- {windborne-1.0.5.dist-info → windborne-1.0.7.dist-info}/WHEEL +0 -0
- {windborne-1.0.5.dist-info → windborne-1.0.7.dist-info}/entry_points.txt +0 -0
- {windborne-1.0.5.dist-info → windborne-1.0.7.dist-info}/top_level.txt +0 -0
windborne/data_api.py
CHANGED
@@ -7,10 +7,19 @@ from math import floor
 from datetime import datetime, timezone, timedelta
 import csv
 import json
+import hashlib
 
-def get_observations(since=None, min_time=None, max_time=None, include_ids=None, include_mission_name=True, include_updated_at=None, mission_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None, save_to_file=None):
+# UTC should be used across the lib
+
+# ------------
+# CORE RESOURCES
+# ------------
+
+# Observations
+# ------------
+def get_observations_page(since=None, min_time=None, max_time=None, include_ids=None, include_mission_name=True, include_updated_at=None, mission_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None, save_to_file=None):
     """
-    Retrieves observations based on specified filters including geographical bounds.
+    Retrieves observations page based on specified filters including geographical bounds.
 
     Args:
         since (str): Filter observations after this timestamp.
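The rename to get_observations_page makes the paging contract explicit: each call returns one page plus a next_since cursor and a has_next_page flag, both visible in the polling loops later in this diff. A minimal sketch of walking the pages by hand, assuming the function is exported from the package root (the __init__.py change in this diff suggests the public surface was updated) and that API credentials are configured as the library expects; the since value is illustrative:

    from windborne import get_observations_page

    since = 1730419200  # 2024-11-01 00:00:00 UTC, illustrative
    while True:
        page = get_observations_page(since=since, include_mission_name=True)
        if page is None:
            break  # the library's own loops treat None as a failed request
        for obs in page.get('observations', []):
            print(obs.get('mission_name'), obs.get('timestamp'))
        if not page.get('has_next_page'):
            break
        since = page.get('next_since')  # cursor for the next page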
@@ -70,52 +79,7 @@ def get_observations(since=None, min_time=None, max_time=None, include_ids=None,
 
     return response
 
-def get_super_observations(since=None, min_time=None, max_time=None, include_ids=None, include_mission_name=None, include_updated_at=None, mission_id=None, save_to_file=None):
-    """
-    Retrieves super observations based on specified filters.
-
-    Args:
-        since (str): Filter observations after this timestamp.
-        min_time (str): Minimum timestamp for observations.
-        max_time (str): Maximum timestamp for observations.
-        include_ids (bool): Include observation IDs in response.
-        include_mission_name (bool): Include mission names in response.
-        include_updated_at (bool): Include update timestamps in response.
-        mission_id (str): Filter observations by mission ID.
-        save_to_file (str): Optional path to save the response data.
-                            If provided, saves the data in CSV format.
-
-    Returns:
-        dict: The API response containing filtered super observations.
-    """
-
-    url = f"{DATA_API_BASE_URL}/super_observations.json"
-
-    params = {}
-    if since:
-        params["since"] = to_unix_timestamp(since)
-    if min_time:
-        params["min_time"] = to_unix_timestamp(min_time)
-    if max_time:
-        params["max_time"] = to_unix_timestamp(max_time)
-    if mission_id:
-        params["mission_id"] = mission_id
-    if include_ids:
-        params["include_ids"] = True
-    if include_mission_name:
-        params["include_mission_name"] = True
-    if include_updated_at:
-        params["include_updated_at"] = True
-
-    params = {k: v for k, v in params.items() if v is not None}
-
-    response = make_api_request(url, params=params)
-    if save_to_file:
-        save_csv_json(save_to_file, response, csv_data_key='observations')
-
-    return response
-
-def poll_observations(start_time, end_time=None, include_ids=None, include_updated_at=None, mission_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None, interval=60, save_to_file=None, bucket_hours=6.0, output_format=None, callback=None):
+def observations(start_time, end_time=None, include_ids=None, include_updated_at=None, mission_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None, interval=60, save_to_file=None, bucket_hours=6.0, output_format=None, output_dir=None, callback=None):
     """
     Fetches observations between a start time and an optional end time and saves to files in specified format.
     Files are broken up into time buckets, with filenames containing the time at the mid-point of the bucket.
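The visible API change in this hunk is a rename: the bounded fetch that 1.0.5 called poll_observations is now simply observations, while the poll_observations name is reused further down for a new run-forever poller (the removed get_super_observations is likewise re-added later as get_super_observations_page). A before/after sketch with illustrative arguments, assuming package-root exports:

    # 1.0.5: bounded fetch
    # poll_observations("2024-01-01 00:00:00", end_time="2024-01-02 00:00:00",
    #                   save_to_file="obs.json")

    # 1.0.7: same bounded fetch, renamed, with a new output_dir parameter
    from windborne import observations

    observations("2024-01-01 00:00:00",
                 end_time="2024-01-02 00:00:00",
                 save_to_file="obs.json")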
@@ -140,6 +104,7 @@ def poll_observations(start_time, end_time=None, include_ids=None, include_updat
                             Supported formats are '.csv', '.json', '.little_r' and '.nc'
         bucket_hours (int): Optional. Size of time buckets in hours. Defaults to 6 hours.
         output_format (str): Optional. Format to save data in separate files. Supported formats are 'json, 'csv', 'little_r' and 'netcdf'.
+        output_dir (str): Optional. Directory path where the separate files should be saved. If not provided, files will be saved in current directory.
         callback (callable): Optional callback function that receives (super observations, metadata) before saving.
                              This allows custom processing or saving in custom formats.
     """
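Note that the new output_dir only affects the multi-file branch: in the function body save_to_file takes precedence in the if/elif chain, and the single-file path creates its own directory separately. A sketch of the bucketed export, with illustrative values and a package-root import assumed:

    from windborne import observations

    # One file per (mission, 6-hour bucket) under ./exports, e.g.
    # WindBorne_<mission>_2024-01-01_09_6h.csv per the naming in this diff.
    observations("2024-01-01 00:00:00",
                 end_time="2024-01-02 00:00:00",
                 output_format="csv",
                 output_dir="exports",
                 bucket_hours=6.0)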
@@ -165,7 +130,7 @@ def poll_observations(start_time, end_time=None, include_ids=None, include_updat
         return
 
     # Supported formats for saving into a single file:
-    # NOTE: for
+    # NOTE: for observations we handle .csv saving within observations and not using save_csv_json
     # - .csv
     # - .json
     # - .little_r
@@ -201,12 +166,16 @@ def poll_observations(start_time, end_time=None, include_ids=None, include_updat
     # Initialize the polling loop
     current_timestamp = start_time
     has_next_page = True
+    fetced_so_far = 0
+
+    print(f"Starting polling observations\nfrom {datetime.fromtimestamp(start_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC to {datetime.fromtimestamp(end_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC")
+    print("-----------------------------------------------------")
 
 
     while has_next_page:
         try:
             # Fetch observations
-            observations_page = get_observations(
+            observations_page = get_observations_page(
                 since=current_timestamp,
                 min_latitude=min_latitude,
                 max_latitude=max_latitude,
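The new print_current_timestamp guard exists because the paging cursor can come back at nanosecond resolution: any value at or above 1e11 is far outside the range of plausible second-resolution Unix times (1e11 seconds lands in the year 5138), so it is divided by 1e9 before formatting. A standalone illustration of the same normalization (helper name is mine):

    from datetime import datetime, timezone

    def to_seconds(ts):
        # Values >= 1e11 cannot be second-resolution Unix times,
        # so treat them as nanoseconds.
        return ts if ts < 1e11 else ts / 1e9

    for ts in (1730419200, 1730419200_000_000_000):
        print(datetime.fromtimestamp(to_seconds(ts), tz=timezone.utc))
    # both iterations print 2024-11-01 00:00:00+00:00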
@@ -226,11 +195,15 @@ def poll_observations(start_time, end_time=None, include_ids=None, include_updat
                 continue
 
             observations = observations_page.get('observations', [])
-
+            fetced_so_far = fetced_so_far + len(observations)
+            print_current_timestamp = current_timestamp if current_timestamp < 1e11 else current_timestamp / 1e9
+            print(f"Fetched {fetced_so_far} observations")
+            print(f"Current time: {datetime.fromtimestamp(print_current_timestamp, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
+            print("-----------------------------------------------------")
 
             # Invoke the callback with fetched observations
             if callback:
-                print("
+                print("--------\nCallback\n--------")
                 callback(observations)
 
             for obs in observations:
@@ -281,27 +254,41 @@ def poll_observations(start_time, end_time=None, include_ids=None, include_updat
 
             if not has_next_page or not next_timestamp or next_timestamp <= current_timestamp:
                 print("-----------------------------------------------------\n")
-                print("
+                print("Fetching complete.")
                 print("\n-----------------------------------------------------")
                 break
 
             current_timestamp = next_timestamp
 
+        except KeyboardInterrupt:
+            print("\n\n\U0001F6D1 Received interrupt, stopping...")
+            print("-----------------------------------------------------")
+            print("Requested data was not saved!\nRun again and do not interrupt the run to save data.")
+            print("-----------------------------------------------------")
+            exit(3)
         except Exception as e:
             print(f"Error occurred: {e}")
             exit(1001)
 
     # Save data to a single file
     if save_to_file:
+        # Create directory path if it doesn't exist
+        directory = os.path.dirname(save_to_file)
+        if directory and not os.path.isdir(directory):
+            os.makedirs(directory, exist_ok=True)
         filtered_observations = {obs_id: obs for obs_id, obs in all_observations.items()
                                  if float(obs['timestamp']) >= start_time}
         # Sort by timestamp
         sorted_observations = dict(sorted(filtered_observations.items(),
                                           key=lambda x: float(x[1]['timestamp'])))
 
+        print(f"Saving {len(sorted_observations)} {'observation' if len(sorted_observations) == 1 else 'observations'} to {save_to_file}")
+        print("This may take a while...")
+        print("-----------------------------------------------------\n")
+
        if save_to_file.endswith('.nc'):
             first_obs_timestamp = float(next(iter(sorted_observations.values()))['timestamp'])
-            convert_to_netcdf(sorted_observations, first_obs_timestamp,
+            convert_to_netcdf(sorted_observations, first_obs_timestamp, save_to_file)
         elif save_to_file.endswith('.json'):
             with open(save_to_file, 'w', encoding='utf-8') as f:
                 json.dump(sorted_observations, f, indent=4)
@@ -321,6 +308,15 @@ def poll_observations(start_time, end_time=None, include_ids=None, include_updat
 
     # Save data to multiple file
     elif output_format:
+        # Create output directory if specified
+        if output_dir:
+            os.makedirs(output_dir, exist_ok=True)
+            print(f"Files will be saved to {output_dir}")
+        else:
+            print(f"Files will be saved to {os.getcwd()}")
+        print(f"Processing {fetced_so_far} {'observation' if fetced_so_far == 1 else 'observations'} and save them over multiple files.")
+        print("This may take a while...")
+        print("-----------------------------------------------------\n")
         # Track statistics per mission
         mission_stats = {}  # {mission_name: {'files': 0, 'observations': 0}}
         total_observations_written = 0
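Both save paths now funnel through one convention: create output_dir on demand, otherwise fall back to the current working directory, then join the generated file name onto it. A small sketch of that convention (the helper name is mine, the mission name hypothetical):

    import os

    def resolve_output_file(file_name, output_dir=None):
        # '.' mirrors the diff's os.path.join(output_dir or '.', file_name) fallback
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        return os.path.join(output_dir or '.', file_name)

    print(resolve_output_file("WindBorne_W-1234_2024-01-01_09_6h.csv", "exports"))
    # exports/WindBorne_W-1234_2024-01-01_09_6h.csv (POSIX path separator)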
@@ -329,48 +325,39 @@ def poll_observations(start_time, end_time=None, include_ids=None, include_updat
         for (bucket_center, mission_name), observations in buckets.items():
             if observations:
                 # Format hour to be the actual bucket center
-                bucket_hour = int((bucket_center.hour + bucket_hours/2) % 24)
+                bucket_hour = int((bucket_center.hour + bucket_hours / 2) % 24)
 
-
-
+                # Generate file name based on output format
+                file_name_format = {
+                    'csv': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv",
+                    'json': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json",
+                    'netcdf': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.nc",
+                    'little_r': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r"
+                }
+                file_name = file_name_format[output_format] % (
+                    bucket_center.year, bucket_center.month, bucket_center.day,
+                    bucket_hour, bucket_hours)
 
-
-                output_file = (f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv" %
-                               (bucket_center.year, bucket_center.month, bucket_center.day,
-                                bucket_hour, bucket_hours))
+                output_file = os.path.join(output_dir or '.', file_name)
 
-
+                # Sort observations by timestamp within each bucket
+                sorted_obs = sorted(observations.values(), key=lambda x: int(x['timestamp']))
 
-
-                sorted_obs
+                if output_format == 'netcdf':
+                    convert_to_netcdf(sorted_obs, bucket_center.timestamp(), output_file)
 
+                elif output_format == 'csv':
                     with open(output_file, mode='w', newline='') as file:
                         writer = csv.DictWriter(file, fieldnames=headers)
                         writer.writeheader()
                         writer.writerows(sorted_obs)
 
                 elif output_format == 'json':
-
-                                   (bucket_center.year, bucket_center.month, bucket_center.day,
-                                    bucket_hour, bucket_hours))
-
-                    os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)
-
-                    # Sort observations by timestamp within each bucket
-                    sorted_obs = dict(sorted(observations.items(), key=lambda x: int(x[1]['timestamp'])))
-
+                    sorted_obs_dict = {k: v for k, v in sorted(observations.items(), key=lambda x: int(x[1]['timestamp']))}
                     with open(output_file, 'w', encoding='utf-8') as file:
-                        json.dump(
+                        json.dump(sorted_obs_dict, file, indent=4)
 
                 elif output_format == 'little_r':
-                    output_file = (f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r" %
-                                   (bucket_center.year, bucket_center.month, bucket_center.day,
-                                    bucket_hour, bucket_hours))
-
-                    os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)
-
-                    sorted_obs = sorted(observations.items(), key=lambda x: int(x[1]['timestamp']))
-
                     little_r_records = format_little_r(sorted_obs)
                     with open(output_file, 'w') as file:
                         file.write('\n'.join(little_r_records))
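The file_name_format table consolidates what 1.0.5 rebuilt per branch, and old-style % formatting fills in the bucket date plus the mid-point hour. A worked example of the filename math with a hypothetical mission name; note that '%d' % 6.0 renders as 6, so the float bucket_hours still yields a clean 6h suffix:

    from datetime import datetime, timezone

    bucket_hours = 6.0
    mission_name = "W-1234"  # hypothetical
    bucket_center = datetime(2024, 1, 1, 6, tzinfo=timezone.utc)

    # mid-point hour label: (6 + 6/2) % 24 == 9
    bucket_hour = int((bucket_center.hour + bucket_hours / 2) % 24)

    file_name = f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv" % (
        bucket_center.year, bucket_center.month, bucket_center.day,
        bucket_hour, bucket_hours)
    print(file_name)  # WindBorne_W-1234_2024-01-01_09_6h.csv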
@@ -382,7 +369,7 @@ def poll_observations(start_time, end_time=None, include_ids=None, include_updat
                 mission_stats[mission_name]['files'] += 1
                 mission_stats[mission_name]['observations'] += len(observations)
         # Print total observations written
-        print(f"
+        print(f"Saved {total_observations_written} {'observation.' if total_observations_written == 1 else 'observations.'}")
         print("-----------------------------------------------------")
 
         # Print summary for each mission
@@ -392,7 +379,286 @@ def poll_observations(start_time, end_time=None, include_ids=None, include_updat
     print("-----------------------------------------------------")
     print("All observations have been processed and saved.")
 
-def poll_super_observations(start_time, end_time=None, interval=60, save_to_file=None, bucket_hours=6.0, output_format=None, callback=None):
+def poll_observations(start_time, include_ids=None, include_updated_at=None, mission_id=None, min_latitude=None, max_latitude=None, min_longitude=None, max_longitude=None, interval=60, bucket_hours=6.0, output_format=None, output_dir=None, callback=None):
+    """
+    Continuously polls for observations and saves to files in specified format.
+    Will run indefinitely until interrupted.
+
+    Args:
+        start_time (str): Starting time in YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM or ISO format
+        include_ids (bool): Include observation IDs in response.
+        include_updated_at (bool): Include update timestamps in response.
+        mission_id (str): Filter observations by mission ID.
+        min_latitude (float): Minimum latitude boundary.
+        max_latitude (float): Maximum latitude boundary.
+        min_longitude (float): Minimum longitude boundary.
+        max_longitude (float): Maximum longitude boundary.
+        interval (int): Polling interval in seconds when no data is received (default: 60)
+        bucket_hours (float): Size of time buckets in hours (default: 6.0)
+        output_format (str): Format for bucket files ('json', 'csv', 'little_r', 'netcdf')
+        output_dir (str): Directory for bucket files (default: current directory)
+        callback (callable): Optional callback for data processing
+    """
+    # Print warning about infinite loop
+    print(" ___________________________________________________________________")
+    print("| WARNING \U000026A0\U0000FE0F |")
+    print("| You are entering an endless loop. |")
+    print("| |")
+    print("| Press Ctrl + C anytime to exit. |")
+    print("|___________________________________________________________________|\n\n")
+    time.sleep(4)
+
+    start_time = to_unix_timestamp(start_time)
+
+    if output_format and output_format not in ['json', 'csv', 'little_r', 'netcdf']:
+        print("Please use one of the following formats:")
+        print(" - json\n - csv\n - little_r\n - netcdf")
+        return
+
+    if output_dir:
+        os.makedirs(output_dir, exist_ok=True)
+        print(f"\U0001F4C1 Files will be saved to {output_dir}")
+    else:
+        print(f"\U0001F4C1 Files will be saved to {os.getcwd()}")
+
+    # Convert start_time to datetime
+    start_dt = datetime.fromtimestamp(start_time, tz=timezone.utc)
+
+    # Calculate first center time that's after start_time
+    hours_since_day_start = start_dt.hour + start_dt.minute / 60
+    bucket_number = hours_since_day_start // bucket_hours
+    first_center = start_dt.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(hours=(bucket_number + 1) * bucket_hours)
+
+    headers = [
+        "timestamp", "id", "time", "latitude", "longitude", "altitude", "humidity",
+        "mission_name", "pressure", "specific_humidity", "speed_u", "speed_v", "temperature"
+    ]
+
+    buckets = {}  # {(bucket_center, mission_name): {'data': {}, 'last_write': timestamp, 'data_hash': str}}
+    current_timestamp = start_time
+    fetched_so_far = 0
+    mission_stats = {}
+
+    print(f"Starting continuous polling from {datetime.fromtimestamp(start_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
+    print(f"Polling interval: {interval} seconds")
+    print("-----------------------------------------------------")
+
+    try:
+        while True:
+            observations_page = get_observations_page(
+                since=current_timestamp,
+                min_latitude=min_latitude,
+                max_latitude=max_latitude,
+                min_longitude=min_longitude,
+                max_longitude=max_longitude,
+                include_updated_at=include_updated_at,
+                mission_id=mission_id,
+                include_ids=include_ids,
+                include_mission_name=True
+            )
+
+            if observations_page is None:
+                print(f"\nNull response from API. Retrying in {interval} seconds ...")
+                time.sleep(interval)
+                continue
+
+            observations = observations_page.get('observations', [])
+
+            # Invoke the callback with fetched super observations
+            if callback:
+                print("--------\nCallback\n--------")
+                callback(observations)
+
+            if observations:
+                fetched_so_far += len(observations)
+                print_current_timestamp = current_timestamp if current_timestamp < 1e11 else current_timestamp / 1e9
+                print(f"Fetched {fetched_so_far} observations")
+                print(f"Current time: {datetime.fromtimestamp(print_current_timestamp, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
+                print("-----------------------------------------------------")
+
+            for obs in observations:
+                if 'mission_name' not in obs:
+                    continue
+
+                timestamp = obs.get('timestamp')
+                if not timestamp:
+                    continue
+
+                try:
+                    obs_time = datetime.fromtimestamp(timestamp, tz=timezone.utc)
+                except (OSError, ValueError, TypeError, OverflowError):
+                    continue
+
+                mission_name = obs.get('mission_name', 'Unknown')
+                obs['time'] = obs_time.replace(tzinfo=timezone.utc).isoformat()
+
+                processed_obs = {
+                    header: obs.get(header) if obs.get(header) not in [None, '', ' '] else 'None'
+                    for header in headers
+                }
+
+                obs_id = f"{timestamp}_{mission_name}"
+
+                if obs_time >= start_dt:
+                    hours_diff = (obs_time - first_center).total_seconds() / 3600
+                    bucket_index = floor(hours_diff / bucket_hours)
+                    bucket_center = first_center + timedelta(hours=bucket_index * bucket_hours)
+                    bucket_end = bucket_center + timedelta(hours=bucket_hours)
+
+                    if obs_time <= bucket_end:
+                        bucket_key = (bucket_center, mission_name)
+
+                        # Initialize bucket if needed
+                        if bucket_key not in buckets:
+                            buckets[bucket_key] = {
+                                'data': {},
+                                'last_write': 0,
+                                'data_hash': ''
+                            }
+
+                        # Update bucket data
+                        buckets[bucket_key]['data'][obs_id] = processed_obs
+
+                        # Track statistics
+                        if mission_name not in mission_stats:
+                            mission_stats[mission_name] = {'files': set(), 'observations': 0}
+                        mission_stats[mission_name]['observations'] += 1
+
+                        # Calculate new data hash
+                        sorted_data = sorted(buckets[bucket_key]['data'].items(), key=lambda x: int(x[1]['timestamp']))
+                        data_hash = hashlib.md5(str(sorted_data).encode()).hexdigest()
+
+                        # Check if we should write the bucket
+                        current_time = datetime.now(timezone.utc)
+                        time_since_last_write = current_time.timestamp() - buckets[bucket_key]['last_write']
+                        data_changed = data_hash != buckets[bucket_key]['data_hash']
+
+                        # Write if it's been more than interval seconds since last write OR if data has changed
+                        if (time_since_last_write >= interval or data_changed) and output_format:
+                            bucket_hour = int((bucket_center.hour + bucket_hours/2) % 24)
+
+                            file_name_format = {
+                                'csv': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv",
+                                'json': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json",
+                                'netcdf': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.nc",
+                                'little_r': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r"
+                            }
+
+                            file_name = file_name_format[output_format] % (
+                                bucket_center.year, bucket_center.month, bucket_center.day,
+                                bucket_hour, bucket_hours)
+
+                            output_file = os.path.join(output_dir or '.', file_name)
+                            sorted_obs = [obs for _, obs in sorted_data]
+
+                            # Write the file based on format
+                            try:
+                                if output_format == 'netcdf':
+                                    convert_to_netcdf(sorted_obs, bucket_center.timestamp(), output_file)
+                                elif output_format == 'csv':
+                                    with open(output_file, mode='w', newline='') as file:
+                                        writer = csv.DictWriter(file, fieldnames=headers)
+                                        writer.writeheader()
+                                        writer.writerows(sorted_obs)
+                                elif output_format == 'json':
+                                    sorted_obs_dict = {k: v for k, v in sorted_data}
+                                    with open(output_file, 'w', encoding='utf-8') as file:
+                                        json.dump(sorted_obs_dict, file, indent=4)
+                                elif output_format == 'little_r':
+                                    little_r_records = format_little_r(sorted_obs)
+                                    with open(output_file, 'w') as file:
+                                        file.write('\n'.join(little_r_records))
+
+                                buckets[bucket_key]['last_write'] = current_time.timestamp()
+                                buckets[bucket_key]['data_hash'] = data_hash
+                                mission_stats[mission_name]['files'].add(output_file)
+                            except Exception as e:
+                                print(f"Error writing bucket file {file_name}: {str(e)}")
+
+            # Clean up old buckets
+            current_time = datetime.now(timezone.utc)
+            buckets = {
+                k: v for k, v in buckets.items()
+                if current_time - k[0] <= timedelta(hours=bucket_hours * 2)  # Keep slightly longer for potential updates
+            }
+
+            next_timestamp = observations_page.get('next_since')
+            has_next_page = observations_page.get('has_next_page', False)
+
+            if next_timestamp and next_timestamp > current_timestamp:
+                current_timestamp = next_timestamp
+            elif not has_next_page:
+                print("-----------------------------------------------------")
+                print(f"\U0001F503 Latest super observations data have been processed.\nRetrying getting new observations data in {interval} seconds...")
+                print("-----------------------------------------------------")
+                time.sleep(interval)
+                continue
+
+            if not observations:
+                print(f"\U0001F503 No new super observations data available.\n Retrying getting new observations data in {interval} seconds...")
+                print("-----------------------------------------------------")
+                time.sleep(interval)
+
+    except KeyboardInterrupt:
+        print("\n\n\U0001F6D1 Received interrupt, stopping...")
+        print("-----------------------------------------------------")
+        for mission_name, stats in mission_stats.items():
+            print(f"Mission {mission_name}: {stats['observations']} observations across {len(stats['files'])} files")
+    except Exception as e:
+        print(f"Error occurred: {str(e)}")
+        exit(1001)
+    finally:
+        print("-----------------------------------------------------")
+        print("Finished processing observations.")
+
+# Super Observations
+# ------------
+def get_super_observations_page(since=None, min_time=None, max_time=None, include_ids=None, include_mission_name=None, include_updated_at=None, mission_id=None, save_to_file=None):
+    """
+    Retrieves super observations page based on specified filters.
+
+    Args:
+        since (str): Filter observations after this timestamp.
+        min_time (str): Minimum timestamp for observations.
+        max_time (str): Maximum timestamp for observations.
+        include_ids (bool): Include observation IDs in response.
+        include_mission_name (bool): Include mission names in response.
+        include_updated_at (bool): Include update timestamps in response.
+        mission_id (str): Filter observations by mission ID.
+        save_to_file (str): Optional path to save the response data.
+                            If provided, saves the data in CSV format.
+
+    Returns:
+        dict: The API response containing filtered super observations.
+    """
+
+    url = f"{DATA_API_BASE_URL}/super_observations.json"
+
+    params = {}
+    if since:
+        params["since"] = to_unix_timestamp(since)
+    if min_time:
+        params["min_time"] = to_unix_timestamp(min_time)
+    if max_time:
+        params["max_time"] = to_unix_timestamp(max_time)
+    if mission_id:
+        params["mission_id"] = mission_id
+    if include_ids:
+        params["include_ids"] = True
+    if include_mission_name:
+        params["include_mission_name"] = True
+    if include_updated_at:
+        params["include_updated_at"] = True
+
+    params = {k: v for k, v in params.items() if v is not None}
+
+    response = make_api_request(url, params=params)
+    if save_to_file:
+        save_csv_json(save_to_file, response, csv_data_key='observations')
+
+    return response
+
+def super_observations(start_time, end_time=None, interval=60, save_to_file=None, bucket_hours=6.0, output_format=None, output_dir=None, callback=None):
     """
     Fetches super observations between a start time and an optional end time and saves to files in specified format.
     Files are broken up into time buckets, with filenames containing the time at the mid-point of the bucket.
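The hashlib import added at the top of this diff serves poll_observations' write decision: each bucket's sorted contents are hashed with MD5 (used as a cheap change detector, not for security), and the bucket file is rewritten when the hash differs from the last write or more than interval seconds have passed. A reduced sketch of that decision with illustrative data:

    import hashlib
    import time

    interval = 60  # seconds
    bucket = {'data': {}, 'last_write': 0, 'data_hash': ''}

    def should_write(bucket, new_obs):
        bucket['data'].update(new_obs)
        sorted_data = sorted(bucket['data'].items(),
                             key=lambda x: int(x[1]['timestamp']))
        data_hash = hashlib.md5(str(sorted_data).encode()).hexdigest()
        stale = time.time() - bucket['last_write'] >= interval
        changed = data_hash != bucket['data_hash']
        if stale or changed:
            bucket['last_write'] = time.time()
            bucket['data_hash'] = data_hash
            return True
        return False

    print(should_write(bucket, {'1730419200_W-1234': {'timestamp': 1730419200}}))  # True: new data
    print(should_write(bucket, {}))  # False: unchanged and written recently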
@@ -408,6 +674,7 @@ def poll_super_observations(start_time, end_time=None, interval=60, save_to_file
                             Supported formats are '.csv', '.json', '.little_r' and '.nc'
         bucket_hours (int): Optional. Size of time buckets in hours. Defaults to 6 hours.
         output_format (str): Optional. Format to save data in separate files. Supported formats are 'json, 'csv', 'little_r' and 'netcdf'.
+        output_dir (str): Optional. Directory path where the separate files should be saved. If not provided, files will be saved in current directory.
         callback (callable): Optional callback function that receives (super observations, metadata) before saving.
                              This allows custom processing or saving in custom formats.
     """
@@ -469,12 +736,15 @@ def poll_super_observations(start_time, end_time=None, interval=60, save_to_file
     # Initialize the polling loop
     current_timestamp = start_time
     has_next_page = True
+    fetced_so_far = 0
 
+    print(f"Starting polling super observations\nfrom {datetime.fromtimestamp(start_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} to {datetime.fromtimestamp(end_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
+    print("-----------------------------------------------------")
 
     while has_next_page:
         try:
             # Fetch observations
-            observations_page = get_super_observations(
+            observations_page = get_super_observations_page(
                 since=current_timestamp,
                 min_time=start_time,
                 max_time=end_time,
@@ -490,13 +760,15 @@ def poll_super_observations(start_time, end_time=None, interval=60, save_to_file
                 continue
 
             observations = observations_page.get('observations', [])
-
+            fetced_so_far = fetced_so_far + len(observations)
+            print_current_timestamp = current_timestamp if current_timestamp < 1e11 else current_timestamp / 1e9
+            print(f"Fetched {fetced_so_far} super observations")
+            print(f"Current time: {datetime.fromtimestamp(print_current_timestamp, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
+            print("-----------------------------------------------------")
 
-            # Invoke the callback with fetched observations
+            # Invoke the callback with fetched super observations
             if callback:
-                print("--------")
-                print("Callback")
-                print("--------")
+                print("--------\nCallback\n--------")
                 callback(observations)
 
             for obs in observations:
@@ -547,27 +819,41 @@ def poll_super_observations(start_time, end_time=None, interval=60, save_to_file
 
             if not has_next_page or not next_timestamp or next_timestamp <= current_timestamp:
                 print("-----------------------------------------------------\n")
-                print("
+                print("Fetching complete.")
                 print("\n-----------------------------------------------------")
                 break
 
             current_timestamp = next_timestamp
 
+        except KeyboardInterrupt:
+            print("\n\n\U0001F6D1 Received interrupt, stopping...")
+            print("-----------------------------------------------------")
+            print("Requested data was not saved!\nRun again and do not interrupt the run to save data.")
+            print("-----------------------------------------------------")
+            exit(3)
         except Exception as e:
             print(f"Error occurred: {e}")
             exit(1001)
 
     # Save data to a single file
     if save_to_file:
+        # Create directory path if it doesn't exist
+        directory = os.path.dirname(save_to_file)
+        if directory and not os.path.isdir(directory):
+            os.makedirs(directory, exist_ok=True)
         filtered_observations = {obs_id: obs for obs_id, obs in all_observations.items()
                                  if float(obs['timestamp']) >= start_time}
         # Sort by timestamp
         sorted_observations = dict(sorted(filtered_observations.items(),
                                           key=lambda x: float(x[1]['timestamp'])))
 
+        print(f"Saving {len(sorted_observations)} super {'observation' if len(sorted_observations) == 1 else 'observations'} to {save_to_file}")
+        print("This may take a while...")
+        print("-----------------------------------------------------\n")
+
         if save_to_file.endswith('.nc'):
             first_obs_timestamp = float(next(iter(sorted_observations.values()))['timestamp'])
-            convert_to_netcdf(sorted_observations, first_obs_timestamp,
+            convert_to_netcdf(sorted_observations, first_obs_timestamp, save_to_file)
 
         elif save_to_file.endswith('.json'):
             with open(save_to_file, 'w', encoding='utf-8') as f:
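The `if directory` guard before the single-file save is load-bearing: os.path.dirname returns '' for a bare filename, and os.makedirs('') raises FileNotFoundError, so directory creation must be skipped in that case. Illustration:

    import os

    for path in ("obs.json", "exports/super/obs.json"):
        directory = os.path.dirname(path)  # '' for a bare filename
        print(repr(directory), "-> makedirs" if directory else "-> skip")
    # '' -> skip
    # 'exports/super' -> makedirs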
@@ -588,6 +874,16 @@ def poll_super_observations(start_time, end_time=None, interval=60, save_to_file
 
     # Save data to multiple file
     elif output_format:
+        # Create output directory if specified
+        if output_dir:
+            os.makedirs(output_dir, exist_ok=True)
+            print(f"Files will be saved to {output_dir}")
+        else:
+            print(f"Files will be saved to {os.getcwd()}")
+
+        print(f"Processing {fetced_so_far} super {'observation' if fetced_so_far == 1 else 'observations'} and save them over multiple files.")
+        print("This may take a while...")
+        print("-----------------------------------------------------\n")
         # Track statistics per mission
         mission_stats = {}  # {mission_name: {'files': 0, 'observations': 0}}
         total_observations_written = 0
@@ -598,46 +894,37 @@ def poll_super_observations(start_time, end_time=None, interval=60, save_to_file
                 # Format hour to be the actual bucket center
                 bucket_hour = int((bucket_center.hour + bucket_hours/2) % 24)
 
-
-
+                # Generate file name based on output format
+                file_name_format = {
+                    'csv': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv",
+                    'json': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json",
+                    'netcdf': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.nc",
+                    'little_r': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r"
+                }
+                file_name = file_name_format[output_format] % (
+                    bucket_center.year, bucket_center.month, bucket_center.day,
+                    bucket_hour, bucket_hours)
 
-
-                output_file = (f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv" %
-                               (bucket_center.year, bucket_center.month, bucket_center.day,
-                                bucket_hour, bucket_hours))
+                output_file = os.path.join(output_dir or '.', file_name)
 
-
+                # Sort observations by timestamp within each bucket
+                sorted_obs = sorted(observations.values(), key=lambda x: int(x['timestamp']))
 
-
-                sorted_obs
+                if output_format == 'netcdf':
+                    convert_to_netcdf(sorted_obs, bucket_center.timestamp(), output_file)
 
+                elif output_format == 'csv':
                     with open(output_file, mode='w', newline='') as file:
                         writer = csv.DictWriter(file, fieldnames=headers)
                         writer.writeheader()
                         writer.writerows(sorted_obs)
 
                 elif output_format == 'json':
-
-                                   (bucket_center.year, bucket_center.month, bucket_center.day,
-                                    bucket_hour, bucket_hours))
-
-                    os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)
-
-                    # Sort observations by timestamp within each bucket
-                    sorted_obs = dict(sorted(observations.items(), key=lambda x: int(x[1]['timestamp'])))
-
+                    sorted_obs_dict = {k: v for k, v in sorted(observations.items(), key=lambda x: int(x[1]['timestamp']))}
                    with open(output_file, 'w', encoding='utf-8') as file:
-                        json.dump(
+                        json.dump(sorted_obs_dict, file, indent=4)
 
                 elif output_format == 'little_r':
-                    output_file = (f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r" %
-                                   (bucket_center.year, bucket_center.month, bucket_center.day,
-                                    bucket_hour, bucket_hours))
-
-                    os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)
-
-                    sorted_obs = sorted(observations.items(), key=lambda x: int(x[1]['timestamp']))
-
                     little_r_records = format_little_r(sorted_obs)
                     with open(output_file, 'w') as file:
                         file.write('\n'.join(little_r_records))
@@ -659,6 +946,229 @@ def poll_super_observations(start_time, end_time=None, interval=60, save_to_file
     print("-----------------------------------------------------")
     print("All super observations have been processed and saved.")
 
+def poll_super_observations(start_time, interval=60, bucket_hours=6.0, output_format=None, output_dir=None, callback=None):
+    """
+    Continuously polls for super observations and saves to files in specified format.
+    Will run indefinitely until interrupted.
+
+    Args:
+        start_time (str): Starting time in YYYY-MM-DD HH:MM:SS, YYYY-MM-DD_HH:MM or ISO format
+        interval (int): Polling interval in seconds when no data is received (default: 60)
+        bucket_hours (float): Size of time buckets in hours (default: 6.0)
+        output_format (str): Format for bucket files ('json', 'csv', 'little_r', 'netcdf')
+        output_dir (str): Directory for bucket files (default: current directory)
+        callback (callable): Optional callback for data processing
+    """
+    # Print warning about infinite loop
+    print(" ___________________________________________________________________")
+    print("| WARNING \U000026A0\U0000FE0F |")
+    print("| You are entering an endless loop. |")
+    print("| |")
+    print("| Press Ctrl + C anytime to exit. |")
+    print("|___________________________________________________________________|\n\n")
+    time.sleep(4)
+
+    start_time = to_unix_timestamp(start_time)
+
+    if output_format and output_format not in ['json', 'csv', 'little_r', 'netcdf']:
+        print("Please use one of the following formats:")
+        print(" - json\n - csv\n - little_r\n - netcdf")
+        return
+
+    if output_dir:
+        os.makedirs(output_dir, exist_ok=True)
+        print(f"\U0001F4C1 Files will be saved to {output_dir}")
+    else:
+        print(f"\U0001F4C1 Files will be saved to {os.getcwd()}")
+
+    # Convert start_time to datetime
+    start_dt = datetime.fromtimestamp(start_time, tz=timezone.utc)
+
+    # Calculate first center time that's after start_time
+    hours_since_day_start = start_dt.hour + start_dt.minute / 60
+    bucket_number = hours_since_day_start // bucket_hours
+    first_center = start_dt.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(hours=(bucket_number + 1) * bucket_hours)
+
+    headers = [
+        "timestamp", "id", "time", "latitude", "longitude", "altitude", "humidity",
+        "mission_name", "pressure", "specific_humidity", "speed_u", "speed_v", "temperature"
+    ]
+
+    buckets = {}  # {(bucket_center, mission_name): {'data': {}, 'last_write': timestamp, 'data_hash': str}}
+    current_timestamp = start_time
+    fetched_so_far = 0
+    mission_stats = {}
+
+    print(f"Starting continuous polling from {datetime.fromtimestamp(start_time, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC")
+    print(f"Polling interval: {interval} seconds")
+    print("-----------------------------------------------------")
+
+    try:
+        while True:
+            observations_page = get_super_observations_page(
+                since=current_timestamp,
+                min_time=start_time,
+                include_ids=True,
+                include_mission_name=True
+            )
+
+            if observations_page is None:
+                print(f"\nNull response from API. Retrying in {interval} seconds ...")
+                time.sleep(interval)
+                continue
+
+            observations = observations_page.get('observations', [])
+
+            # Invoke the callback with fetched super observations
+            if callback:
+                print("--------\nCallback\n--------")
+                callback(observations)
+
+            if observations:
+                fetched_so_far += len(observations)
+                print_current_timestamp = current_timestamp if current_timestamp < 1e11 else current_timestamp / 1e9
+                print(f"Fetched {fetched_so_far} super observations")
+                print(f"Current time: {datetime.fromtimestamp(print_current_timestamp, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
+                print("-----------------------------------------------------")
+
+            for obs in observations:
+                if 'mission_name' not in obs:
+                    continue
+
+                timestamp = obs.get('timestamp')
+                if not timestamp:
+                    continue
+
+                try:
+                    obs_time = datetime.fromtimestamp(timestamp, tz=timezone.utc)
+                except (OSError, ValueError, TypeError, OverflowError):
+                    continue
+
+                mission_name = obs.get('mission_name', 'Unknown')
+                obs['time'] = obs_time.replace(tzinfo=timezone.utc).isoformat()
+
+                processed_obs = {
+                    header: obs.get(header) if obs.get(header) not in [None, '', ' '] else 'None'
+                    for header in headers
+                }
+
+                obs_id = f"{timestamp}_{mission_name}"
+
+                if obs_time >= start_dt:
+                    hours_diff = (obs_time - first_center).total_seconds() / 3600
+                    bucket_index = floor(hours_diff / bucket_hours)
+                    bucket_center = first_center + timedelta(hours=bucket_index * bucket_hours)
+                    bucket_end = bucket_center + timedelta(hours=bucket_hours)
+
+                    if obs_time <= bucket_end:
+                        bucket_key = (bucket_center, mission_name)
+
+                        # Initialize bucket if needed
+                        if bucket_key not in buckets:
+                            buckets[bucket_key] = {
+                                'data': {},
+                                'last_write': 0,
+                                'data_hash': ''
+                            }
+
+                        # Update bucket data
+                        buckets[bucket_key]['data'][obs_id] = processed_obs
+
+                        # Track statistics
+                        if mission_name not in mission_stats:
+                            mission_stats[mission_name] = {'files': set(), 'observations': 0}
+                        mission_stats[mission_name]['observations'] += 1
+
+                        # Calculate new data hash
+                        sorted_data = sorted(buckets[bucket_key]['data'].items(), key=lambda x: int(x[1]['timestamp']))
+                        data_hash = hashlib.md5(str(sorted_data).encode()).hexdigest()
+
+                        # Check if we should write the bucket
+                        current_time = datetime.now(timezone.utc)
+                        time_since_last_write = current_time.timestamp() - buckets[bucket_key]['last_write']
+                        data_changed = data_hash != buckets[bucket_key]['data_hash']
+
+                        # Write if it's been more than interval seconds since last write OR if data has changed
+                        if (time_since_last_write >= interval or data_changed) and output_format:
+                            bucket_hour = int((bucket_center.hour + bucket_hours/2) % 24)
+
+                            file_name_format = {
+                                'csv': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.csv",
+                                'json': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.json",
+                                'netcdf': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d_%dh.nc",
+                                'little_r': f"WindBorne_{mission_name}_%04d-%02d-%02d_%02d-00_%dh.little_r"
+                            }
+
+                            file_name = file_name_format[output_format] % (
+                                bucket_center.year, bucket_center.month, bucket_center.day,
+                                bucket_hour, bucket_hours)
+
+                            output_file = os.path.join(output_dir or '.', file_name)
+                            sorted_obs = [obs for _, obs in sorted_data]
+
+                            # Write the file based on format
+                            try:
+                                if output_format == 'netcdf':
+                                    convert_to_netcdf(sorted_obs, bucket_center.timestamp(), output_file)
+                                elif output_format == 'csv':
+                                    with open(output_file, mode='w', newline='') as file:
+                                        writer = csv.DictWriter(file, fieldnames=headers)
+                                        writer.writeheader()
+                                        writer.writerows(sorted_obs)
+                                elif output_format == 'json':
+                                    sorted_obs_dict = {k: v for k, v in sorted_data}
+                                    with open(output_file, 'w', encoding='utf-8') as file:
+                                        json.dump(sorted_obs_dict, file, indent=4)
+                                elif output_format == 'little_r':
+                                    little_r_records = format_little_r(sorted_obs)
+                                    with open(output_file, 'w') as file:
+                                        file.write('\n'.join(little_r_records))
+
+                                buckets[bucket_key]['last_write'] = current_time.timestamp()
+                                buckets[bucket_key]['data_hash'] = data_hash
+                                mission_stats[mission_name]['files'].add(output_file)
+                            except Exception as e:
+                                print(f"Error writing bucket file {file_name}: {str(e)}")
+
+            # Clean up old buckets
+            current_time = datetime.now(timezone.utc)
+            buckets = {
+                k: v for k, v in buckets.items()
+                if current_time - k[0] <= timedelta(hours=bucket_hours * 2)  # Keep slightly longer for potential updates
+            }
+
+            next_timestamp = observations_page.get('next_since')
+            has_next_page = observations_page.get('has_next_page', False)
+
+            if next_timestamp and next_timestamp > current_timestamp:
+                current_timestamp = next_timestamp
+            elif not has_next_page:
+                print("-----------------------------------------------------")
+                print(f"\U0001F503 Latest super observations data have been processed.\nRetrying getting new super observations data in {interval} seconds...")
+                print("-----------------------------------------------------")
+                time.sleep(interval)
+                continue
+
+            if not observations:
+                print(f"\U0001F503 No new super observations data available.\n Retrying getting new super observations data in {interval} seconds...")
+                print("-----------------------------------------------------")
+                time.sleep(interval)
+
+    except KeyboardInterrupt:
+        print("\n\U0001F6D1 Received interrupt, stopping...")
+        print("-----------------------------------------------------")
+        for mission_name, stats in mission_stats.items():
+            print(f"Mission {mission_name}: {stats['observations']} super observations across {len(stats['files'])} files")
+    except Exception as e:
+        print(f"Error occurred: {str(e)}")
+        exit(1001)
+    finally:
+        print("-----------------------------------------------------")
+        print("Finished processing super observations.")
+
+# ------------
+# METADATA
+# ------------
 def get_flying_missions(cli=None, save_to_file=None):
     """
     Retrieves a list of currently flying missions.