terrakio-core 0.4.96__tar.gz → 0.4.98__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/PKG-INFO +1 -1
  2. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/pyproject.toml +1 -1
  3. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/__init__.py +1 -1
  4. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/convenience_functions/zonal_stats.py +114 -4
  5. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/endpoints/dataset_management.py +12 -4
  6. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/.gitignore +0 -0
  7. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/README.md +0 -0
  8. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/accessors.py +0 -0
  9. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/async_client.py +0 -0
  10. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/client.py +0 -0
  11. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/config.py +0 -0
  12. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/convenience_functions/create_dataset_file.py +0 -0
  13. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/convenience_functions/geoquries.py +0 -0
  14. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/endpoints/auth.py +0 -0
  15. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/endpoints/group_management.py +0 -0
  16. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/endpoints/mass_stats.py +0 -0
  17. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/endpoints/model_management.py +0 -0
  18. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/endpoints/space_management.py +0 -0
  19. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/endpoints/user_management.py +0 -0
  20. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/exceptions.py +0 -0
  21. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/helper/bounded_taskgroup.py +0 -0
  22. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/helper/decorators.py +0 -0
  23. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/helper/tiles.py +0 -0
  24. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/sync_client.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: terrakio-core
-Version: 0.4.96
+Version: 0.4.98
 Summary: Core package for the terrakio-python-api
 Requires-Python: >=3.11
 Requires-Dist: aiofiles>=24.1.0
pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "terrakio-core"
-version = "0.4.96"
+version = "0.4.98"
 description = "Core package for the terrakio-python-api"
 readme = "README.md"
 requires-python = ">=3.11"
terrakio_core/__init__.py
@@ -5,7 +5,7 @@ Terrakio Core
 Core components for Terrakio API clients.
 """
 
-__version__ = "0.4.96"
+__version__ = "0.4.98"
 
 from .async_client import AsyncClient
 from .sync_client import SyncClient as Client
terrakio_core/convenience_functions/zonal_stats.py
@@ -9,18 +9,18 @@ from typing import Optional
 # Third-party library imports
 import aiohttp
 import geopandas as gpd
-import nest_asyncio
 import pandas as pd
 import pyproj
 import xarray as xr
 from geopandas import GeoDataFrame
 from shapely.geometry import box, mapping, shape
 from shapely.ops import transform
+import threading
+from concurrent.futures import ThreadPoolExecutor
 
 # Local imports
 from .geoquries import request_geoquery_list
 
-nest_asyncio.apply()
 class cloud_object(gpd.GeoDataFrame):
     """
     This class is a class used for cloud
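Note: dropping nest_asyncio matters because asyncio.run() refuses to start inside an already-running loop, which is exactly the situation in Jupyter. A standalone sketch of the failure mode (not taken from the package):

    import asyncio

    async def work():
        return 42

    # In a plain script there is no running loop, so this succeeds:
    print(asyncio.run(work()))  # 42

    # Inside Jupyter a loop is already running, and the same call raises
    # "RuntimeError: asyncio.run() cannot be called from a running event loop".
    # 0.4.96 patched the loop via nest_asyncio.apply(); 0.4.98 instead detects
    # the running loop and hands the coroutine to a worker thread (next hunk).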
@@ -36,12 +36,51 @@ class cloud_object(gpd.GeoDataFrame):
         self.client = client
         self.job_name = job_name
 
+    def __repr__(self):
+        return (
+            f"<CloudZonalStats job_id='{self.job_id}', job_name='{self.job_name}'>\n"
+            f"Call .head(n) to fetch a preview GeoDataFrame when the job completes."
+        )
+
+    def _repr_html_(self):
+        # Jupyter HTML-friendly representation to avoid auto-rendering an empty DataFrame
+        return (
+            f"<div style='font-family:system-ui,Segoe UI,Helvetica,Arial,sans-serif'>"
+            f"<strong>Cloud Zonal Stats</strong><br/>"
+            f"job_id: <code>{self.job_id}</code><br/>"
+            f"job_name: <code>{self.job_name}</code><br/>"
+            f"<em>Use <code>.head(n)</code> to retrieve a preview once the job is completed.</em>"
+            f"</div>"
+        )
+
     def head(self, n = 5):
         """
         Returns the first n files stored in the cloud bucket.
         """
-        return asyncio.run(self._head_async(n))
+        # Detect if we're inside an existing event loop (e.g., Jupyter)
+        in_running_loop = False
+        try:
+            asyncio.get_running_loop()
+            in_running_loop = True
+        except RuntimeError:
+            in_running_loop = False
 
+        if in_running_loop:
+            # Run the async function in a separate thread with its own loop
+            def run_async_in_thread():
+                new_loop = asyncio.new_event_loop()
+                try:
+                    return new_loop.run_until_complete(self._head_async(n))
+                finally:
+                    new_loop.close()
+
+            with ThreadPoolExecutor(max_workers=1) as executor:
+                future = executor.submit(run_async_in_thread)
+                return future.result()
+        else:
+            # No running loop - safe to use asyncio.run
+            return asyncio.run(self._head_async(n))
+
     async def _head_async(self, n = 5):
         """
         Returns the first n files stored in the cloud bucket.
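Note: the new head() amounts to a general sync-over-async bridge. A self-contained sketch of the same pattern, assuming nothing from the package:

    import asyncio
    from concurrent.futures import ThreadPoolExecutor

    async def fetch_preview():
        await asyncio.sleep(0)  # stand-in for the real bucket download
        return "preview"

    def run_sync(coro):
        """Run a coroutine to completion whether or not a loop is running."""
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            return asyncio.run(coro)  # plain script: no loop, run directly

        def run_in_thread():
            loop = asyncio.new_event_loop()
            try:
                return loop.run_until_complete(coro)
            finally:
                loop.close()

        with ThreadPoolExecutor(max_workers=1) as executor:
            return executor.submit(run_in_thread).result()

    print(run_sync(fetch_preview()))  # -> preview

One worker thread is enough: the fresh loop lives only for the duration of the call, so the caller's loop is never re-entered.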
@@ -120,12 +159,54 @@ class cloud_object(gpd.GeoDataFrame):
             })
 
             self.client.logger.info(f"Created GeoDataFrame with {len(gdf)} rows")
+
+            # Derive id values from json metadata (prefer 'file', fallback to 'group')
+            id_values = []
+            for i in range(min_length):
+                entry = json_data[i] if i < len(json_data) else {}
+                id_candidate = entry.get('file') or entry.get('group') or ''
+                if isinstance(id_candidate, str) and id_candidate.startswith('file_'):
+                    id_val = id_candidate[len('file_'):]
+                elif isinstance(id_candidate, str) and id_candidate.startswith('group_'):
+                    id_val = id_candidate[len('group_'):]
+                else:
+                    id_val = str(id_candidate) if id_candidate else str(i)
+                id_values.append(id_val)
+
+            # Geometry to id mapping using WKB to avoid precision issues
+            geom_to_id = {geometries[i].wkb: id_values[i] for i in range(min_length)}
+
             try:
                 expanded_gdf = expand_on_variables_and_time(gdf)
+
+                # Attach id as first index level, geometry second, time third if present
+                if hasattr(expanded_gdf.index, 'names') and 'geometry' in expanded_gdf.index.names:
+                    if isinstance(expanded_gdf.index, pd.MultiIndex):
+                        geometry_index = expanded_gdf.index.get_level_values('geometry')
+                    else:
+                        geometry_index = expanded_gdf.index
+                    id_col = [geom_to_id.get(geom.wkb) for geom in geometry_index]
+                    expanded_gdf['id'] = id_col
+                    expanded_gdf = expanded_gdf.reset_index()
+                    if 'time' in expanded_gdf.columns:
+                        expanded_gdf = expanded_gdf.set_index(['id', 'geometry', 'time'])
+                    else:
+                        expanded_gdf = expanded_gdf.set_index(['id', 'geometry'])
+                else:
+                    # geometry exists as a column
+                    id_col = [geom_to_id.get(geom.wkb) for geom in expanded_gdf['geometry']]
+                    expanded_gdf['id'] = id_col
+                    if 'time' in expanded_gdf.columns:
+                        expanded_gdf = expanded_gdf.set_index(['id', 'geometry', 'time'])
+                    else:
+                        expanded_gdf = expanded_gdf.set_index(['id', 'geometry'])
+
                 return expanded_gdf
             except NameError:
                 self.client.logger.warning("expand_on_variables_and_time function not found, returning raw GeoDataFrame")
-                return gdf
+                # Set id on raw gdf and index appropriately
+                gdf['id'] = id_values
+                return gdf.set_index(['id', 'geometry'])
 
         else:
             self.client.logger.warning(f"Failed to download geometry data: HTTP {response.status}")
@@ -513,6 +594,7 @@ async def zonal_stats(
         job_name = await client.mass_stats.track_job([mass_stats_id])
         job_name = job_name[mass_stats_id]["name"]
         cloud_files_object = cloud_object(job_id = mass_stats_id, job_name = job_name, client = client)
+
         return cloud_files_object
 
     quries = []
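Note: a hedged sketch of how the cloud-backed branch is consumed; the zonal_stats keyword names are assumptions for illustration, not the documented signature (only the import paths are confirmed by this diff):

    import geopandas as gpd
    from terrakio_core import AsyncClient
    from terrakio_core.convenience_functions.zonal_stats import zonal_stats

    async def preview_job(gdf: gpd.GeoDataFrame, expr: str):
        client = AsyncClient()  # assumed no-argument construction
        result = await zonal_stats(gdf=gdf, expr=expr, client=client)  # assumed kwargs
        print(result)           # cloud-backed runs now print job_id/job_name
        return result.head(3)   # preview GeoDataFrame once the job completes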
@@ -539,5 +621,33 @@ async def zonal_stats(
         "is_cloud_backed": False,
     }
     gdf_with_datasets = expand_on_variables_and_time(gdf_with_datasets)
+
+    # If an id_column is provided, attach it to the result and include in the index
+    if id_column is not None and id_column in gdf.columns:
+        # Build a mapping from input geometries to id values (use WKB for robust equality)
+        geometry_to_id = {geom.wkb: id_val for geom, id_val in zip(gdf.geometry, gdf[id_column])}
+
+        # Determine geometry values in the result (index may be geometry or (geometry, time))
+        if hasattr(gdf_with_datasets.index, 'names') and 'geometry' in gdf_with_datasets.index.names:
+            if isinstance(gdf_with_datasets.index, pd.MultiIndex):
+                geometry_index = gdf_with_datasets.index.get_level_values('geometry')
+            else:
+                geometry_index = gdf_with_datasets.index
+            id_values = [geometry_to_id.get(geom.wkb) for geom in geometry_index]
+            gdf_with_datasets[id_column] = id_values
+            # Reset index to control index composition precisely, then set to desired levels
+            gdf_with_datasets = gdf_with_datasets.reset_index()
+            if 'time' in gdf_with_datasets.columns:
+                gdf_with_datasets = gdf_with_datasets.set_index([id_column, 'geometry', 'time'])
+            else:
+                gdf_with_datasets = gdf_with_datasets.set_index([id_column, 'geometry'])
+        else:
+            # geometry exists as a column
+            id_values = [geometry_to_id.get(geom.wkb) for geom in gdf_with_datasets['geometry']]
+            gdf_with_datasets[id_column] = id_values
+            if 'time' in gdf_with_datasets.columns:
+                gdf_with_datasets = gdf_with_datasets.set_index([id_column, 'geometry', 'time'])
+            else:
+                gdf_with_datasets = gdf_with_datasets.set_index([id_column, 'geometry'])
     return gdf_with_datasets
 
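Note: the reset_index/set_index reshaping above, reduced to a minimal pandas example with made-up values:

    import pandas as pd
    from shapely.geometry import Point

    df = pd.DataFrame(
        {"mean": [1.0, 2.0]},
        index=pd.MultiIndex.from_tuples(
            [(Point(0, 0), "2020-01-01"), (Point(1, 1), "2020-01-01")],
            names=["geometry", "time"],
        ),
    )
    geometry_to_id = {Point(0, 0).wkb: "a", Point(1, 1).wkb: "b"}

    # Attach ids via the WKB lookup, then rebuild the index with id first
    df["site_id"] = [geometry_to_id.get(g.wkb) for g in df.index.get_level_values("geometry")]
    df = df.reset_index().set_index(["site_id", "geometry", "time"])
    print(df)  # rows now keyed by (site_id, geometry, time)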
terrakio_core/endpoints/dataset_management.py
@@ -69,7 +69,7 @@ class DatasetManagement:
             name: Name of the dataset (required)
             collection: Dataset collection (default: 'terrakio-datasets')
             products: List of products
-            dates_iso8601: List of dates
+            dates_iso8601: List of dates (will be automatically sorted chronologically)
             bucket: Storage bucket
             path: Storage path
             data_type: Data type
@@ -142,7 +142,7 @@ class DatasetManagement:
             append: Whether to append data or replace (default: True)
             collection: Dataset collection (default: 'terrakio-datasets')
             products: List of products
-            dates_iso8601: List of dates
+            dates_iso8601: List of dates (will be automatically sorted chronologically)
             bucket: Storage bucket
             path: Storage path
             data_type: Data type
@@ -162,6 +162,10 @@ class DatasetManagement:
         Raises:
             APIError: If the API request fails
         """
+        # Sort dates_iso8601 chronologically if provided
+        if dates_iso8601 is not None:
+            dates_iso8601 = sorted(dates_iso8601)
+
         params = {"collection": collection, "append": str(append).lower()}
         payload = {"name": name}
         param_mapping = {
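Note: plain sorted() suffices here because zero-padded ISO-8601 timestamps order lexicographically exactly as they do chronologically. A quick check:

    dates = ["2024-03-01T00:00:00Z", "2023-12-31T23:59:59Z", "2024-01-15T12:00:00Z"]
    assert sorted(dates) == [
        "2023-12-31T23:59:59Z",
        "2024-01-15T12:00:00Z",
        "2024-03-01T00:00:00Z",
    ]
    # Caveat: this holds for uniformly formatted timestamps with the same
    # timezone designator; mixed offsets would need real datetime parsing.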
@@ -215,7 +219,7 @@ class DatasetManagement:
             append: Whether to append data or replace (default: True)
             collection: Dataset collection (default: 'terrakio-datasets')
             products: List of products
-            dates_iso8601: List of dates
+            dates_iso8601: List of dates (will be automatically sorted chronologically)
             bucket: Storage bucket
             path: Storage path
             data_type: Data type
@@ -236,6 +240,10 @@ class DatasetManagement:
         Raises:
             APIError: If the API request fails
         """
+        # Sort dates_iso8601 chronologically if provided
+        if dates_iso8601 is not None:
+            dates_iso8601 = sorted(dates_iso8601)
+
         params = {"collection": collection, "append": str(append).lower()}
         payload = {"name": name}
         param_mapping = {
@@ -289,7 +297,7 @@ class DatasetManagement:
             name: Name of the dataset (required)
             collection: Dataset collection (default: 'terrakio-datasets')
             products: List of products
-            dates_iso8601: List of dates
+            dates_iso8601: List of dates (will be automatically sorted chronologically)
             bucket: Storage bucket
             path: Storage path
             data_type: Data type