terrakio-core 0.4.95__tar.gz → 0.4.97__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of terrakio-core might be problematic.
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/PKG-INFO +1 -1
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/pyproject.toml +1 -1
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/__init__.py +1 -1
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/accessors.py +98 -16
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/convenience_functions/geoquries.py +1 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/convenience_functions/zonal_stats.py +74 -2
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/endpoints/dataset_management.py +12 -4
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/.gitignore +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/README.md +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/async_client.py +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/client.py +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/config.py +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/convenience_functions/create_dataset_file.py +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/endpoints/auth.py +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/endpoints/group_management.py +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/endpoints/mass_stats.py +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/endpoints/model_management.py +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/endpoints/space_management.py +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/endpoints/user_management.py +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/exceptions.py +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/helper/bounded_taskgroup.py +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/helper/decorators.py +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/helper/tiles.py +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/sync_client.py +0 -0

{terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/accessors.py RENAMED

@@ -673,9 +673,61 @@ class GeoXarrayAccessor:
             result_gdf[col] = new_data

         return result_gdf
-
+
     def _apply_cloud_reduction(self, reduction_func: str, dim: Optional[Union[str, List[str]]] = None,
-
+                               columns: Optional[List[str]] = None, **kwargs):
+
+        if hasattr(self._obj, 'job_id') and self._obj.job_id and self._client:
+            import asyncio
+            import concurrent.futures
+
+            def check_job_status():
+                loop = asyncio.new_event_loop()
+                asyncio.set_event_loop(loop)
+                try:
+                    return loop.run_until_complete(
+                        self._client.mass_stats.track_job([self._obj.job_id])
+                    )
+                finally:
+                    loop.close()
+
+            try:
+                with concurrent.futures.ThreadPoolExecutor() as executor:
+                    future = executor.submit(check_job_status)
+                    track_info = future.result(timeout=10)  # Short timeout for status check
+
+                job_info = track_info[self._obj.job_id]
+                status = job_info['status']
+
+                if status in ["Failed", "Cancelled", "Error"]:
+                    raise RuntimeError(f"The zonal stats job (job_id: {self._obj.job_id}) has failed, cancelled, or errored. Please check the job status!")
+
+                elif status != "Completed":
+                    # Job is still running - include progress information
+                    completed = job_info.get('completed', 0)
+                    total = job_info.get('total', 1)
+                    progress = completed / total if total > 0 else 0
+                    percentage = progress * 100
+
+                    # Create progress bar
+                    bar_length = 30  # Shorter bar for error message
+                    filled_length = int(bar_length * progress)
+                    bar = '█' * filled_length + '░' * (bar_length - filled_length)
+
+                    raise RuntimeError(
+                        f"The zonal stats job (job_id: {self._obj.job_id}) is still running. "
+                        f"Progress: [{bar}] {percentage:.1f}% ({completed}/{total}). "
+                        f"Please come back at a later time!"
+                    )
+
+            except concurrent.futures.TimeoutError:
+                self._client.logger.warning("Timeout checking job status, proceeding with reduction")
+            except Exception as e:
+                if "still running" in str(e) or "failed" in str(e).lower():
+                    raise  # Re-raise our custom errors
+                else:
+                    self._client.logger.warning(f"Could not check job status: {e}, proceeding with reduction")
+
         current_time = time.time()
         chain_reset_threshold = 0.01

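The status check above has to call an async client method from synchronous accessor code; it does so by running a fresh event loop inside a worker thread and waiting on the result with a timeout. A stripped-down sketch of that pattern (the coroutine, return value and timeout below are placeholders, not the package's API):

    import asyncio
    import concurrent.futures

    async def fetch_status():
        # Placeholder coroutine standing in for an async client call such as track_job(...)
        await asyncio.sleep(0.1)
        return {"status": "Completed"}

    def fetch_status_sync(timeout: float = 10.0):
        def run_in_new_loop():
            # A brand-new loop, created and closed inside the worker thread
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            try:
                return loop.run_until_complete(fetch_status())
            finally:
                loop.close()

        # Running in a worker thread avoids clashing with any event loop in the calling thread
        with concurrent.futures.ThreadPoolExecutor() as executor:
            return executor.submit(run_in_new_loop).result(timeout=timeout)

    print(fetch_status_sync())  # {'status': 'Completed'}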
@@ -835,20 +887,25 @@ class GeoXarrayAccessor:
             "from io import BytesIO",
             "import tempfile",
             "import os",
+            "import traceback",
             "",
             "def consume(filename, file_bytes, metadata):",
         ]

         script_lines.extend([
+            "    tmp_file = None",
+            "    nc_tmp_file = None",
+            "    ds = None",
             "    ",
             "    try:",
             "        with tempfile.NamedTemporaryFile(suffix='.nc', delete=False) as tmp_file:",
             "            tmp_file.write(file_bytes)",
             "            tmp_file.flush()",
-            "        ds = xr.open_dataset(tmp_file.name, engine='
+            "        ds = xr.open_dataset(tmp_file.name, engine='h5netcdf')",
             "    ",
         ])

+        # Add operations without excessive debugging
         for i, op in enumerate(self._pending_operations):
             op_type = op['type']
             params = op['params']
@@ -882,8 +939,13 @@ class GeoXarrayAccessor:
             '            output_filename = f"{base_filename}_processed.csv"',
             "            csv_data = result_df.to_csv(index=False).encode()",
             "            ",
-            "            ds
-            "
+            "            if ds is not None:",
+            "                ds.close()",
+            "            if tmp_file and hasattr(tmp_file, 'name'):",
+            "                try:",
+            "                    os.unlink(tmp_file.name)",
+            "                except:",
+            "                    pass",
             "            return output_filename, csv_data",
             "        else:",
             "            # Output as NetCDF - still has dimensions",
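These cleanup additions, together with the tmp_file / nc_tmp_file / ds = None initialisers added in the previous hunk, are meant to ensure the generated consume() script closes the dataset and unlinks its temporary file on every exit path. A rough standalone sketch of the same open-then-clean-up pattern, assuming the h5netcdf engine shown in the diff (the helper name is illustrative, not part of the package):

    import os
    import tempfile
    import xarray as xr

    def open_netcdf_bytes(file_bytes: bytes) -> xr.Dataset:
        tmp_file = None
        ds = None
        try:
            # Persist the payload so xarray can open it by file name
            with tempfile.NamedTemporaryFile(suffix='.nc', delete=False) as tmp_file:
                tmp_file.write(file_bytes)
                tmp_file.flush()
            ds = xr.open_dataset(tmp_file.name, engine='h5netcdf')
            return ds.load()  # pull data into memory before the file is removed
        finally:
            # Close and unlink regardless of success; ignore cleanup failures
            if ds is not None:
                try:
                    ds.close()
                except Exception:
                    pass
            if tmp_file is not None:
                try:
                    os.unlink(tmp_file.name)
                except OSError:
                    pass

The generated script in the diff performs the same steps inline, with per-branch try/except blocks rather than a single finally.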
@@ -897,24 +959,44 @@ class GeoXarrayAccessor:
             "                netcdf_data = f.read()",
             "            ",
             "            # Clean up temp files",
-            "
+            "            try:",
+            "                os.unlink(nc_tmp_file.name)",
+            "            except:",
+            "                pass",
             "            ",
-            "            ds
-            "
+            "            if ds is not None:",
+            "                ds.close()",
+            "            if tmp_file and hasattr(tmp_file, 'name'):",
+            "                try:",
+            "                    os.unlink(tmp_file.name)",
+            "                except:",
+            "                    pass",
             "            return output_filename, netcdf_data",
         ])

         script_lines.extend([
             "    ",
             "    except Exception as e:",
-            "
-            "
-            "
-            "
-            "
-            "
-            "
-            "
+            "        ",
+            "        # Clean up resources",
+            "        if ds is not None:",
+            "            try:",
+            "                ds.close()",
+            "            except:",
+            "                pass",
+            "        ",
+            "        if tmp_file and hasattr(tmp_file, 'name'):",
+            "            try:",
+            "                os.unlink(tmp_file.name)",
+            "            except:",
+            "                pass",
+            "        ",
+            "        if nc_tmp_file and hasattr(nc_tmp_file, 'name'):",
+            "            try:",
+            "                os.unlink(nc_tmp_file.name)",
+            "            except:",
+            "                pass",
+            "        ",
             "        return None, None",
         ])


{terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/convenience_functions/zonal_stats.py RENAMED

@@ -139,8 +139,23 @@ class cloud_object(gpd.GeoDataFrame):
             raise RuntimeError(f"The zonal stats job (job_id: {self.job_id}) has failed, cancelled, or errored. Please check the job status!")

         else:
-
-
+            # Job is still running - include progress information
+            completed = job_info.get('completed', 0)
+            total = job_info.get('total', 1)
+            progress = completed / total if total > 0 else 0
+            percentage = progress * 100
+
+            # Create progress bar
+            bar_length = 30  # Shorter bar for error message
+            filled_length = int(bar_length * progress)
+            bar = '█' * filled_length + '░' * (bar_length - filled_length)
+
+            raise RuntimeError(
+                f"The zonal stats job (job_id: {self.job_id}) is still running. "
+                f"Progress: [{bar}] {percentage:.1f}% ({completed}/{total}). "
+                f"Please come back at a later time!"
+            )
+
 def expand_on_time(gdf):
     """
     Expand datasets on time dimension - each time becomes a new row.
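The same bar construction now appears in both accessors.py and zonal_stats.py, and it reduces to a few lines of arithmetic and string formatting. A minimal self-contained sketch (the function name and sample numbers are illustrative, not part of the package):

    def render_progress(completed: int, total: int, bar_length: int = 30) -> str:
        # Guard against a zero total, as the library code does
        progress = completed / total if total > 0 else 0
        filled_length = int(bar_length * progress)
        bar = '█' * filled_length + '░' * (bar_length - filled_length)
        return f"[{bar}] {progress * 100:.1f}% ({completed}/{total})"

    print(render_progress(7, 20))
    # [██████████░░░░░░░░░░░░░░░░░░░░] 35.0% (7/20)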
@@ -498,6 +513,35 @@ async def zonal_stats(
         job_name = await client.mass_stats.track_job([mass_stats_id])
         job_name = job_name[mass_stats_id]["name"]
         cloud_files_object = cloud_object(job_id = mass_stats_id, job_name = job_name, client = client)
+
+        # Attach id column behavior to cloud object via a wrapper method
+        async def _head_with_id(n = 5):
+            result_gdf = await cloud_files_object._head_async(n)
+            if id_column is not None and id_column in gdf.columns:
+                geometry_to_id = {geom.wkb: id_val for geom, id_val in zip(gdf.geometry, gdf[id_column])}
+                if hasattr(result_gdf.index, 'names') and 'geometry' in result_gdf.index.names:
+                    if isinstance(result_gdf.index, pd.MultiIndex):
+                        geometry_index = result_gdf.index.get_level_values('geometry')
+                    else:
+                        geometry_index = result_gdf.index
+                    id_values = [geometry_to_id.get(geom.wkb) for geom in geometry_index]
+                    result_gdf[id_column] = id_values
+                    result_gdf = result_gdf.reset_index()
+                    if 'time' in result_gdf.columns:
+                        result_gdf = result_gdf.set_index([id_column, 'geometry', 'time'])
+                    else:
+                        result_gdf = result_gdf.set_index([id_column, 'geometry'])
+                else:
+                    id_values = [geometry_to_id.get(geom.wkb) for geom in result_gdf['geometry']]
+                    result_gdf[id_column] = id_values
+                    if 'time' in result_gdf.columns:
+                        result_gdf = result_gdf.set_index([id_column, 'geometry', 'time'])
+                    else:
+                        result_gdf = result_gdf.set_index([id_column, 'geometry'])
+            return result_gdf
+
+        # Monkey-patch a convenience method without modifying original class contract
+        cloud_files_object.head_with_id = lambda n=5: asyncio.run(_head_with_id(n))
         return cloud_files_object

     quries = []
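For callers, the monkey-patched attribute means the cloud-backed result can be previewed synchronously with the id column reattached. A hypothetical call, assuming `result` is the cloud_object returned by zonal_stats and "site_id" was passed as id_column:

    # Hypothetical usage of the wrapper added in this release
    preview = result.head_with_id(10)   # first 10 rows, id column restored and indexed
    print(preview.index.names)          # ['site_id', 'geometry', 'time'] when a time level exists

Because the wrapper calls asyncio.run(), it is intended for synchronous callers; asyncio.run() raises a RuntimeError if an event loop is already running in the calling thread.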
@@ -524,5 +568,33 @@ async def zonal_stats(
         "is_cloud_backed": False,
     }
     gdf_with_datasets = expand_on_variables_and_time(gdf_with_datasets)
+
+    # If an id_column is provided, attach it to the result and include in the index
+    if id_column is not None and id_column in gdf.columns:
+        # Build a mapping from input geometries to id values (use WKB for robust equality)
+        geometry_to_id = {geom.wkb: id_val for geom, id_val in zip(gdf.geometry, gdf[id_column])}
+
+        # Determine geometry values in the result (index may be geometry or (geometry, time))
+        if hasattr(gdf_with_datasets.index, 'names') and 'geometry' in gdf_with_datasets.index.names:
+            if isinstance(gdf_with_datasets.index, pd.MultiIndex):
+                geometry_index = gdf_with_datasets.index.get_level_values('geometry')
+            else:
+                geometry_index = gdf_with_datasets.index
+            id_values = [geometry_to_id.get(geom.wkb) for geom in geometry_index]
+            gdf_with_datasets[id_column] = id_values
+            # Reset index to control index composition precisely, then set to desired levels
+            gdf_with_datasets = gdf_with_datasets.reset_index()
+            if 'time' in gdf_with_datasets.columns:
+                gdf_with_datasets = gdf_with_datasets.set_index([id_column, 'geometry', 'time'])
+            else:
+                gdf_with_datasets = gdf_with_datasets.set_index([id_column, 'geometry'])
+        else:
+            # geometry exists as a column
+            id_values = [geometry_to_id.get(geom.wkb) for geom in gdf_with_datasets['geometry']]
+            gdf_with_datasets[id_column] = id_values
+            if 'time' in gdf_with_datasets.columns:
+                gdf_with_datasets = gdf_with_datasets.set_index([id_column, 'geometry', 'time'])
+            else:
+                gdf_with_datasets = gdf_with_datasets.set_index([id_column, 'geometry'])
     return gdf_with_datasets

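Both this hunk and the wrapper above key the geometry-to-id lookup on shapely's WKB bytes, which gives exact, hashable matching between input rows and result rows without relying on geometry object identity. A small self-contained sketch of the technique (the site_id column and point data are invented for illustration):

    import geopandas as gpd
    from shapely.geometry import Point

    # Input features with a user-supplied id column
    gdf = gpd.GeoDataFrame(
        {"site_id": ["A", "B"]},
        geometry=[Point(0, 0), Point(1, 1)],
        crs="EPSG:4326",
    )

    # Map each geometry's WKB bytes to its id value
    geometry_to_id = {geom.wkb: id_val for geom, id_val in zip(gdf.geometry, gdf["site_id"])}

    # A result frame keyed by the same geometries, as the zonal stats output would be
    result = gpd.GeoDataFrame({"mean": [1.5, 2.5]}, geometry=list(gdf.geometry), crs="EPSG:4326")
    result["site_id"] = [geometry_to_id.get(geom.wkb) for geom in result.geometry]
    result = result.set_index(["site_id", "geometry"])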

{terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/endpoints/dataset_management.py RENAMED

@@ -69,7 +69,7 @@ class DatasetManagement:
             name: Name of the dataset (required)
             collection: Dataset collection (default: 'terrakio-datasets')
             products: List of products
-            dates_iso8601: List of dates
+            dates_iso8601: List of dates (will be automatically sorted chronologically)
             bucket: Storage bucket
             path: Storage path
             data_type: Data type
@@ -142,7 +142,7 @@ class DatasetManagement:
             append: Whether to append data or replace (default: True)
             collection: Dataset collection (default: 'terrakio-datasets')
             products: List of products
-            dates_iso8601: List of dates
+            dates_iso8601: List of dates (will be automatically sorted chronologically)
             bucket: Storage bucket
             path: Storage path
             data_type: Data type
@@ -162,6 +162,10 @@ class DatasetManagement:
         Raises:
             APIError: If the API request fails
         """
+        # Sort dates_iso8601 chronologically if provided
+        if dates_iso8601 is not None:
+            dates_iso8601 = sorted(dates_iso8601)
+
         params = {"collection": collection, "append": str(append).lower()}
         payload = {"name": name}
         param_mapping = {
@@ -215,7 +219,7 @@ class DatasetManagement:
             append: Whether to append data or replace (default: True)
             collection: Dataset collection (default: 'terrakio-datasets')
             products: List of products
-            dates_iso8601: List of dates
+            dates_iso8601: List of dates (will be automatically sorted chronologically)
             bucket: Storage bucket
             path: Storage path
             data_type: Data type
@@ -236,6 +240,10 @@ class DatasetManagement:
         Raises:
             APIError: If the API request fails
         """
+        # Sort dates_iso8601 chronologically if provided
+        if dates_iso8601 is not None:
+            dates_iso8601 = sorted(dates_iso8601)
+
         params = {"collection": collection, "append": str(append).lower()}
         payload = {"name": name}
         param_mapping = {
@@ -289,7 +297,7 @@ class DatasetManagement:
             name: Name of the dataset (required)
             collection: Dataset collection (default: 'terrakio-datasets')
             products: List of products
-            dates_iso8601: List of dates
+            dates_iso8601: List of dates (will be automatically sorted chronologically)
             bucket: Storage bucket
             path: Storage path
             data_type: Data type
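The docstring notes and the new sorted() calls rely on a property of ISO 8601 strings: when all dates share a consistent format, lexicographic order equals chronological order, so a plain sorted() is enough. A quick illustration with invented dates:

    dates_iso8601 = ["2021-03-01", "2020-12-31", "2021-01-15"]
    print(sorted(dates_iso8601))
    # ['2020-12-31', '2021-01-15', '2021-03-01']

The guarantee only holds for uniformly formatted values; mixing dates with and without time components, or with differing UTC offsets, would break the equivalence.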