terrakio-core 0.4.95__py3-none-any.whl → 0.4.97__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release.

terrakio_core/__init__.py CHANGED
@@ -5,7 +5,7 @@ Terrakio Core
  Core components for Terrakio API clients.
  """
 
- __version__ = "0.4.95"
+ __version__ = "0.4.97"
 
  from .async_client import AsyncClient
  from .sync_client import SyncClient as Client
terrakio_core/accessors.py CHANGED
@@ -673,9 +673,61 @@ class GeoXarrayAccessor:
              result_gdf[col] = new_data
 
          return result_gdf
-
+
      def _apply_cloud_reduction(self, reduction_func: str, dim: Optional[Union[str, List[str]]] = None,
-                                columns: Optional[List[str]] = None, **kwargs):
+                                columns: Optional[List[str]] = None, **kwargs):
+
+         if hasattr(self._obj, 'job_id') and self._obj.job_id and self._client:
+             import asyncio
+             import concurrent.futures
+
+             def check_job_status():
+                 loop = asyncio.new_event_loop()
+                 asyncio.set_event_loop(loop)
+                 try:
+                     return loop.run_until_complete(
+                         self._client.mass_stats.track_job([self._obj.job_id])
+                     )
+                 finally:
+                     loop.close()
+
+             try:
+                 with concurrent.futures.ThreadPoolExecutor() as executor:
+                     future = executor.submit(check_job_status)
+                     track_info = future.result(timeout=10)  # Short timeout for status check
+
+                 job_info = track_info[self._obj.job_id]
+                 status = job_info['status']
+
+                 if status in ["Failed", "Cancelled", "Error"]:
+                     raise RuntimeError(f"The zonal stats job (job_id: {self._obj.job_id}) has failed, cancelled, or errored. Please check the job status!")
+
+                 elif status != "Completed":
+                     # Job is still running - include progress information
+                     completed = job_info.get('completed', 0)
+                     total = job_info.get('total', 1)
+                     progress = completed / total if total > 0 else 0
+                     percentage = progress * 100
+
+                     # Create progress bar
+                     bar_length = 30  # Shorter bar for error message
+                     filled_length = int(bar_length * progress)
+                     bar = '█' * filled_length + '░' * (bar_length - filled_length)
+
+                     raise RuntimeError(
+                         f"The zonal stats job (job_id: {self._obj.job_id}) is still running. "
+                         f"Progress: [{bar}] {percentage:.1f}% ({completed}/{total}). "
+                         f"Please come back at a later time!"
+                     )
+
+             except concurrent.futures.TimeoutError:
+                 self._client.logger.warning("Timeout checking job status, proceeding with reduction")
+             except Exception as e:
+                 if "still running" in str(e) or "failed" in str(e).lower():
+                     raise  # Re-raise our custom errors
+                 else:
+                     self._client.logger.warning(f"Could not check job status: {e}, proceeding with reduction")
+
          current_time = time.time()
          chain_reset_threshold = 0.01
 
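A note on the pattern above: _apply_cloud_reduction needs the result of an async call (track_job) from synchronous code, so it runs a fresh event loop inside a worker thread and bounds the wait with a timeout. This sidesteps the RuntimeError that asyncio.run() raises when the calling thread already has a running loop (for example, inside Jupyter). A minimal, self-contained sketch of the same technique; fetch_status and its payload shape are hypothetical stand-ins for client.mass_stats.track_job:

```python
import asyncio
import concurrent.futures

async def fetch_status(job_id):
    # Hypothetical stand-in for an async API call like track_job
    await asyncio.sleep(0.1)
    return {job_id: {"status": "Completed", "completed": 10, "total": 10}}

def check_status_sync(job_id, timeout=10.0):
    def run_in_fresh_loop():
        # A private event loop per worker thread: safe even if the
        # calling thread already has its own loop running.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            return loop.run_until_complete(fetch_status(job_id))
        finally:
            loop.close()

    with concurrent.futures.ThreadPoolExecutor() as executor:
        future = executor.submit(run_in_fresh_loop)
        # Raises concurrent.futures.TimeoutError if the call is too slow
        return future.result(timeout=timeout)

print(check_status_sync("job-123"))
```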
@@ -835,20 +887,25 @@ class GeoXarrayAccessor:
              "from io import BytesIO",
              "import tempfile",
              "import os",
+             "import traceback",
              "",
              "def consume(filename, file_bytes, metadata):",
          ]
 
          script_lines.extend([
+             "    tmp_file = None",
+             "    nc_tmp_file = None",
+             "    ds = None",
              "    ",
              "    try:",
              "        with tempfile.NamedTemporaryFile(suffix='.nc', delete=False) as tmp_file:",
              "            tmp_file.write(file_bytes)",
              "            tmp_file.flush()",
-             "            ds = xr.open_dataset(tmp_file.name, engine='scipy')",
+             "            ds = xr.open_dataset(tmp_file.name, engine='h5netcdf')",
              "        ",
          ])
 
+         # Add operations without excessive debugging
          for i, op in enumerate(self._pending_operations):
              op_type = op['type']
              params = op['params']
@@ -882,8 +939,13 @@ class GeoXarrayAccessor:
              '            output_filename = f"{base_filename}_processed.csv"',
              "            csv_data = result_df.to_csv(index=False).encode()",
              "            ",
-             "            ds.close()",
-             "            os.unlink(tmp_file.name)",
+             "            if ds is not None:",
+             "                ds.close()",
+             "            if tmp_file and hasattr(tmp_file, 'name'):",
+             "                try:",
+             "                    os.unlink(tmp_file.name)",
+             "                except:",
+             "                    pass",
              "            return output_filename, csv_data",
              "        else:",
              "            # Output as NetCDF - still has dimensions",
@@ -897,24 +959,44 @@ class GeoXarrayAccessor:
              "            netcdf_data = f.read()",
              "            ",
              "            # Clean up temp files",
-             "            os.unlink(nc_tmp_file.name)",
+             "            try:",
+             "                os.unlink(nc_tmp_file.name)",
+             "            except:",
+             "                pass",
              "            ",
-             "            ds.close()",
-             "            os.unlink(tmp_file.name)",
+             "            if ds is not None:",
+             "                ds.close()",
+             "            if tmp_file and hasattr(tmp_file, 'name'):",
+             "                try:",
+             "                    os.unlink(tmp_file.name)",
+             "                except:",
+             "                    pass",
              "            return output_filename, netcdf_data",
          ])
 
          script_lines.extend([
              "    ",
              "    except Exception as e:",
-             "        try:",
-             "            os.unlink(tmp_file.name)",
-             "        except:",
-             "            pass",
-             "        try:",
-             "            os.unlink(nc_tmp_file.name)",
-             "        except:",
-             "            pass",
+             "        ",
+             "        # Clean up resources",
+             "        if ds is not None:",
+             "            try:",
+             "                ds.close()",
+             "            except:",
+             "                pass",
+             "        ",
+             "        if tmp_file and hasattr(tmp_file, 'name'):",
+             "            try:",
+             "                os.unlink(tmp_file.name)",
+             "            except:",
+             "                pass",
+             "        ",
+             "        if nc_tmp_file and hasattr(nc_tmp_file, 'name'):",
+             "            try:",
+             "                os.unlink(nc_tmp_file.name)",
+             "            except:",
+             "                pass",
+             "        ",
              "        return None, None",
          ])
 
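Two things stand out in the generated consume script. First, the reader engine changed from scipy to h5netcdf; xarray's scipy backend only reads NetCDF3 files, while h5netcdf handles the HDF5-based NetCDF4 format. Second, cleanup now follows the defensive idiom of initializing every handle to None before the try block and guarding each close/unlink, so a failure partway through cannot raise NameError or mask the original exception. A small sketch of that idiom (process_bytes is a name introduced here for illustration, with a plain file handle standing in for the xr.open_dataset call):

```python
import os
import tempfile

def process_bytes(file_bytes):
    tmp_path = None
    handle = None
    try:
        with tempfile.NamedTemporaryFile(suffix=".nc", delete=False) as tmp_file:
            tmp_file.write(file_bytes)
            tmp_path = tmp_file.name
        # Stand-in for xr.open_dataset(tmp_path, engine="h5netcdf")
        handle = open(tmp_path, "rb")
        return handle.read()
    finally:
        # Guarded cleanup: every step tolerates "never got that far".
        if handle is not None:
            try:
                handle.close()
            except OSError:
                pass
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass

print(process_bytes(b"not really netcdf"))
```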
terrakio_core/convenience_functions/zonal_stats.py CHANGED
@@ -1,4 +1,5 @@
  import asyncio
+ import xarray as xr
 
  import geopandas as gpd
  from shapely.geometry import shape
@@ -139,8 +139,23 @@ class cloud_object(gpd.GeoDataFrame):
              raise RuntimeError(f"The zonal stats job (job_id: {self.job_id}) has failed, cancelled, or errored. Please check the job status!")
 
          else:
-             raise RuntimeError(f"The zonal stats job (job_id: {self.job_id}) is still running. Please come back at a later time!")
-
+             # Job is still running - include progress information
+             completed = job_info.get('completed', 0)
+             total = job_info.get('total', 1)
+             progress = completed / total if total > 0 else 0
+             percentage = progress * 100
+
+             # Create progress bar
+             bar_length = 30  # Shorter bar for error message
+             filled_length = int(bar_length * progress)
+             bar = '█' * filled_length + '░' * (bar_length - filled_length)
+
+             raise RuntimeError(
+                 f"The zonal stats job (job_id: {self.job_id}) is still running. "
+                 f"Progress: [{bar}] {percentage:.1f}% ({completed}/{total}). "
+                 f"Please come back at a later time!"
+             )
+
  def expand_on_time(gdf):
      """
      Expand datasets on time dimension - each time becomes a new row.
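The progress bar embedded in the error message above is plain string arithmetic: scale the completion ratio to a fixed bar width, fill that many block characters, and pad the remainder. Extracted into a standalone helper (render_progress is a name introduced here for illustration):

```python
def render_progress(completed, total, bar_length=30):
    # Guard against zero-total jobs so the ratio never divides by zero
    progress = completed / total if total > 0 else 0
    filled_length = int(bar_length * progress)
    bar = '█' * filled_length + '░' * (bar_length - filled_length)
    return f"[{bar}] {progress * 100:.1f}% ({completed}/{total})"

print(render_progress(7, 20))
# [██████████░░░░░░░░░░░░░░░░░░░░] 35.0% (7/20)
```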
@@ -498,6 +513,35 @@ async def zonal_stats(
          job_name = await client.mass_stats.track_job([mass_stats_id])
          job_name = job_name[mass_stats_id]["name"]
          cloud_files_object = cloud_object(job_id = mass_stats_id, job_name = job_name, client = client)
+
+         # Attach id column behavior to cloud object via a wrapper method
+         async def _head_with_id(n = 5):
+             result_gdf = await cloud_files_object._head_async(n)
+             if id_column is not None and id_column in gdf.columns:
+                 geometry_to_id = {geom.wkb: id_val for geom, id_val in zip(gdf.geometry, gdf[id_column])}
+                 if hasattr(result_gdf.index, 'names') and 'geometry' in result_gdf.index.names:
+                     if isinstance(result_gdf.index, pd.MultiIndex):
+                         geometry_index = result_gdf.index.get_level_values('geometry')
+                     else:
+                         geometry_index = result_gdf.index
+                     id_values = [geometry_to_id.get(geom.wkb) for geom in geometry_index]
+                     result_gdf[id_column] = id_values
+                     result_gdf = result_gdf.reset_index()
+                     if 'time' in result_gdf.columns:
+                         result_gdf = result_gdf.set_index([id_column, 'geometry', 'time'])
+                     else:
+                         result_gdf = result_gdf.set_index([id_column, 'geometry'])
+                 else:
+                     id_values = [geometry_to_id.get(geom.wkb) for geom in result_gdf['geometry']]
+                     result_gdf[id_column] = id_values
+                     if 'time' in result_gdf.columns:
+                         result_gdf = result_gdf.set_index([id_column, 'geometry', 'time'])
+                     else:
+                         result_gdf = result_gdf.set_index([id_column, 'geometry'])
+             return result_gdf
+
+         # Monkey-patch a convenience method without modifying original class contract
+         cloud_files_object.head_with_id = lambda n=5: asyncio.run(_head_with_id(n))
          return cloud_files_object
 
      quries = []
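Attaching head_with_id through a plain attribute assignment is per-instance monkey-patching: only this cloud_files_object gains the method, and the lambda bridges synchronous callers into the coroutine via asyncio.run. One caveat: asyncio.run raises RuntimeError if the calling thread already has a running event loop, which is exactly the situation the accessor code earlier in this diff works around with a worker thread. A minimal sketch of the technique; Widget and _describe_async are hypothetical:

```python
import asyncio

class Widget:
    def __init__(self, name):
        self.name = name

async def _describe_async(widget):
    await asyncio.sleep(0.1)  # stand-in for an awaited API call
    return f"widget {widget.name}"

w = Widget("demo")
# Attach a sync convenience method to this one instance;
# other Widget instances are unaffected.
w.describe = lambda: asyncio.run(_describe_async(w))
print(w.describe())
```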
@@ -524,5 +568,33 @@ async def zonal_stats(
          "is_cloud_backed": False,
      }
      gdf_with_datasets = expand_on_variables_and_time(gdf_with_datasets)
+
+     # If an id_column is provided, attach it to the result and include in the index
+     if id_column is not None and id_column in gdf.columns:
+         # Build a mapping from input geometries to id values (use WKB for robust equality)
+         geometry_to_id = {geom.wkb: id_val for geom, id_val in zip(gdf.geometry, gdf[id_column])}
+
+         # Determine geometry values in the result (index may be geometry or (geometry, time))
+         if hasattr(gdf_with_datasets.index, 'names') and 'geometry' in gdf_with_datasets.index.names:
+             if isinstance(gdf_with_datasets.index, pd.MultiIndex):
+                 geometry_index = gdf_with_datasets.index.get_level_values('geometry')
+             else:
+                 geometry_index = gdf_with_datasets.index
+             id_values = [geometry_to_id.get(geom.wkb) for geom in geometry_index]
+             gdf_with_datasets[id_column] = id_values
+             # Reset index to control index composition precisely, then set to desired levels
+             gdf_with_datasets = gdf_with_datasets.reset_index()
+             if 'time' in gdf_with_datasets.columns:
+                 gdf_with_datasets = gdf_with_datasets.set_index([id_column, 'geometry', 'time'])
+             else:
+                 gdf_with_datasets = gdf_with_datasets.set_index([id_column, 'geometry'])
+         else:
+             # geometry exists as a column
+             id_values = [geometry_to_id.get(geom.wkb) for geom in gdf_with_datasets['geometry']]
+             gdf_with_datasets[id_column] = id_values
+             if 'time' in gdf_with_datasets.columns:
+                 gdf_with_datasets = gdf_with_datasets.set_index([id_column, 'geometry', 'time'])
+             else:
+                 gdf_with_datasets = gdf_with_datasets.set_index([id_column, 'geometry'])
      return gdf_with_datasets
 
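Both copies of the id_column logic (here and in the _head_with_id wrapper above) key the geometry-to-id lookup on shapely's .wkb bytes rather than on the geometry objects themselves: WKB gives a stable, hashable byte representation, so a geometry rebuilt elsewhere with identical coordinates still resolves to the same dictionary key. Note this is exact equality; any floating-point drift in the coordinates breaks the match. A small sketch:

```python
from shapely.geometry import Point

ids = ["site-a", "site-b"]
geoms = [Point(0, 0), Point(1, 1)]

# WKB bytes are hashable and compare byte for byte,
# making them stable dict keys independent of object identity.
geometry_to_id = {geom.wkb: id_val for geom, id_val in zip(geoms, ids)}

lookup = Point(1, 1)  # rebuilt independently, not the same object
print(geometry_to_id[lookup.wkb])  # site-b
```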
terrakio_core/endpoints/dataset_management.py CHANGED
@@ -69,7 +69,7 @@ class DatasetManagement:
              name: Name of the dataset (required)
              collection: Dataset collection (default: 'terrakio-datasets')
              products: List of products
-             dates_iso8601: List of dates
+             dates_iso8601: List of dates (will be automatically sorted chronologically)
              bucket: Storage bucket
              path: Storage path
              data_type: Data type
@@ -142,7 +142,7 @@ class DatasetManagement:
              append: Whether to append data or replace (default: True)
              collection: Dataset collection (default: 'terrakio-datasets')
              products: List of products
-             dates_iso8601: List of dates
+             dates_iso8601: List of dates (will be automatically sorted chronologically)
              bucket: Storage bucket
              path: Storage path
              data_type: Data type
@@ -162,6 +162,10 @@ class DatasetManagement:
          Raises:
              APIError: If the API request fails
          """
+         # Sort dates_iso8601 chronologically if provided
+         if dates_iso8601 is not None:
+             dates_iso8601 = sorted(dates_iso8601)
+
          params = {"collection": collection, "append": str(append).lower()}
          payload = {"name": name}
          param_mapping = {
@@ -215,7 +219,7 @@ class DatasetManagement:
              append: Whether to append data or replace (default: True)
              collection: Dataset collection (default: 'terrakio-datasets')
              products: List of products
-             dates_iso8601: List of dates
+             dates_iso8601: List of dates (will be automatically sorted chronologically)
              bucket: Storage bucket
              path: Storage path
              data_type: Data type
@@ -236,6 +240,10 @@ class DatasetManagement:
          Raises:
              APIError: If the API request fails
          """
+         # Sort dates_iso8601 chronologically if provided
+         if dates_iso8601 is not None:
+             dates_iso8601 = sorted(dates_iso8601)
+
          params = {"collection": collection, "append": str(append).lower()}
          payload = {"name": name}
          param_mapping = {
@@ -289,7 +297,7 @@ class DatasetManagement:
              name: Name of the dataset (required)
              collection: Dataset collection (default: 'terrakio-datasets')
              products: List of products
-             dates_iso8601: List of dates
+             dates_iso8601: List of dates (will be automatically sorted chronologically)
              bucket: Storage bucket
              path: Storage path
              data_type: Data type
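The sorted(dates_iso8601) calls above need no date parsing because ISO 8601 is designed so that, for timestamps in the same format, lexicographic string order equals chronological order. For example:

```python
dates = ["2024-01-15", "2023-12-31", "2024-01-02"]
print(sorted(dates))
# ['2023-12-31', '2024-01-02', '2024-01-15']
```

This only holds when the formats match; mixing date-only strings with full timestamps, or strings with varying UTC offsets, would require parsing before sorting.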
terrakio_core-0.4.97.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: terrakio-core
- Version: 0.4.95
+ Version: 0.4.97
  Summary: Core package for the terrakio-python-api
  Requires-Python: >=3.11
  Requires-Dist: aiofiles>=24.1.0
terrakio_core-0.4.97.dist-info/RECORD CHANGED
@@ -1,15 +1,15 @@
- terrakio_core/__init__.py,sha256=oQmSomwovopKB9FJwxuboCtt6Sd5BwiCuROUTTR92WI,274
- terrakio_core/accessors.py,sha256=pVZ7qqkkiZdUN5DwqDFwWNHRMfzT9pLnDehI8yiUNVw,43595
+ terrakio_core/__init__.py,sha256=HTlEcbJMNK-hrdq99CSMO6xU-A4UTVRBi3fVglJFSRQ,274
+ terrakio_core/accessors.py,sha256=UZIi9y4RpBxouSmKuwuNYLIYqDxD8BH-GnUzwJuc1JI,47570
  terrakio_core/async_client.py,sha256=txdSsX3IwqtHlcS86u6N6vjV0-PIiermxNOIjEMQ3Yg,14950
  terrakio_core/client.py,sha256=VXP7BtJWIfpPPZR7_yNdSTcGwNgTwhb7KorusqkQrzk,5603
  terrakio_core/config.py,sha256=r8NARVYOca4AuM88VP_j-8wQxOk1s7VcRdyEdseBlLE,4193
  terrakio_core/exceptions.py,sha256=4qnpOM1gOxsNIXDXY4qwY1d3I4Myhp7HBh7b2D0SVrU,529
  terrakio_core/sync_client.py,sha256=jbG2sMnbR3QPvhAxQX2dBWeX_6f-Qx_MFSRLLpvfRh4,14604
  terrakio_core/convenience_functions/create_dataset_file.py,sha256=RDTAQnKUigyczv3EKhKrs34VMDZDCgL4iz0bge1d9e4,4774
- terrakio_core/convenience_functions/geoquries.py,sha256=zIgt4fDCBgOIUM_h7-a6brOG-Mi2C_bQdnqcSliTVDs,3766
- terrakio_core/convenience_functions/zonal_stats.py,sha256=B0c95M5yqGE3uC2_Cad3FKXeaz97hVHdnaWSMdJdKsU,19496
+ terrakio_core/convenience_functions/geoquries.py,sha256=7E3drOD5ffNk2-rKLbwKsNp3_Berq-S1lQk5wwHSuAo,3786
+ terrakio_core/convenience_functions/zonal_stats.py,sha256=Sg_T3_85acMPvZkDxBf3fMTmNXnEfKnjVCEB7SKT4Fc,23807
  terrakio_core/endpoints/auth.py,sha256=FdLsPScPIBo-Gxl6ZnE-46cp2molggAJtL72LssN3fg,6049
- terrakio_core/endpoints/dataset_management.py,sha256=BUm8IIlW_Q45vDiQp16CiJGeSLheI8uWRVRQtMdhaNk,13161
+ terrakio_core/endpoints/dataset_management.py,sha256=D2foX8DGbSXQ4vYLRt0Es3j96a_qfd920Ct3uN3dd7Y,13641
  terrakio_core/endpoints/group_management.py,sha256=VFl3jakjQa9OPi351D3DZvLU9M7fHdfjCzGhmyJsx3U,6309
  terrakio_core/endpoints/mass_stats.py,sha256=Vb6Tf8kKf5Hlch4ddsrQnfayfiK6z7NSjO8D0pop4p8,25699
  terrakio_core/endpoints/model_management.py,sha256=LH_gHPrqYA-_45KWpDBRcFbwHgm-Kg0zk1ealy7P_C0,52379
@@ -18,6 +18,6 @@ terrakio_core/endpoints/user_management.py,sha256=WlFr3EfK8iI6DfkpMuYLHZUPk2n7_D
  terrakio_core/helper/bounded_taskgroup.py,sha256=wiTH10jhKZgrsgrFUNG6gig8bFkUEPHkGRT2XY7Rgmo,677
  terrakio_core/helper/decorators.py,sha256=L6om7wmWNgCei3Wy5U0aZ-70OzsCwclkjIf7SfQuhCg,2289
  terrakio_core/helper/tiles.py,sha256=lcLCO6KiP05lCI9vngo3zCZJ6Z9C0pUxHSQS4H58EHc,2699
- terrakio_core-0.4.95.dist-info/METADATA,sha256=gAjc5wDDg2a8vmkBVZWSK1QdtMTlaq9EGFY9qyQ16q4,1151
- terrakio_core-0.4.95.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- terrakio_core-0.4.95.dist-info/RECORD,,
+ terrakio_core-0.4.97.dist-info/METADATA,sha256=wxkjwA2YX_vXeBnpenFaVjoglb7Muu84Oe5UdY5-jyM,1151
+ terrakio_core-0.4.97.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ terrakio_core-0.4.97.dist-info/RECORD,,