terrakio-core 0.4.97__tar.gz → 0.4.98.1b1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of terrakio-core might be problematic.
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/PKG-INFO +2 -1
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/pyproject.toml +2 -1
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/__init__.py +1 -1
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/async_client.py +26 -169
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/config.py +3 -44
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/convenience_functions/zonal_stats.py +86 -33
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/endpoints/auth.py +96 -47
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/endpoints/dataset_management.py +120 -54
- terrakio_core-0.4.98.1b1/terrakio_core/endpoints/group_management.py +421 -0
- terrakio_core-0.4.98.1b1/terrakio_core/endpoints/mass_stats.py +835 -0
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/endpoints/model_management.py +213 -109
- terrakio_core-0.4.98.1b1/terrakio_core/endpoints/user_management.py +216 -0
- terrakio_core-0.4.98.1b1/terrakio_core/exceptions.py +390 -0
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/sync_client.py +9 -124
- terrakio_core-0.4.97/terrakio_core/endpoints/group_management.py +0 -228
- terrakio_core-0.4.97/terrakio_core/endpoints/mass_stats.py +0 -712
- terrakio_core-0.4.97/terrakio_core/endpoints/user_management.py +0 -131
- terrakio_core-0.4.97/terrakio_core/exceptions.py +0 -20
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/.gitignore +0 -0
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/README.md +0 -0
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/accessors.py +0 -0
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/client.py +0 -0
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/convenience_functions/create_dataset_file.py +0 -0
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/convenience_functions/geoquries.py +0 -0
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/endpoints/space_management.py +0 -0
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/helper/bounded_taskgroup.py +0 -0
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/helper/decorators.py +0 -0
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/helper/tiles.py +0 -0
{terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: terrakio-core
-Version: 0.4.97
+Version: 0.4.98.1b1
 Summary: Core package for the terrakio-python-api
 Requires-Python: >=3.11
 Requires-Dist: aiofiles>=24.1.0
@@ -14,6 +14,7 @@ Requires-Dist: onnxruntime>=1.22.1
 Requires-Dist: psutil>=7.0.0
 Requires-Dist: scipy>=1.16.1
 Requires-Dist: shapely>=2.1.1
+Requires-Dist: typer>=0.19.2
 Requires-Dist: xarray>=2025.7.1
 Provides-Extra: ml
 Requires-Dist: scikit-learn>=1.7.1; extra == 'ml'
{terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "terrakio-core"
-version = "0.4.97"
+version = "0.4.98.1b1"
 description = "Core package for the terrakio-python-api"
 readme = "README.md"
 requires-python = ">=3.11"
@@ -20,6 +20,7 @@ dependencies = [
     "shapely>=2.1.1",
     "xarray>=2025.7.1",
     "nest-asyncio>=1.6.0",
+    "typer>=0.19.2",
 ]
 
 [project.optional-dependencies]
{terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/async_client.py

@@ -9,7 +9,7 @@ from geopandas import GeoDataFrame
 from shapely.geometry.base import BaseGeometry as ShapelyGeometry
 from shapely.geometry import mapping
 from .client import BaseClient
-from .exceptions import APIError
+from .exceptions import APIError, NetworkError
 from .endpoints.dataset_management import DatasetManagement
 from .endpoints.user_management import UserManagement
 from .endpoints.mass_stats import MassStats
@@ -36,13 +36,10 @@ class AsyncClient(BaseClient):
 
     async def _terrakio_request(self, method: str, endpoint: str, **kwargs):
         if self.session is None:
-            # To this:
             headers = {
                 'x-api-key': self.key,
                 'Authorization': self.token
             }
-
-            # Only add Content-Type if it's a JSON request
            if 'json' in kwargs:
                headers['Content-Type'] = 'application/json'
            clean_headers = {k: v for k, v in headers.items() if v is not None}
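The rebuilt header logic above sets Content-Type only for JSON requests and drops None values before sending. A minimal standalone sketch of that filtering step (the key and token values are placeholders, not real credentials):

    # Placeholder credentials; only the filtering logic mirrors the diff above.
    key = "tkio-xxxxxxxx"
    token = None  # not logged in, so Authorization should be dropped

    kwargs = {'json': {'name': 'example'}}  # stand-in for the request kwargs
    headers = {'x-api-key': key, 'Authorization': token}
    if 'json' in kwargs:
        headers['Content-Type'] = 'application/json'

    clean_headers = {k: v for k, v in headers.items() if v is not None}
    assert 'Authorization' not in clean_headers  # None values are filtered out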
@@ -57,20 +54,17 @@ class AsyncClient(BaseClient):
         for attempt in range(self.retry + 1):
             try:
                 async with session.request(method, url, **kwargs) as response:
+                    content = await response.text()
+
                     if not response.ok and self._should_retry(response.status, attempt):
                         self.logger.info(f"Request failed (attempt {attempt+1}/{self.retry+1}): {response.status}. Retrying...")
                         continue
-                    if
-                    [… five deleted lines of the old error-handling body were unrecoverable from the extraction …]
-                            except:
-                                pass
-                        raise APIError(error_msg, status_code=response.status)
-                    return await self._parse_response(response)
-
+                    if response.ok:
+                        data = await self._parse_response(response)
+                        return data, response.status
+                    else:
+                        error_data = await response.json()
+                        return error_data, response.status
             except aiohttp.ClientError as e:
                 last_exception = e
                 if attempt < self.retry:
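Note the behavioural change: _terrakio_request now returns a (payload, status_code) tuple for both success and error responses instead of raising APIError inline, so callers decide how to handle failures. A hedged sketch of a caller under that contract (the endpoint path is illustrative, not from the diff):

    async def whoami(client):
        # 'users/me' is a hypothetical endpoint used only for illustration.
        data, status = await client._terrakio_request('GET', 'users/me')
        if status >= 400:
            # Error payloads arrive as parsed JSON under the new contract.
            raise RuntimeError(f"Request failed ({status}): {data}")
        return data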
@@ -78,8 +72,8 @@ class AsyncClient(BaseClient):
                     continue
                 else:
                     break
-
-        raise
+
+        raise NetworkError(f"Network failure after {self.retry+1} attempts: {last_exception}")
 
     def _should_retry(self, status_code: int, attempt: int) -> bool:
         """Determine if the request should be retried based on status code."""
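Exhausted retries now surface as a single NetworkError wrapping the last aiohttp.ClientError, rather than re-raising the bare exception. A sketch of catching it (the import path follows the package layout shown in this diff; the helper name is mine):

    from terrakio_core.exceptions import NetworkError

    async def fetch_or_none(client, method, endpoint):
        """Return the (payload, status) tuple, or None if the network gave up."""
        try:
            return await client._terrakio_request(method, endpoint)
        except NetworkError as exc:
            client.logger.warning(f"Giving up after retries: {exc}")
            return None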
@@ -193,158 +187,21 @@ class AsyncClient(BaseClient):
 
         return result
 
-    async def zonal_stats(
-        [… fourteen lines (the parameter list and the docstring opening) were unrecoverable from the extraction …]
-        Args:
-            gdf (GeoDataFrame): GeoDataFrame containing geometries
-            expr (str): Terrakio expression to evaluate, can include spatial aggregations
-            conc (int): Number of concurrent requests to make
-            in_crs (str): Input coordinate reference system
-            out_crs (str): Output coordinate reference system
-            resolution (int): Resolution parameter
-            geom_fix (bool): Whether to fix the geometry (default False)
-            mass_stats (bool): Whether to use mass stats for processing (default False)
-            id_column (Optional[str]): Name of the ID column to use (default None)
-
-        Returns:
-            geopandas.GeoDataFrame: GeoDataFrame with added columns for results
-
-        Raises:
-            ValueError: If concurrency is too high or if data exceeds memory limit without streaming
-            APIError: If the API request fails
-        """
-        # the sync client didn't pass the self here, so the client is now async
-        return await _zonal_stats(
-            client=self,
-            gdf=gdf,
-            expr=expr,
-            conc=conc,
-            in_crs=in_crs,
-            out_crs=out_crs,
-            resolution=resolution,
-            geom_fix=geom_fix,
-            mass_stats=mass_stats,
-            id_column=id_column,
-        )
-
-    async def create_dataset_file(
-        self,
-        name: str,
-        aoi: str,
-        expression: str,
-        output: str,
-        in_crs: str = "epsg:4326",
-        res: float = 0.0001,
-        region: str = "aus",
-        to_crs: str = "epsg:4326",
-        overwrite: bool = True,
-        skip_existing: bool = False,
-        non_interactive: bool = True,
-        poll_interval: int = 30,
-        download_path: str = "/home/user/Downloads",
-        mask = True,
-        max_file_size_mb: int = 5120,  # Default to 5GB
-        tile_size: int = 1024,
-    ) -> dict:
-        """
-        Create a dataset file using mass stats operations.
-
-        Args:
-            aoi (str): Area of interest
-            expression (str): Terrakio expression to evaluate
-            output (str): Output format
-            in_crs (str): Input coordinate reference system (default "epsg:4326")
-            res (float): Resolution (default 0.0001)
-            region (str): Region (default "aus")
-            to_crs (str): Target coordinate reference system (default "epsg:4326")
-            overwrite (bool): Whether to overwrite existing files (default True)
-            skip_existing (bool): Whether to skip existing files (default False)
-            non_interactive (bool): Whether to run non-interactively (default True)
-            poll_interval (int): Polling interval in seconds (default 30)
-            download_path (str): Download path (default "/home/user/Downloads")
-
-        Returns:
-            dict: Dictionary containing generation_task_id and combine_task_id
-
-        Raises:
-            ConfigurationError: If mass stats client is not properly configured
-            RuntimeError: If job fails
-        """
-        return await _create_dataset_file(
-            client=self,
-            aoi=aoi,
-            expression=expression,
-            output=output,
-            in_crs=in_crs,
-            res=res,
-            region=region,
-            to_crs=to_crs,
-            overwrite=overwrite,
-            skip_existing=skip_existing,
-            non_interactive=non_interactive,
-            poll_interval=poll_interval,
-            download_path=download_path,
-            name=name,
-            mask=mask,
-            max_file_size_mb=max_file_size_mb,
-            tile_size=tile_size
-        )
-
-    async def geo_queries(
-        self,
-        queries: list[dict],
-        conc: int = 20,
-    ):
-        """
-        Execute multiple geo queries concurrently.
-
-        Args:
-            queries (list[dict]): List of dictionaries containing query parameters.
-                Each query must have 'expr', 'feature', and 'in_crs' keys.
-            conc (int): Number of concurrent requests to make (default 20, max 100)
-
-        Returns:
-            Union[float, geopandas.GeoDataFrame]:
-                - float: Average of all results if results are integers
-                - GeoDataFrame: GeoDataFrame with geometry and dataset columns if results are xarray datasets
-
-        Raises:
-            ValueError: If queries list is empty, concurrency is too high, or queries are malformed
-            APIError: If the API request fails
-
-        Example:
-            queries = [
-                {
-                    'expr': 'WCF.wcf',
-                    'feature': {'type': 'Feature', 'geometry': {...}, 'properties': {}},
-                    'in_crs': 'epsg:4326'
-                },
-                {
-                    'expr': 'NDVI.ndvi',
-                    'feature': {'type': 'Feature', 'geometry': {...}, 'properties': {}},
-                    'in_crs': 'epsg:4326'
-                }
-            ]
-            result = await client.geo_queries(queries)
-        """
-        return await _request_geoquery_list(
-            client=self,
-            quries=queries,  # Note: keeping original parameter name for compatibility
-            conc=conc,
-        )
+    async def zonal_stats(self, *args, **kwargs):
+        """Proxy to convenience zonal_stats with full argument passthrough."""
+        return await _zonal_stats(self, *args, **kwargs)
+
+    async def create_dataset_file(self, *args, **kwargs) -> dict:
+        """Proxy to convenience create_dataset_file with full argument passthrough."""
+        kwargs.setdefault('download_path', "/home/user/Downloads")
+        kwargs.setdefault('region', "aus")
+        return await _create_dataset_file(self, *args, **kwargs)
+
+    async def geo_queries(self, *args, **kwargs):
+        """Proxy to convenience request_geoquery_list with full argument passthrough."""
+        if 'queries' in kwargs:
+            kwargs['quries'] = kwargs.pop('queries')
+        return await _request_geoquery_list(self, *args, **kwargs)
 
     async def __aenter__(self):
         if self._session is None:
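The verbose wrappers are collapsed into thin passthrough proxies, and geo_queries maps the public 'queries' keyword onto the misspelled internal 'quries' parameter so callers never see the typo. From the caller's side nothing changes; a hedged usage sketch (client construction, the input file, and the expression are illustrative, and the convenience function's defaults are assumed to cover the remaining parameters):

    import asyncio
    import geopandas as gpd
    from terrakio_core.async_client import AsyncClient

    async def main():
        async with AsyncClient() as client:        # assumes a configured client
            gdf = gpd.read_file("fields.geojson")  # hypothetical input file
            stats = await client.zonal_stats(gdf=gdf, expr="NDVI.ndvi", conc=20)
            print(stats.head())

    asyncio.run(main())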
{terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/config.py

@@ -7,7 +7,7 @@ from .exceptions import ConfigurationError
 
 # Default configuration file locations
 DEFAULT_CONFIG_FILE = os.path.join(os.environ.get("HOME", ""), ".tkio_config.json")
-DEFAULT_API_URL = "https://
+DEFAULT_API_URL = "https://dev-au.terrak.io"
 
 def read_config_file(config_file: str = DEFAULT_CONFIG_FILE, logger: logging.Logger = None) -> Dict[str, Any]:
     """
@@ -27,14 +27,8 @@ def read_config_file(config_file: str = DEFAULT_CONFIG_FILE, logger: logging.Log
     file if one doesn't exist and returns appropriate status flags.
     """
     config_path = Path(os.path.expanduser(config_file))
-
-    # that we need to login before using any of the functions
-    # Check if config file exists
+
    if not config_path.exists():
-        # Create an empty config file
-        config_path.parent.mkdir(parents=True, exist_ok=True)
-        with open(config_path, 'w') as f:
-            json.dump({}, f)
        logger.info("No API key found. Please provide an API key to use this client.")
        return {
            'url': DEFAULT_API_URL,
@@ -45,12 +39,9 @@ def read_config_file(config_file: str = DEFAULT_CONFIG_FILE, logger: logging.Log
        }
 
    try:
-        # Read the config file
        with open(config_path, 'r') as f:
            config_data = json.load(f)
 
-        # Read the config file data
-        # Check if config has an API key
        if not config_data or 'TERRAKIO_API_KEY' not in config_data or not config_data.get('TERRAKIO_API_KEY'):
            logger.info("No API key found. Please provide an API key to use this client.")
            return {
@@ -61,11 +52,8 @@ def read_config_file(config_file: str = DEFAULT_CONFIG_FILE, logger: logging.Log
                'token': config_data.get('PERSONAL_TOKEN')
            }
        logger.info(f"Currently logged in as: {config_data.get('EMAIL')}")
-        # this meanb that we have already logged in to the tkio account
 
-        # Convert the JSON config to our expected format
        config = {
-            # Always use the default URL, not from config file
            'url': DEFAULT_API_URL,
            'key': config_data.get('TERRAKIO_API_KEY'),
            'is_logged_in': True,
@@ -84,33 +72,4 @@ def read_config_file(config_file: str = DEFAULT_CONFIG_FILE, logger: logging.Log
        'is_logged_in': False,
        'user_email': None,
        'token': None
-    }
-
-def create_default_config(email: str, api_key: str, config_file: str = DEFAULT_CONFIG_FILE) -> None:
-    """
-    Create a default configuration file in JSON format.
-
-    Args:
-        email: User email
-        api_key: Terrakio API key
-        config_file: Path to configuration file
-
-    Raises:
-        ConfigurationError: If the configuration file can't be created
-    """
-    config_path = Path(os.path.expanduser(config_file))
-
-    # Ensure directory exists
-    config_path.parent.mkdir(parents=True, exist_ok=True)
-
-    try:
-        config_data = {
-            "EMAIL": email,
-            "TERRAKIO_API_KEY": api_key
-        }
-
-        with open(config_path, 'w') as f:
-            json.dump(config_data, f, indent=2)
-
-    except Exception as e:
-        raise ConfigurationError(f"Failed to create configuration file: {e}")
+    }
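With create_default_config removed, nothing in this module writes the config file any more; read_config_file only reads ~/.tkio_config.json if it already exists, presumably leaving file creation to the login flow. For reference, a file matching the keys read above can be produced by hand; all values below are placeholders:

    import json
    from pathlib import Path

    # Key names come from read_config_file above; the values are placeholders.
    config = {
        "EMAIL": "user@example.com",
        "TERRAKIO_API_KEY": "tkio-xxxxxxxx",
        "PERSONAL_TOKEN": "eyJhbGciOi...",
    }
    Path.home().joinpath(".tkio_config.json").write_text(json.dumps(config, indent=2))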
{terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/convenience_functions/zonal_stats.py
RENAMED
@@ -9,18 +9,18 @@ from typing import Optional
 # Third-party library imports
 import aiohttp
 import geopandas as gpd
-import nest_asyncio
 import pandas as pd
 import pyproj
 import xarray as xr
 from geopandas import GeoDataFrame
 from shapely.geometry import box, mapping, shape
 from shapely.ops import transform
+import threading
+from concurrent.futures import ThreadPoolExecutor
 
 # Local imports
 from .geoquries import request_geoquery_list
 
-nest_asyncio.apply()
 class cloud_object(gpd.GeoDataFrame):
     """
     This class is a class used for cloud
@@ -36,12 +36,51 @@ class cloud_object(gpd.GeoDataFrame):
         self.client = client
         self.job_name = job_name
 
+    def __repr__(self):
+        return (
+            f"<CloudZonalStats job_id='{self.job_id}', job_name='{self.job_name}'>\n"
+            f"Call .head(n) to fetch a preview GeoDataFrame when the job completes."
+        )
+
+    def _repr_html_(self):
+        # Jupyter HTML-friendly representation to avoid auto-rendering an empty DataFrame
+        return (
+            f"<div style='font-family:system-ui,Segoe UI,Helvetica,Arial,sans-serif'>"
+            f"<strong>Cloud Zonal Stats</strong><br/>"
+            f"job_id: <code>{self.job_id}</code><br/>"
+            f"job_name: <code>{self.job_name}</code><br/>"
+            f"<em>Use <code>.head(n)</code> to retrieve a preview once the job is completed.</em>"
+            f"</div>"
+        )
+
     def head(self, n = 5):
         """
         Returns the first n files stored in the cloud bucket.
         """
-
+        # Detect if we're inside an existing event loop (e.g., Jupyter)
+        in_running_loop = False
+        try:
+            asyncio.get_running_loop()
+            in_running_loop = True
+        except RuntimeError:
+            in_running_loop = False
 
+        if in_running_loop:
+            # Run the async function in a separate thread with its own loop
+            def run_async_in_thread():
+                new_loop = asyncio.new_event_loop()
+                try:
+                    return new_loop.run_until_complete(self._head_async(n))
+                finally:
+                    new_loop.close()
+
+            with ThreadPoolExecutor(max_workers=1) as executor:
+                future = executor.submit(run_async_in_thread)
+                return future.result()
+        else:
+            # No running loop - safe to use asyncio.run
+            return asyncio.run(self._head_async(n))
+
     async def _head_async(self, n = 5):
         """
         Returns the first n files stored in the cloud bucket.
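The new head replaces the module-wide nest_asyncio.apply() with explicit loop detection: in a notebook, where a loop is already running, the coroutine runs on a worker thread with its own loop; in a plain script, asyncio.run suffices. The same pattern as a self-contained helper (the helper name is mine, not the package's):

    import asyncio
    from concurrent.futures import ThreadPoolExecutor

    def run_coro_blocking(coro):
        """Run a coroutine to completion whether or not a loop is already running."""
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            return asyncio.run(coro)  # no running loop: the simple case
        # A loop is running (e.g. Jupyter): use a fresh loop on a worker thread.
        with ThreadPoolExecutor(max_workers=1) as pool:
            return pool.submit(asyncio.run, coro).result()

    async def _demo():
        await asyncio.sleep(0)
        return 42

    print(run_coro_blocking(_demo()))  # 42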
@@ -120,12 +159,54 @@ class cloud_object(gpd.GeoDataFrame):
             })
 
             self.client.logger.info(f"Created GeoDataFrame with {len(gdf)} rows")
+
+            # Derive id values from json metadata (prefer 'file', fallback to 'group')
+            id_values = []
+            for i in range(min_length):
+                entry = json_data[i] if i < len(json_data) else {}
+                id_candidate = entry.get('file') or entry.get('group') or ''
+                if isinstance(id_candidate, str) and id_candidate.startswith('file_'):
+                    id_val = id_candidate[len('file_'):]
+                elif isinstance(id_candidate, str) and id_candidate.startswith('group_'):
+                    id_val = id_candidate[len('group_'):]
+                else:
+                    id_val = str(id_candidate) if id_candidate else str(i)
+                id_values.append(id_val)
+
+            # Geometry to id mapping using WKB to avoid precision issues
+            geom_to_id = {geometries[i].wkb: id_values[i] for i in range(min_length)}
+
             try:
                 expanded_gdf = expand_on_variables_and_time(gdf)
+
+                # Attach id as first index level, geometry second, time third if present
+                if hasattr(expanded_gdf.index, 'names') and 'geometry' in expanded_gdf.index.names:
+                    if isinstance(expanded_gdf.index, pd.MultiIndex):
+                        geometry_index = expanded_gdf.index.get_level_values('geometry')
+                    else:
+                        geometry_index = expanded_gdf.index
+                    id_col = [geom_to_id.get(geom.wkb) for geom in geometry_index]
+                    expanded_gdf['id'] = id_col
+                    expanded_gdf = expanded_gdf.reset_index()
+                    if 'time' in expanded_gdf.columns:
+                        expanded_gdf = expanded_gdf.set_index(['id', 'geometry', 'time'])
+                    else:
+                        expanded_gdf = expanded_gdf.set_index(['id', 'geometry'])
+                else:
+                    # geometry exists as a column
+                    id_col = [geom_to_id.get(geom.wkb) for geom in expanded_gdf['geometry']]
+                    expanded_gdf['id'] = id_col
+                    if 'time' in expanded_gdf.columns:
+                        expanded_gdf = expanded_gdf.set_index(['id', 'geometry', 'time'])
+                    else:
+                        expanded_gdf = expanded_gdf.set_index(['id', 'geometry'])
+
                 return expanded_gdf
             except NameError:
                 self.client.logger.warning("expand_on_variables_and_time function not found, returning raw GeoDataFrame")
-
+                # Set id on raw gdf and index appropriately
+                gdf['id'] = id_values
+                return gdf.set_index(['id', 'geometry'])
 
         else:
             self.client.logger.warning(f"Failed to download geometry data: HTTP {response.status}")
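Keying the geometry-to-id map on geom.wkb rather than on coordinate strings sidesteps float-formatting and object-identity pitfalls: two geometries with identical coordinates serialize to identical WKB bytes. A small demonstration:

    from shapely.geometry import Point

    a = Point(149.1300, -35.2809)
    b = Point(149.1300, -35.2809)   # equal coordinates, distinct object

    geom_to_id = {a.wkb: "site-1"}
    print(geom_to_id[b.wkb])        # "site-1": the WKB bytes match even though a is not b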
@@ -513,35 +594,7 @@ async def zonal_stats(
     job_name = await client.mass_stats.track_job([mass_stats_id])
     job_name = job_name[mass_stats_id]["name"]
     cloud_files_object = cloud_object(job_id = mass_stats_id, job_name = job_name, client = client)
-
-    # Attach id column behavior to cloud object via a wrapper method
-    async def _head_with_id(n = 5):
-        result_gdf = await cloud_files_object._head_async(n)
-        if id_column is not None and id_column in gdf.columns:
-            geometry_to_id = {geom.wkb: id_val for geom, id_val in zip(gdf.geometry, gdf[id_column])}
-            if hasattr(result_gdf.index, 'names') and 'geometry' in result_gdf.index.names:
-                if isinstance(result_gdf.index, pd.MultiIndex):
-                    geometry_index = result_gdf.index.get_level_values('geometry')
-                else:
-                    geometry_index = result_gdf.index
-                id_values = [geometry_to_id.get(geom.wkb) for geom in geometry_index]
-                result_gdf[id_column] = id_values
-                result_gdf = result_gdf.reset_index()
-                if 'time' in result_gdf.columns:
-                    result_gdf = result_gdf.set_index([id_column, 'geometry', 'time'])
-                else:
-                    result_gdf = result_gdf.set_index([id_column, 'geometry'])
-            else:
-                id_values = [geometry_to_id.get(geom.wkb) for geom in result_gdf['geometry']]
-                result_gdf[id_column] = id_values
-                if 'time' in result_gdf.columns:
-                    result_gdf = result_gdf.set_index([id_column, 'geometry', 'time'])
-                else:
-                    result_gdf = result_gdf.set_index([id_column, 'geometry'])
-        return result_gdf
-
-    # Monkey-patch a convenience method without modifying original class contract
-    cloud_files_object.head_with_id = lambda n=5: asyncio.run(_head_with_id(n))
+
     return cloud_files_object
 
     quries = []