terrakio-core 0.4.97__py3-none-any.whl → 0.4.98.1b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of terrakio-core might be problematic.
- terrakio_core/__init__.py +1 -1
- terrakio_core/async_client.py +26 -169
- terrakio_core/config.py +3 -44
- terrakio_core/convenience_functions/zonal_stats.py +86 -33
- terrakio_core/endpoints/auth.py +96 -47
- terrakio_core/endpoints/dataset_management.py +120 -54
- terrakio_core/endpoints/group_management.py +269 -76
- terrakio_core/endpoints/mass_stats.py +704 -581
- terrakio_core/endpoints/model_management.py +213 -109
- terrakio_core/endpoints/user_management.py +106 -21
- terrakio_core/exceptions.py +371 -1
- terrakio_core/sync_client.py +9 -124
- {terrakio_core-0.4.97.dist-info → terrakio_core-0.4.98.1b1.dist-info}/METADATA +2 -1
- terrakio_core-0.4.98.1b1.dist-info/RECORD +23 -0
- terrakio_core-0.4.97.dist-info/RECORD +0 -23
- {terrakio_core-0.4.97.dist-info → terrakio_core-0.4.98.1b1.dist-info}/WHEEL +0 -0
terrakio_core/endpoints/mass_stats.py

@@ -1,712 +1,835 @@
-from typing import Dict, Any, Optional
-import json
-import gzip
 import os
-import
-import
-from
-from
-
-import
-from
-
-
-
-
-
+import time
+import typer
+from typing import Dict, Any, Optional, List
+from dateutil import parser
+
+from rich.console import Console
+from rich.progress import Progress, TextColumn, BarColumn, TaskProgressColumn, TimeElapsedColumn
+
+from ..exceptions import (
+    CreateCollectionError,
+    GetCollectionError,
+    ListCollectionsError,
+    CollectionNotFoundError,
+    CollectionAlreadyExistsError,
+    InvalidCollectionTypeError,
+    DeleteCollectionError,
+    ListTasksError,
+    UploadRequestsError,
+    UploadArtifactsError,
+    GetTaskError,
+    TaskNotFoundError,
+    DownloadFilesError,
+    CancelTaskError,
+    CancelCollectionTasksError,
+    CancelAllTasksError,
+)
+from ..helper.decorators import require_api_key
+
+import aiohttp # Make sure this is imported at the top
+
 
 class MassStats:
     def __init__(self, client):
         self._client = client
+        self.console = Console()
+
+    async def track_progress(self, task_id):
+        task_info = await self.get_task(task_id=task_id)
+        number_of_jobs = task_info["task"]["total"]
+        start_time = parser.parse(task_info["task"]["createdAt"])
+
+        self.console.print(f"[bold cyan]Tracking task: {task_id}[/bold cyan]")
+
+        completed_jobs_info = []
+
+        def get_job_description(job_info, include_status=False):
+            if not job_info:
+                return "No job info"
+
+            service = job_info.get("service", "Unknown service")
+            desc = service
+
+            if include_status:
+                status = job_info.get("status", "unknown")
+                desc += f" - {status}"
+
+            return desc
+
+        progress = Progress(
+            TextColumn("[progress.description]{task.description}"),
+            BarColumn(),
+            TaskProgressColumn(),
+            TimeElapsedColumn(),
+        )
+
+        with progress:
+            last_completed_count = 0
+            current_job_task = None
+            current_job_description = None
+
+            while len(completed_jobs_info) < number_of_jobs:
+                task_info = await self.get_task(task_id=task_id)
+                completed_number = task_info["task"]["completed"]
+                current_job_info = task_info["currentJob"]
+
+                if completed_number > last_completed_count:
+                    if current_job_task is not None:
+                        completed_description = current_job_description.replace(" - pending", "").replace(" - running", "").replace(" - waiting", "")
+                        completed_description += " - completed"
+
+                        progress.update(
+                            current_job_task,
+                            description=f"[{last_completed_count + 1}/{number_of_jobs}] {completed_description}",
+                            completed=100
+                        )
+                        completed_jobs_info.append({
+                            "task": current_job_task,
+                            "description": completed_description,
+                            "job_number": last_completed_count + 1
+                        })
+                        current_job_task = None
+                        current_job_description = None
+
+                    last_completed_count = completed_number
+
+                if current_job_info:
+                    status = current_job_info["status"]
+                    current_job_description = get_job_description(current_job_info, include_status=True)
+
+                    total_value = current_job_info.get("total", 0)
+                    completed_value = current_job_info.get("completed", 0)
+
+                    if total_value == -9999:
+                        percent = 0
+                    elif total_value > 0:
+                        percent = int(completed_value / total_value * 100)
+                    else:
+                        percent = 0
+
+                    if current_job_task is None:
+                        current_job_task = progress.add_task(
+                            f"[{completed_number + 1}/{number_of_jobs}] {current_job_description}",
+                            total=100,
+                            start_time=start_time
+                        )
+                    else:
+                        progress.update(
+                            current_job_task,
+                            description=f"[{completed_number + 1}/{number_of_jobs}] {current_job_description}",
+                            completed=percent
+                        )
+
+                    if status == "Error":
+                        self.console.print("[bold red]Error![/bold red]")
+                        raise typer.Exit(code=1)
+                    if status == "Cancelled":
+                        self.console.print("[bold orange]Cancelled![/bold orange]")
+                        raise typer.Exit(code=1)
+                    elif status == "Completed":
+                        completed_description = current_job_description.replace(" - pending", "").replace(" - running", "").replace(" - waiting", "")
+                        completed_description += " - completed"
+                        progress.update(
+                            current_job_task,
+                            description=f"[{completed_number + 1}/{number_of_jobs}] {completed_description}",
+                            completed=100
+                        )
+
+                if completed_number == number_of_jobs and current_job_info is None:
+                    if current_job_task is not None:
+                        completed_description = current_job_description.replace(" - pending", "").replace(" - running", "").replace(" - waiting", "")
+                        completed_description += " - completed"
+                        progress.update(
+                            current_job_task,
+                            description=f"[{number_of_jobs}/{number_of_jobs}] {completed_description}",
+                            completed=100
+                        )
+                        completed_jobs_info.append({
+                            "task": current_job_task,
+                            "description": completed_description,
+                            "job_number": number_of_jobs
+                        })
+                    break
+
+                time.sleep(10)
+
+        self.console.print(f"[bold green]All {number_of_jobs} jobs finished![/bold green]")
 
     @require_api_key
-    async def
-        self,
-
-
-
-
-        config: Dict[str, Any],
-        region: str = None,
-        overwrite: bool = False,
-        skip_existing: bool = False,
-        location: Optional[str] = None,
-        force_loc: Optional[bool] = None,
-        server: Optional[str] = "dev-au.terrak.io",
+    async def create_collection(
+        self,
+        collection: str,
+        bucket: Optional[str] = None,
+        location: Optional[str] = None,
+        collection_type: str = "basic"
     ) -> Dict[str, Any]:
         """
-
+        Create a collection for the current user.
 
         Args:
-
-
-
-
-
-            overwrite: Whether to overwrite the job
-            skip_existing: Whether to skip existing jobs
-            location: The location of the job
-            force_loc: Whether to force the location
-            server: The server to use
-
+            collection: The name of the collection (required)
+            bucket: The bucket to use (optional, admin only)
+            location: The location to use (optional, admin only)
+            collection_type: The type of collection to create (optional, defaults to "basic")
+
         Returns:
-            API response as a dictionary
-
+            API response as a dictionary containing the collection id
+
         Raises:
-
+            CollectionAlreadyExistsError: If the collection already exists
+            InvalidCollectionTypeError: If the collection type is invalid
+            CreateCollectionError: If the API request fails due to unknown reasons
         """
-        # we don't actually need the region function inside the request, the endpoint will fix that for us
         payload = {
-            "
-            "size": size,
-            "sample": sample,
-            "output": output,
-            "config": config,
-            "overwrite": overwrite,
-            "skip_existing": skip_existing,
-            "server": server,
-            "region": region
-        }
-        payload_mapping = {
-            "location": location,
-            "force_loc": force_loc
+            "collection_type": collection_type
         }
-
-
-
-
+
+        if bucket is not None:
+            payload["bucket"] = bucket
+
+        if location is not None:
+            payload["location"] = location
+
+        response, status = await self._client._terrakio_request("POST", f"collections/{collection}", json=payload)
+
+        if status != 200:
+            if status == 400:
+                raise CollectionAlreadyExistsError(f"Collection {collection} already exists", status_code=status)
+            if status == 422:
+                raise InvalidCollectionTypeError(f"Invalid collection type: {collection_type}", status_code=status)
+            raise CreateCollectionError(f"Create collection failed with status {status}", status_code=status)
+
+        return response
 
     @require_api_key
-    async def
+    async def delete_collection(
+        self,
+        collection: str,
+        full: Optional[bool] = False,
+        outputs: Optional[list] = [],
+        data: Optional[bool] = False
+    ) -> Dict[str, Any]:
         """
-
+        Delete a collection by name.
 
         Args:
-
-
+            collection: The name of the collection to delete (required)
+            full: Delete the full collection (optional, defaults to False)
+            outputs: Specific output folders to delete (optional, defaults to empty list)
+            data: Whether to delete raw data (xdata folder) (optional, defaults to False)
+
         Returns:
-            API response as a dictionary
-
+            API response as a dictionary confirming deletion
+
+        Raises:
+            CollectionNotFoundError: If the collection is not found
+            DeleteCollectionError: If the API request fails due to unknown reasons
         """
-
-
+        payload = {
+            "full": full,
+            "outputs": outputs,
+            "data": data
+        }
+
+        response, status = await self._client._terrakio_request("DELETE", f"collections/{collection}", json=payload)
+
+        if status != 200:
+            if status == 404:
+                raise CollectionNotFoundError(f"Collection {collection} not found", status_code=status)
+            raise DeleteCollectionError(f"Delete collection failed with status {status}", status_code=status)
+
+        return response
+
     @require_api_key
-    def
+    async def get_collection(self, collection: str) -> Dict[str, Any]:
         """
-        Get
+        Get a collection by name.
 
         Args:
-
-
-            uid: The user ID of the job
-
+            collection: The name of the collection to retrieve(required)
+
         Returns:
-            API response as a dictionary
+            API response as a dictionary containing collection information
+
+        Raises:
+            CollectionNotFoundError: If the collection is not found
+            GetCollectionError: If the API request fails due to unknown reasons
+        """
+        response, status = await self._client._terrakio_request("GET", f"collections/{collection}")
 
+        if status != 200:
+            if status == 404:
+                raise CollectionNotFoundError(f"Collection {collection} not found", status_code=status)
+            raise GetCollectionError(f"Get collection failed with status {status}", status_code=status)
+
+        return response
+
+    @require_api_key
+    async def list_collections(
+        self,
+        collection_type: Optional[str] = None,
+        limit: Optional[int] = 10,
+        page: Optional[int] = 0
+    ) -> List[Dict[str, Any]]:
+        """
+        List collections for the current user.
+
+        Args:
+            collection_type: Filter by collection type (optional)
+            limit: Number of collections to return (optional, defaults to 10)
+            page: Page number (optional, defaults to 0)
+
+        Returns:
+            API response as a list of dictionaries containing collection information
+
         Raises:
-
+            ListCollectionsError: If the API request fails due to unknown reasons
         """
-
-
-
-
-
+        params = {}
+
+        if collection_type is not None:
+            params["collection_type"] = collection_type
+
+        if limit is not None:
+            params["limit"] = limit
+
+        if page is not None:
+            params["page"] = page
+
+        response, status = await self._client._terrakio_request("GET", "collections", params=params)
+        if status != 200:
+            raise ListCollectionsError(f"List collections failed with status {status}", status_code=status)
+
+        return response
+
     @require_api_key
-    async def
+    async def list_tasks(
+        self,
+        limit: Optional[int] = 10,
+        page: Optional[int] = 0
+    ) -> List[Dict[str, Any]]:
         """
-
+        List tasks for the current user.
 
         Args:
-
+            limit: Number of tasks to return (optional, defaults to 10)
+            page: Page number (optional, defaults to 0)
+
+        Returns:
+            API response as a list of dictionaries containing task information
+
+        Raises:
+            ListTasksError: If the API request fails due to unknown reasons
+        """
+        params = {
+            "limit": limit,
+            "page": page
+        }
+        response, status = await self._client._terrakio_request("GET", "tasks", params=params)
+
+        if status != 200:
+            raise ListTasksError(f"List tasks failed with status {status}", status_code=status)
 
+        return response
+
+    @require_api_key
+    async def upload_requests(
+        self,
+        collection: str
+    ) -> Dict[str, Any]:
+        """
+        Retrieve signed url to upload requests for a collection.
+
+        Args:
+            collection: Name of collection
+
         Returns:
-            API response as a dictionary
+            API response as a dictionary containing the upload URL
 
         Raises:
-
+            CollectionNotFoundError: If the collection is not found
+            UploadRequestsError: If the API request fails due to unknown reasons
         """
-
-
-
+        response, status = await self._client._terrakio_request("GET", f"collections/{collection}/upload/requests")
+
+        if status != 200:
+            if status == 404:
+                raise CollectionNotFoundError(f"Collection {collection} not found", status_code=status)
+            raise UploadRequestsError(f"Upload requests failed with status {status}", status_code=status)
+
+        return response
+
     @require_api_key
-    def
+    async def upload_artifacts(
+        self,
+        collection: str,
+        file_type: str,
+        compressed: Optional[bool] = True
+    ) -> Dict[str, Any]:
         """
-
+        Retrieve signed url to upload artifact file to a collection.
 
         Args:
-
-
+            collection: Name of collection
+            file_type: The extension of the file
+            compressed: Whether to compress the file using gzip or not (defaults to True)
+
         Returns:
-            API response as a dictionary
+            API response as a dictionary containing the upload URL
 
         Raises:
-
+            CollectionNotFoundError: If the collection is not found
+            UploadArtifactsError: If the API request fails due to unknown reasons
         """
-        params = {
-
-
+        params = {
+            "file_type": file_type,
+            "compressed": str(compressed).lower(),
+        }
 
-
-
+        response, status = await self._client._terrakio_request("GET", f"collections/{collection}/upload", params=params)
+
+        if status != 200:
+            if status == 404:
+                raise CollectionNotFoundError(f"Collection {collection} not found", status_code=status)
+            raise UploadArtifactsError(f"Upload artifacts failed with status {status}", status_code=status)
+
+        return response
+
+    @require_api_key
+    async def get_task(
         self,
-
-        data_name: str,
-        output: str,
-        consumer: str,
-        overwrite: bool = False
+        task_id: str
     ) -> Dict[str, Any]:
-
-
-
+        """
+        Get task information by task ID.
+
+        Args:
+            task_id: ID of task to track
 
-
-
-
-
-
-
+        Returns:
+            API response as a dictionary containing task information
+
+        Raises:
+            TaskNotFoundError: If the task is not found
+            GetTaskError: If the API request fails due to unknown reasons
+        """
+        response, status = await self._client._terrakio_request("GET", f"tasks/info/{task_id}")
+
+        if status != 200:
+            if status == 404:
+                raise TaskNotFoundError(f"Task {task_id} not found", status_code=status)
+            raise GetTaskError(f"Get task failed with status {status}", status_code=status)
 
-        return
-            "POST",
-            "mass_stats/post_process",
-            data=data,
-        )
+        return response
 
-    @require_api_key
-    async def
+    @require_api_key
+    async def generate_data(
         self,
-
+        collection: str,
         output: str,
-
-
+        skip_existing: Optional[bool] = True,
+        force_loc: Optional[bool] = None,
+        server: Optional[str] = None
     ) -> Dict[str, Any]:
-
-        data
-
-
-
-
+        """
+        Generate data for a collection.
+
+        Args:
+            collection: Name of collection
+            output: Output type (str)
+            force_loc: Write data directly to the cloud under this folder
+            skip_existing: Skip existing data
+            server: Server to use
+
+        Returns:
+            API response as a dictionary containing task information
+
+        Raises:
+            CollectionNotFoundError: If the collection is not found
+            GetTaskError: If the API request fails due to unknown reasons
+        """
+        payload = {"output": output, "skip_existing": skip_existing}
+
+        if force_loc is not None:
+            payload["force_loc"] = force_loc
+        if server is not None:
+            payload["server"] = server
+
+        response, status = await self._client._terrakio_request("POST", f"collections/{collection}/generate_data", json=payload)
+
+        if status != 200:
+            if status == 404:
+                raise CollectionNotFoundError(f"Collection {collection} not found", status_code=status)
+            raise GetTaskError(f"Generate data failed with status {status}", status_code=status)
+
+        return response
 
-        return await self._client._terrakio_request(
-            "POST",
-            "mass_stats/transform",
-            data=data,
-        )
-
     @require_api_key
-    def
+    async def training_samples(
         self,
-
-
-
-
-
-
+        collection: str,
+        expressions: list[str],
+        filters: list[str],
+        aoi: dict,
+        samples: int,
+        crs: str,
+        tile_size: int,
+        res: float,
+        output: str,
+        year_range: Optional[list[int]] = None,
+        server: Optional[str] = None
    ) -> Dict[str, Any]:
         """
-
+        Generate training samples for a collection.
 
         Args:
-
-
-
-
-
-
+            collection: Name of collection
+            expressions: List of expressions for each sample
+            filters: Expressions to filter sample areas
+            aoi: AOI to sample from (geojson dict)
+            samples: Number of samples to generate
+            crs: CRS of AOI
+            tile_size: Pixel width and height of samples
+            res: Resolution of samples
+            output: Sample output type
+            year_range: Optional year range filter
+            server: Server to use
 
         Returns:
-            API response as a dictionary
+            API response as a dictionary containing task information
 
         Raises:
-
-
+            CollectionNotFoundError: If the collection is not found
+            GetTaskError: If the API request fails due to unknown reasons
         """
-
-
-
-
-
+        payload = {
+            "expressions": expressions,
+            "filters": filters,
+            "aoi": aoi,
+            "samples": samples,
+            "crs": crs,
+            "tile_size": tile_size,
+            "res": res,
+            "output": output
+        }
 
-        if
-
-
+        if year_range is not None:
+            payload["year_range"] = year_range
+        if server is not None:
+            payload["server"] = server
 
-
+        response, status = await self._client._terrakio_request("POST", f"collections/{collection}/training_samples", json=payload)
+
+        if status != 200:
+            if status == 404:
+                raise CollectionNotFoundError(f"Collection {collection} not found", status_code=status)
+            raise GetTaskError(f"Training sample failed with status {status}", status_code=status)
 
-
-            params["id"] = id
-        if force_loc is True:
-            params["force_loc"] = force_loc
-        params["bucket"] = bucket
-        params["location"] = location
-        params["output"] = output
-
-        return self._client._terrakio_request("GET", "mass_stats/download", params=params)
+        return response
 
-    @require_api_key
-    async def
-
-
+    # @require_api_key
+    # async def post_processing(
+    #     self,
+    #     collection: str,
+    #     folder: str,
+    #     consumer: str
+    # ) -> Dict[str, Any]:
+    #     """
+    #     Run post processing for a collection.
+
+    #     Args:
+    #         collection: Name of collection
+    #         folder: Folder to store output
+    #         consumer: Post processing script
+
+    #     Returns:
+    #         API response as a dictionary containing task information
+
+    #     Raises:
+    #         CollectionNotFoundError: If the collection is not found
+    #         GetTaskError: If the API request fails due to unknown reasons
+    #     """
+    #     # payload = {
+    #     #     "folder": folder,
+    #     #     "consumer": consumer
+    #     # }
+    #     # we have the consumer as a string, we need to read in the file and then pass in the content
+    #     with open(consumer, 'rb') as f:
+    #         files = {
+    #             'consumer': ('consumer.py', f.read(), 'text/plain')
+    #         }
+    #     data = {
+    #         'folder': folder
+    #     }
 
-
-
-
-
-
-
-
-
-
-
-
-                raise ValueError(f"Invalid JSON in file {file_path}: {e}")
+    #     # response, status = await self._client._terrakio_request("POST", f"collections/{collection}/post_process", json=payload)
+    #     response, status = await self._client._terrakio_request(
+    #         "POST",
+    #         f"collections/{collection}/post_process",
+    #         files=files,
+    #         data=data
+    #     )
+    #     if status != 200:
+    #         if status == 404:
+    #             raise CollectionNotFoundError(f"Collection {collection} not found", status_code=status)
+    #         raise GetTaskError(f"Post processing failed with status {status}", status_code=status)
 
-
+    #     return response
+
 
     @require_api_key
-    async def
+    async def post_processing(
+        self,
+        collection: str,
+        folder: str,
+        consumer: str
+    ) -> Dict[str, Any]:
         """
-
-
+        Run post processing for a collection.
+
         Args:
-
-
-
+            collection: Name of collection
+            folder: Folder to store output
+            consumer: Path to post processing script
+
+        Returns:
+            API response as a dictionary containing task information
+
+        Raises:
+            CollectionNotFoundError: If the collection is not found
+            GetTaskError: If the API request fails due to unknown reasons
         """
-
-
-
-
-
-
-
-
-            '
-
-
-
-
-
-
-
+        # Read file and build multipart form data
+        with open(consumer, 'rb') as f:
+            form = aiohttp.FormData()
+            form.add_field('folder', folder) # Add text field
+            form.add_field(
+                'consumer', # Field name
+                f.read(), # File content
+                filename='consumer.py', # Filename
+                content_type='text/x-python' # MIME type
+            )
+
+        # Send using data= with FormData object (NOT files=)
+        response, status = await self._client._terrakio_request(
+            "POST",
+            f"collections/{collection}/post_process",
+            data=form # ✅ Pass FormData as data
+        )
+
+        if status != 200:
+            if status == 404:
+                raise CollectionNotFoundError(f"Collection {collection} not found", status_code=status)
+            raise GetTaskError(f"Post processing failed with status {status}", status_code=status)
 
-        response = await self._client._regular_request("PUT", url, data=body, headers=headers)
         return response
-
+
     @require_api_key
-    async def
-
-
-
-
-
-
-
-        ""
-
-
-        Args:
-            job_name: Name of the job
-            download_all: Whether to download all raw files from the job
-            file_type: either 'raw' or 'processed'
-            current_page: Current page number for pagination
-            page_size: Number of file per page for download
-            output_path: Path where the file should be saved
-
-        Returns:
-            str: Path to the downloaded file
+    async def zonal_stats(
+        self,
+        collection: str,
+        id_property: str,
+        column_name: str,
+        expr: str,
+        resolution: Optional[int] = 1,
+        in_crs: Optional[str] = "epsg:4326",
+        out_crs: Optional[str] = "epsg:4326"
+    ) -> Dict[str, Any]:
         """
+        Run zonal stats over uploaded geojson collection.
 
+        Args:
+            collection: Name of collection
+            id_property: Property key in geojson to use as id
+            column_name: Name of new column to add
+            expr: Terrak.io expression to evaluate
+            resolution: Resolution of request (optional, defaults to 1)
+            in_crs: CRS of geojson (optional, defaults to "epsg:4326")
+            out_crs: Desired output CRS (optional, defaults to "epsg:4326")
 
-
-
-
-        if file_type == "raw" and page_size is None:
-            raise ValueError("page_size is required to define pagination size when downloading raw files.")
+        Returns:
+            API response as a dictionary containing task information
 
-
-
-
-
-
+        Raises:
+            CollectionNotFoundError: If the collection is not found
+            GetTaskError: If the API request fails due to unknown reasons
+        """
+        payload = {
+            "id_property": id_property,
+            "column_name": column_name,
+            "expr": expr,
+            "resolution": resolution,
+            "in_crs": in_crs,
+            "out_crs": out_crs
         }
+
+        response, status = await self._client._terrakio_request("POST", f"collections/{collection}/zonal_stats", json=payload)
 
-
-
-
-
-
-
-
-
-
-
-
-
-                subpath = Path(path_parts[-1])
-                file_save_path = output_dir / subpath
-                file_save_path.parent.mkdir(parents=True, exist_ok=True)
-                self._client.logger.info(f"Downloading file to {file_save_path} ({i+1}/{len(download_urls)})")
-
-                async with session.get(url) as resp:
-                    resp.raise_for_status()
-                    import aiofiles
-                    async with aiofiles.open(file_save_path, 'wb') as file:
-                        async for chunk in resp.content.iter_chunked(1048576): # 1 MB
-                            if chunk:
-                                await file.write(chunk)
-
-                if not os.path.exists(file_save_path):
-                    raise Exception(f"File was not written to {file_save_path}")
-
-                file_size = os.path.getsize(file_save_path)
-                self._client.logger.info(f"File downloaded successfully to {file_save_path} (size: {file_size / (1024 * 1024):.4f} mb)")
-                output_files.append(str(file_save_path))
-
-        try:
-            page = 1
-            total_files = None
-            downloaded_files = 0
-            async with aiohttp.ClientSession() as session:
-                while True:
-                    params = {
-                        "page": page,
-                        "page_size": page_size
-                    }
-                    response = await self._client._terrakio_request("POST", "mass_stats/download_files", json=request_body, params=params)
-                    data = response
-
-                    download_urls = data.get('download_urls')
-                    if not download_urls:
-                        break
-                    await download_urls_batch(download_urls, session)
-                    if total_files is None:
-                        total_files = data.get('subdir_total_files')
-                    downloaded_files += len(download_urls)
-                    if total_files is not None and downloaded_files >= total_files:
-                        break
-                    if len(download_urls) < page_size:
-                        break # Last page
-                    page += 1
-            return output_files
-        except Exception as e:
-            raise Exception(f"Error in download process: {e}")
-
-    def validate_request(self, request_json: Union[str, List[Dict]]):
-        # Handle both file path and direct JSON data
-        if isinstance(request_json, str):
-            # It's a file path
-            with open(request_json, 'r') as file:
-                request_data = json.load(file)
-        elif isinstance(request_json, list):
-            # It's already JSON data
-            request_data = request_json
-        else:
-            raise ValueError("request_json must be either a file path (str) or JSON data (list)")
-
-        # Rest of validation logic stays exactly the same
-        if not isinstance(request_data, list):
-            raise ValueError("Request JSON should contain a list of dictionaries")
-
-        for i, request in enumerate(request_data):
-            if not isinstance(request, dict):
-                raise ValueError(f"Request {i} should be a dictionary")
-            required_keys = ["request", "group", "file"]
-            for key in required_keys:
-                if key not in request:
-                    raise ValueError(f"Request {i} should contain {key}")
-            try:
-                str(request["group"])
-            except ValueError:
-                ValueError("Group must be string or convertible to string")
-            if not isinstance(request["request"], dict):
-                raise ValueError("Request must be a dictionary")
-            if not isinstance(request["file"], (str, int, list)):
-                raise ValueError("'file' must be a string or a list of strings")
-            if i == 3:
-                break
-
-    async def execute_job(
-        self,
-        name: str,
-        output: str,
-        config: Dict[str, Any],
-        request_json: Union[str, List[Dict]], # ← Accept both file path OR data
-        region: str = None,
-        overwrite: bool = False,
-        skip_existing: bool = False,
-        location: str = None,
-        force_loc: bool = None,
-        server: str = None
+        if status != 200:
+            if status == 404:
+                raise CollectionNotFoundError(f"Collection {collection} not found", status_code=status)
+            raise GetTaskError(f"Zonal stats failed with status {status}", status_code=status)
+
+        return response
+
+    @require_api_key
+    async def zonal_stats_transform(
+        self,
+        collection: str,
+        consumer: str
     ) -> Dict[str, Any]:
         """
-
-
+        Transform raw data in collection. Creates a new collection.
+
         Args:
-
-
-
-            request_json: Path to the request JSON file
-            overwrite: Whether to overwrite the job
-            skip_existing: Whether to skip existing jobs
-            location: The location of the job
-            force_loc: Whether to force the location
-            server: The server to use
-
+            collection: Name of collection
+            consumer: Post processing script (file path or script content)
+
         Returns:
-            API response as a dictionary
-
+            API response as a dictionary containing task information
+
         Raises:
-
+            CollectionNotFoundError: If the collection is not found
+            GetTaskError: If the API request fails due to unknown reasons
         """
+        if os.path.isfile(consumer):
+            with open(consumer, 'r') as f:
+                script_content = f.read()
+        else:
+            script_content = consumer
+
+        files = {
+            'consumer': ('script.py', script_content, 'text/plain')
+        }
 
-
-        ""
-
-
-
-        for item in request_data:
-            if not isinstance(item, dict):
-                raise ValueError("Each item in request JSON should be a dictionary")
-
-            if 'group' not in item:
-                raise ValueError("Each item should have a 'group' field")
-
-            group = item['group']
-            if group not in seen_groups:
-                groups.append(group)
-                seen_groups.add(group)
-
-        return groups
-
-        # # Load and validate request JSON
-        # try:
-        #     with open(request_json, 'r') as file:
-        #         request_data = json.load(file)
-        #     if isinstance(request_data, list):
-        #         size = len(request_data)
-        #     else:
-        #         raise ValueError(f"Request JSON file {request_json} should contain a list of dictionaries")
-        # except FileNotFoundError as e:
-        #     return e
-        # except json.JSONDecodeError as e:
-        #     return e
-        try:
-            if isinstance(request_json, str):
-                # It's a file path
-                with open(request_json, 'r') as file:
-                    request_data = json.load(file)
-            elif isinstance(request_json, list):
-                # It's already JSON data
-                request_data = request_json
-            else:
-                raise ValueError("request_json must be either a file path (str) or JSON data (list)")
-
-            if isinstance(request_data, list):
-                size = len(request_data)
-            else:
-                raise ValueError("Request JSON should contain a list of dictionaries")
-        except FileNotFoundError as e:
-            return e
-        except json.JSONDecodeError as e:
-            return e
-
-        # Generate manifest from request data (kept in memory)
-        try:
-            manifest_groups = extract_manifest_from_request(request_data)
-        except Exception as e:
-            raise ValueError(f"Error extracting manifest from request JSON: {e}")
-
-        # Extract the first expression
-        first_request = request_data[0] # Changed from data[0] to request_data[0]
-        first_expression = first_request["request"]["expr"]
-
-        # Get upload URLs
-        upload_result = await self._upload_request(
-            name=name,
-            size=size,
-            region=region,
-            sample = first_expression,
-            output=output,
-            config=config,
-            location=location,
-            force_loc=force_loc,
-            overwrite=overwrite,
-            server=server,
-            skip_existing=skip_existing
+        response, status = await self._client._terrakio_request(
+            "POST",
+            f"collections/{collection}/transform",
+            files=files
         )
+
+        if status != 200:
+            if status == 404:
+                raise CollectionNotFoundError(f"Collection {collection} not found", status_code=status)
+            raise GetTaskError(f"Transform failed with status {status}", status_code=status)
 
-
-        manifest_url = upload_result.get('manifest_url')
-
-        if not requests_url:
-            raise ValueError("No requests_url returned from server for request JSON upload")
-
-        # Upload request JSON file
-        try:
-            self.validate_request(request_json)
-
-            if isinstance(request_json, str):
-                # File path - use existing _upload_file method
-                requests_response = await self._upload_file(request_json, requests_url, use_gzip=True)
-            else:
-                # JSON data - use _upload_json_data method
-                requests_response = await self._upload_json_data(request_json, requests_url, use_gzip=True)
-
-            if requests_response.status not in [200, 201, 204]:
-                # ... rest stays the same
-                self._client.logger.error(f"Requests upload error: {requests_response.text()}")
-                raise Exception(f"Failed to upload request JSON: {requests_response.text()}")
-        except Exception as e:
-            raise Exception(f"Error uploading request JSON file {request_json}: {e}")
-
-        if not manifest_url:
-            raise ValueError("No manifest_url returned from server for manifest JSON upload")
-
-        # Upload manifest JSON data directly (no temporary file needed)
-        try:
-            manifest_response = await self._upload_json_data(manifest_groups, manifest_url, use_gzip=False)
-            if manifest_response.status not in [200, 201, 204]:
-                self._client.logger.error(f"Manifest upload error: {manifest_response.text()}")
-                raise Exception(f"Failed to upload manifest JSON: {manifest_response.text()}")
-        except Exception as e:
-            raise Exception(f"Error uploading manifest JSON: {e}")
-
-        # Start the job
-        start_job_task_id = await self.start_job(upload_result.get("id"))
-        return start_job_task_id
+        return response
 
     @require_api_key
-    def
+    async def download_files(
+        self,
+        collection: str,
+        file_type: str,
+        page: Optional[int] = 0,
+        page_size: Optional[int] = 100,
+        folder: Optional[str] = None
+    ) -> Dict[str, Any]:
         """
-
-
+        Get list of signed urls to download files in collection.
+
         Args:
-
-
+            collection: Name of collection
+            file_type: Whether to return raw or processed (after post processing) files
+            page: Page number (optional, defaults to 0)
+            page_size: Number of files to return per page (optional, defaults to 100)
+            folder: If processed file type, which folder to download files from (optional)
+
         Returns:
-            API response as a dictionary
-
+            API response as a dictionary containing list of download URLs
+
         Raises:
-
+            CollectionNotFoundError: If the collection is not found
+            DownloadFilesError: If the API request fails due to unknown reasons
         """
-
-
-    @require_api_key
-    def cancel_all_jobs(self) -> Dict[str, Any]:
-        """
-        Cancel all mass stats jobs.
+        params = {"file_type": file_type}
 
-
-
-
-
-
-
-
-
+        if page is not None:
+            params["page"] = page
+        if page_size is not None:
+            params["page_size"] = page_size
+        if folder is not None:
+            params["folder"] = folder
+
+        response, status = await self._client._terrakio_request("GET", f"collections/{collection}/download", params=params)
+
+        if status != 200:
+            if status == 404:
+                raise CollectionNotFoundError(f"Collection {collection} not found", status_code=status)
+            raise DownloadFilesError(f"Download files failed with status {status}", status_code=status)
+
+        return response
+
     @require_api_key
-    async def
+    async def cancel_task(
         self,
-
-
-        aoi: dict,
-        samples: int,
-        crs: str,
-        tile_size: int,
-        res: float,
-        output: str,
-        year_range: list[int] = None,
-        overwrite: bool = False,
-        server: str = None,
-        bucket: str = None
-    ) -> Dict[str, Any]:
+        task_id: str
+    ):
         """
-
+        Cancel a task by task ID.
 
         Args:
-
-            config: The config of the job
-            aoi: The AOI of the job
-            samples: The number of samples to take
-            crs: The CRS of the job
-            tile_size: The tile size of the job
-            res: The resolution of the job
-            output: The output of the job
-            year_range: The year range of the job
-            overwrite: Whether to overwrite the job
-            server: The server to use
-            bucket: The bucket to use
+            task_id: ID of task to cancel
 
         Returns:
-            API response as a dictionary
-
-        Raises:
-
+            API response as a dictionary containing task information
+
+        Raises:
+            TaskNotFoundError: If the task is not found
+            CancelTaskError: If the API request fails due to unknown reasons
         """
-
-
-
-
-            "
-            "crs": crs,
-            "tile_size": tile_size,
-            "res": res,
-            "output": output,
-            "overwrite": str(overwrite).lower(),
-        }
-        payload_mapping = {
-            "year_range": year_range,
-            "server": server,
-            "bucket": bucket,
-        }
-        for key, value in payload_mapping.items():
-            if value is not None:
-                payload[key] = value
-        return await self._client._terrakio_request("POST", "random_sample", json=payload)
+        response, status = await self._client._terrakio_request("POST", f"tasks/cancel/{task_id}")
+        if status != 200:
+            if status == 404:
+                raise TaskNotFoundError(f"Task {task_id} not found", status_code=status)
+            raise CancelTaskError(f"Cancel task failed with status {status}", status_code=status)
 
+        return response
 
     @require_api_key
-    def
+    async def cancel_collection_tasks(
+        self,
+        collection: str
+    ):
         """
-
+        Cancel all tasks for a collection.
 
         Args:
-
-            levels: The levels of the pyramids
-            config: The config of the job
+            collection: Name of collection
 
         Returns:
-            API response as a dictionary
+            API response as a dictionary containing task information for the collection
+
+        Raises:
+            CollectionNotFoundError: If the collection is not found
+            CancelCollectionTasksError: If the API request fails due to unknown reasons
         """
-
-
-
-
-
-
+
+        response, status = await self._client._terrakio_request("POST", f"collections/{collection}/cancel")
+        if status != 200:
+            if status == 404:
+                raise CollectionNotFoundError(f"Collection {collection} not found", status_code=status)
+            raise CancelCollectionTasksError(f"Cancel collection tasks failed with status {status}", status_code=status)
 
+        return response
+
     @require_api_key
-    async def
+    async def cancel_all_tasks(
+        self
+    ):
         """
-
-
-        Args:
-            data_name: The name of the dataset
-            overwrite: Whether to overwrite the dataset
-            output: The output of the dataset
+        Cancel all tasks for the current user.
 
         Returns:
-            API response as a dictionary
+            API response as a dictionary containing task information for all tasks
 
         Raises:
-
+            CancelAllTasksError: If the API request fails due to unknown reasons
         """
-
-
-
-
-
-
-
-        return await self._client._terrakio_request("POST", "mass_stats/combine_tiles", json=payload)
+
+        response, status = await self._client._terrakio_request("POST", "tasks/cancel")
+
+        if status != 200:
+            raise CancelAllTasksError(f"Cancel all tasks failed with status {status}", status_code=status)
+
+        return response