terrakio-core 0.4.6__tar.gz → 0.4.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of terrakio-core might be problematic.

Files changed (27)
  1. {terrakio_core-0.4.6 → terrakio_core-0.4.7}/PKG-INFO +2 -1
  2. {terrakio_core-0.4.6 → terrakio_core-0.4.7}/pyproject.toml +2 -1
  3. {terrakio_core-0.4.6 → terrakio_core-0.4.7}/terrakio_core/__init__.py +1 -1
  4. {terrakio_core-0.4.6 → terrakio_core-0.4.7}/terrakio_core/async_client.py +8 -0
  5. {terrakio_core-0.4.6 → terrakio_core-0.4.7}/terrakio_core/convenience_functions/convenience_functions.py +25 -61
  6. {terrakio_core-0.4.6 → terrakio_core-0.4.7}/terrakio_core/endpoints/mass_stats.py +171 -39
  7. {terrakio_core-0.4.6 → terrakio_core-0.4.7}/terrakio_core/helper/tiles.py +13 -12
  8. {terrakio_core-0.4.6 → terrakio_core-0.4.7}/terrakio_core/sync_client.py +9 -0
  9. {terrakio_core-0.4.6 → terrakio_core-0.4.7}/terrakio_core.egg-info/PKG-INFO +2 -1
  10. {terrakio_core-0.4.6 → terrakio_core-0.4.7}/terrakio_core.egg-info/requires.txt +1 -0
  11. {terrakio_core-0.4.6 → terrakio_core-0.4.7}/README.md +0 -0
  12. {terrakio_core-0.4.6 → terrakio_core-0.4.7}/setup.cfg +0 -0
  13. {terrakio_core-0.4.6 → terrakio_core-0.4.7}/terrakio_core/accessors.py +0 -0
  14. {terrakio_core-0.4.6 → terrakio_core-0.4.7}/terrakio_core/client.py +0 -0
  15. {terrakio_core-0.4.6 → terrakio_core-0.4.7}/terrakio_core/config.py +0 -0
  16. {terrakio_core-0.4.6 → terrakio_core-0.4.7}/terrakio_core/endpoints/auth.py +0 -0
  17. {terrakio_core-0.4.6 → terrakio_core-0.4.7}/terrakio_core/endpoints/dataset_management.py +0 -0
  18. {terrakio_core-0.4.6 → terrakio_core-0.4.7}/terrakio_core/endpoints/group_management.py +0 -0
  19. {terrakio_core-0.4.6 → terrakio_core-0.4.7}/terrakio_core/endpoints/model_management.py +0 -0
  20. {terrakio_core-0.4.6 → terrakio_core-0.4.7}/terrakio_core/endpoints/space_management.py +0 -0
  21. {terrakio_core-0.4.6 → terrakio_core-0.4.7}/terrakio_core/endpoints/user_management.py +0 -0
  22. {terrakio_core-0.4.6 → terrakio_core-0.4.7}/terrakio_core/exceptions.py +0 -0
  23. {terrakio_core-0.4.6 → terrakio_core-0.4.7}/terrakio_core/helper/bounded_taskgroup.py +0 -0
  24. {terrakio_core-0.4.6 → terrakio_core-0.4.7}/terrakio_core/helper/decorators.py +0 -0
  25. {terrakio_core-0.4.6 → terrakio_core-0.4.7}/terrakio_core.egg-info/SOURCES.txt +0 -0
  26. {terrakio_core-0.4.6 → terrakio_core-0.4.7}/terrakio_core.egg-info/dependency_links.txt +0 -0
  27. {terrakio_core-0.4.6 → terrakio_core-0.4.7}/terrakio_core.egg-info/top_level.txt +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: terrakio-core
-Version: 0.4.6
+Version: 0.4.7
 Summary: Core components for Terrakio API clients
 Author-email: Yupeng Chao <yupeng@haizea.com.au>
 Project-URL: Homepage, https://github.com/HaizeaAnalytics/terrakio-python-api
@@ -28,6 +28,7 @@ Requires-Dist: onnxruntime>=1.10.0
 Requires-Dist: psutil>=5.0.0
 Requires-Dist: h5netcdf>=1.0.0
 Requires-Dist: netcdf4>=1.5.0
+Requires-Dist: aiofiles>=24.1.0
 Provides-Extra: ml
 Requires-Dist: torch>=2.7.1; extra == "ml"
 Requires-Dist: scikit-learn>=1.7.0; extra == "ml"
pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "terrakio-core"
-version = "0.4.6"
+version = "0.4.7"
 authors = [
     {name = "Yupeng Chao", email = "yupeng@haizea.com.au"},
 ]
@@ -35,6 +35,7 @@ dependencies = [
     "psutil>=5.0.0",
     "h5netcdf>=1.0.0",
     "netcdf4>=1.5.0",
+    "aiofiles>=24.1.0"
 ]
 
 [project.optional-dependencies]
terrakio_core/__init__.py

@@ -5,7 +5,7 @@ Terrakio Core
 Core components for Terrakio API clients.
 """
 
-__version__ = "0.4.6"
+__version__ = "0.4.7"
 
 from .async_client import AsyncClient
 from .sync_client import SyncClient as Client
terrakio_core/async_client.py

@@ -235,6 +235,7 @@ class AsyncClient(BaseClient):
 
     async def create_dataset_file(
         self,
+        name: str,
         aoi: str,
         expression: str,
         output: str,
@@ -247,6 +248,9 @@ class AsyncClient(BaseClient):
         non_interactive: bool = True,
         poll_interval: int = 30,
         download_path: str = "/home/user/Downloads",
+        mask = True,
+        max_file_size_mb: int = 5120,  # Default to 5GB
+        tile_size: int = 1024,
     ) -> dict:
         """
         Create a dataset file using mass stats operations.
@@ -286,6 +290,10 @@ class AsyncClient(BaseClient):
             non_interactive=non_interactive,
             poll_interval=poll_interval,
             download_path=download_path,
+            name=name,
+            mask=mask,
+            max_file_size_mb=max_file_size_mb,
+            tile_size=tile_size
         )
 
     async def geo_queries(
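
Usage note (not part of the released diff): a minimal sketch of calling the updated create_dataset_file on the async client. Only the parameter names and defaults come from the hunks above; the client construction, AOI path, and expression values are illustrative assumptions.

import asyncio
from terrakio_core import AsyncClient

async def main():
    # Client construction is assumed here; credential/config handling is not shown in this diff.
    client = AsyncClient()
    result = await client.create_dataset_file(
        name="my-dataset",              # new required parameter in 0.4.7
        aoi="./aoi.geojson",            # illustrative AOI path
        expression="red=S2v2#(year,median).red@(year =2024) \n red",
        output="netcdf",
        download_path="./downloads",
        mask=True,                      # new: clip tiles to the AOI geometry
        max_file_size_mb=5120,          # new: cap on each combined output file
        tile_size=1024,                 # new: tile edge length in pixels
    )
    # Per the convenience function below, the result carries the generation and combine task ids.
    print(result)

asyncio.run(main())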
terrakio_core/convenience_functions/convenience_functions.py

@@ -438,15 +438,7 @@ async def handle_mass_stats(
     id_column: Optional[str] = None,
 
 ):
-    # we have the handle mass stats function, we need to have the list of quries, and we need to pass the quries to the mass stats function
-    # we have the three different variables
-
-    # Check if id_column is provided
-    # if id_column is None:
-        # Handle case where no ID column is specified
-        # this means that the id column is none, so we could just use the default value of 1 2 3 4
     request_json = gdf_to_json(gdf = gdf, expr = expr, in_crs = in_crs, out_crs = out_crs, resolution = resolution, geom_fix = geom_fix, id_column = id_column)
-    # we need to call the execute job function
     job_id = await client.mass_stats.execute_job(
         name = "zonal_stats_job",
         output = "netcdf",
@@ -455,32 +447,7 @@ async def handle_mass_stats(
         overwrite = True,
     )
     return job_id
-    # async def test_regular_async_mass_stats(regular_async_client):
-    #     """Test mass statistics with regular client async"""
-    #     start_result = await regular_async_client.mass_stats.execute_job(
-    #         name="test_regular_mass_stats_test",
-    #         region="aus",
-    #         output="csv",
-    #         config={},
-    #         request_json = "./test_config.json",
-    #         manifest_json = "./test_manifest.json",
-    #         overwrite=True,
-    #     )
-    #     assert isinstance(start_result, dict)
-    #     assert 'task_id' in start_result
-
-    #     return
-    # else:
-    #     # Handle case where ID column is specified
-    #     # Verify the column exists in the GeoDataFrame
-
-    #     if id_column not in gdf.columns:
-    #         raise ValueError(f"ID column '{id_column}' not found in GeoDataFrame columns: {list(gdf.columns)}")
-    #     pass
-    # the second case is that we have an id_column, we need to use the id_column to create the group name
 
-    # we have the mass stats as one of the parameters, so that when a user wants a mass
-    # for both cases we need to have the list of quries
 async def zonal_stats(
     client,
     gdf: GeoDataFrame,
@@ -506,7 +473,6 @@ async def zonal_stats(
             geom_fix = geom_fix,
             id_column = id_column
         )
-        # if we started the mass stats job, we need to return the job id
         return mass_stats_id
     quries = []
     for i in range(len(gdf)):
@@ -536,30 +502,35 @@ async def create_dataset_file(
     aoi: str,
     expression: str,
     output: str,
+    download_path: str,
     in_crs: str = "epsg:4326",
-    res: float = 0.0001,
-    region: str = "aus",
     to_crs: str = "epsg:4326",
-    overwrite: bool = True,
+    res: float = 0.0001,
+    region: str = None,
+    overwrite: bool = False,
     skip_existing: bool = False,
     non_interactive: bool = True,
+    name: str | None = None,
     poll_interval: int = 30,
-    download_path: str = "/home/user/Downloads",
+    max_file_size_mb: int = 5120,
+    tile_size: int = 1024,
+    mask: bool = True
 ) -> dict:
 
-    name = f"tiles-{uuid.uuid4().hex[:8]}"
+    if not name:
+        name = f"file-gen-{uuid.uuid4().hex[:8]}"
 
     body, reqs, groups = tiles(
         name = name,
         aoi = aoi,
         expression = expression,
         output = output,
-        tile_size = 128,
+        tile_size = tile_size,
         crs = in_crs,
         res = res,
         region = region,
         to_crs = to_crs,
-        fully_cover = True,
+        mask = mask,
         overwrite = overwrite,
         skip_existing = skip_existing,
         non_interactive = non_interactive
@@ -567,9 +538,6 @@ async def create_dataset_file(
     with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as tempreq:
         tempreq.write(reqs)
         tempreqname = tempreq.name
-    with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as tempmanifest:
-        tempmanifest.write(groups)
-        tempmanifestname = tempmanifest.name
 
     task_id = await client.mass_stats.execute_job(
         name=body["name"],
@@ -579,28 +547,24 @@ async def create_dataset_file(
         overwrite=body["overwrite"],
         skip_existing=body["skip_existing"],
         request_json=tempreqname,
-        manifest_json=tempmanifestname,
     )
 
     start_time = time.time()
     status = None
-
+    client.logger.info(f"Tracking data generation job {task_id['task_id']}...")
     while True:
         try:
             taskid = task_id['task_id']
             trackinfo = await client.mass_stats.track_job([taskid])
-            client.logger.info("the trackinfo is: ", trackinfo)
             status = trackinfo[taskid]['status']
-
             if status == 'Completed':
-                client.logger.info('Tiles generated successfully!')
+                client.logger.info('Data generated successfully!')
                 break
             elif status in ['Failed', 'Cancelled', 'Error']:
                 raise RuntimeError(f"Job {taskid} failed with status: {status}")
             else:
                 elapsed_time = time.time() - start_time
-                client.logger.info(f"Job status: {status} - Elapsed time: {elapsed_time:.1f}s", end='\r')
-
+                client.logger.info(f"Job status: {status} - Elapsed time: {elapsed_time:.1f}s")
                 await asyncio.sleep(poll_interval)
 
 
@@ -612,16 +576,15 @@ async def create_dataset_file(
             raise
 
     os.unlink(tempreqname)
-    os.unlink(tempmanifestname)
 
-    combine_result = await client.mass_stats.combine_tiles(body["name"], body["overwrite"], body["output"])
+    combine_result = await client.mass_stats.combine_tiles(body["name"], body["overwrite"], body["output"], max_file_size_mb=max_file_size_mb)
     combine_task_id = combine_result.get("task_id")
 
     combine_start_time = time.time()
+    client.logger.info(f"Tracking file generation job {combine_task_id}...")
     while True:
         try:
             trackinfo = await client.mass_stats.track_job([combine_task_id])
-            client.logger.info('client create dataset file track info:', trackinfo)
             if body["output"] == "netcdf":
                 download_file_name = trackinfo[combine_task_id]['folder'] + '.nc'
             elif body["output"] == "geotiff":
@@ -629,19 +592,19 @@ async def create_dataset_file(
             bucket = trackinfo[combine_task_id]['bucket']
             combine_status = trackinfo[combine_task_id]['status']
             if combine_status == 'Completed':
-                client.logger.info('Tiles combined successfully!')
+                client.logger.info('File/s generated successfully!')
                 break
             elif combine_status in ['Failed', 'Cancelled', 'Error']:
-                raise RuntimeError(f"Combine job {combine_task_id} failed with status: {combine_status}")
+                raise RuntimeError(f"File generation job {combine_task_id} failed with status: {combine_status}")
             else:
                 elapsed_time = time.time() - combine_start_time
-                client.logger.info(f"Combine job status: {combine_status} - Elapsed time: {elapsed_time:.1f}s", end='\r')
+                client.logger.info(f"File generation job status: {combine_status} - Elapsed time: {elapsed_time:.1f}s")
                 time.sleep(poll_interval)
         except KeyboardInterrupt:
-            client.logger.info(f"\nInterrupted! Combine job {combine_task_id} is still running in the background.")
+            client.logger.info(f"\nInterrupted! File generation job {combine_task_id} is still running in the background.")
             raise
         except Exception as e:
-            client.logger.info(f"\nError tracking combine job: {e}")
+            client.logger.info(f"\nError tracking file generation job: {e}")
             raise
 
     if download_path:
@@ -649,11 +612,12 @@ async def create_dataset_file(
             job_name=body["name"],
             bucket=bucket,
             file_type='processed',
-            page_size=10,
+            folder='file-gen',
+            page_size=100,
             output_path=download_path,
         )
     else:
         path = f"{body['name']}/outputs/merged/{download_file_name}"
-        client.logger.info(f"Combined file is available at {path}")
+        client.logger.info(f"Dataset file/s is available at {path}")
 
     return {"generation_task_id": task_id, "combine_task_id": combine_task_id}
terrakio_core/endpoints/mass_stats.py

@@ -7,6 +7,13 @@ from urllib.parse import urlparse
 from ..helper.decorators import require_token, require_api_key, require_auth
 import aiohttp
 from typing import Dict, Any, Optional, List, Union
+import asyncio
+import xarray as xr
+from io import BytesIO
+import geopandas as gpd
+from shapely.geometry import shape
+from ..convenience_functions.convenience_functions import expand_on_variables_and_time
+
 class MassStats:
     def __init__(self, client):
         self._client = client
@@ -19,6 +26,7 @@ class MassStats:
         sample: str,
         output: str,
         config: Dict[str, Any],
+        region: str = None,
         overwrite: bool = False,
         skip_existing: bool = False,
         location: Optional[str] = None,
@@ -55,7 +63,8 @@ class MassStats:
             "config": config,
             "overwrite": overwrite,
             "skip_existing": skip_existing,
-            "server": server
+            "server": server,
+            "region": region
         }
         payload_mapping = {
             "location": location,
@@ -66,7 +75,6 @@ class MassStats:
                 payload[key] = str(value).lower()
         return await self._client._terrakio_request("POST", "mass_stats/upload", json=payload)
 
-
     @require_api_key
     async def start_job(self, id: str) -> Dict[str, Any]:
         """
@@ -276,6 +284,7 @@ class MassStats:
         bucket: str,
         file_type: str,
         output_path: str,
+        folder: str = None,
         page_size: int = None,
     ) -> list:
         """
@@ -303,7 +312,8 @@ class MassStats:
         request_body = {
             "job_name": job_name,
             "bucket": bucket,
-            "file_type": file_type
+            "file_type": file_type,
+            "folder": folder
         }
 
         output_dir = Path(output_path)
@@ -311,8 +321,7 @@ class MassStats:
         output_files = []
 
         async def download_urls_batch(download_urls, session):
-            for url in download_urls:
-                self._client.logger.info(f"Processing download URL: {url}")
+            for i, url in enumerate(download_urls):
                 parsed = urlparse(url)
                 path_parts = Path(parsed.path).parts
                 try:
@@ -322,13 +331,13 @@ class MassStats:
                     subpath = Path(path_parts[-1])
                     file_save_path = output_dir / subpath
                     file_save_path.parent.mkdir(parents=True, exist_ok=True)
-                    self._client.logger.info(f"Downloading file to {file_save_path}")
+                    self._client.logger.info(f"Downloading file to {file_save_path} ({i+1}/{len(download_urls)})")
 
                     async with session.get(url) as resp:
                         resp.raise_for_status()
                         import aiofiles
                         async with aiofiles.open(file_save_path, 'wb') as file:
-                            async for chunk in resp.content.iter_chunked(1048576):
+                            async for chunk in resp.content.iter_chunked(1048576):  # 1 MB
                                 if chunk:
                                     await file.write(chunk)
 
@@ -352,7 +361,6 @@ class MassStats:
                 response = await self._client._terrakio_request("POST", "mass_stats/download_files", json=request_body, params=params)
                 data = response
 
-                self._client.logger.info(f'processed, endpoint response is {data}')
                 download_urls = data.get('download_urls')
                 if not download_urls:
                     break
@@ -363,7 +371,7 @@ class MassStats:
                 if total_files is not None and downloaded_files >= total_files:
                     break
                 if len(download_urls) < page_size:
-                    break
+                    break  # Last page
                 page += 1
             return output_files
         except Exception as e:
@@ -392,13 +400,13 @@ class MassStats:
             if i == 3:
                 break
 
-    @require_api_key
     async def execute_job(
         self,
         name: str,
         output: str,
         config: Dict[str, Any],
-        request_json: Union[str, list[Dict[str, Any]]],
+        request_json: str,  # Path to request JSON file
+        region: str = None,
         overwrite: bool = False,
         skip_existing: bool = False,
         location: str = None,
@@ -425,6 +433,7 @@ class MassStats:
         Raises:
             APIError: If the API request fails
         """
+
         def extract_manifest_from_request(request_data: List[Dict[str, Any]]) -> List[str]:
             """Extract unique group names from request data to create manifest list."""
             groups = []
@@ -444,35 +453,34 @@ class MassStats:
 
             return groups
 
-        if isinstance(request_json, str):
-            try:
-                with open(request_json, 'r') as file:
-                    request_data = json.load(file)
-                    if isinstance(request_data, list):
-                        size = len(request_data)
-                    else:
-                        raise ValueError(f"Request JSON file {request_json} should contain a list of dictionaries")
-            except FileNotFoundError as e:
-                return e
-            except json.JSONDecodeError as e:
-                return e
-            request_json_path = request_json
-        else:
-            request_data = request_json
-            size = len(request_data)
-            request_json_path = None
+        # Load and validate request JSON
+        try:
+            with open(request_json, 'r') as file:
+                request_data = json.load(file)
+                if isinstance(request_data, list):
+                    size = len(request_data)
+                else:
+                    raise ValueError(f"Request JSON file {request_json} should contain a list of dictionaries")
+        except FileNotFoundError as e:
+            return e
+        except json.JSONDecodeError as e:
+            return e
 
+        # Generate manifest from request data (kept in memory)
         try:
             manifest_groups = extract_manifest_from_request(request_data)
         except Exception as e:
             raise ValueError(f"Error extracting manifest from request JSON: {e}")
 
-        first_request = request_data[0]
+        # Extract the first expression
+        first_request = request_data[0]  # Changed from data[0] to request_data[0]
         first_expression = first_request["request"]["expr"]
 
+        # Get upload URLs
         upload_result = await self._upload_request(
             name=name,
             size=size,
+            region=region,
             sample = first_expression,
             output=output,
             config=config,
@@ -488,21 +496,21 @@ class MassStats:
 
         if not requests_url:
             raise ValueError("No requests_url returned from server for request JSON upload")
+
+        # Upload request JSON file
         try:
-            if request_json_path:
-                self.validate_request(request_json_path)
-                requests_response = await self._upload_file(request_json_path, requests_url, use_gzip=True)
-            else:
-                requests_response = await self._upload_json_data(request_data, requests_url, use_gzip=True)
+            self.validate_request(request_json)
+            requests_response = await self._upload_file(request_json, requests_url, use_gzip=True)
             if requests_response.status not in [200, 201, 204]:
                 self._client.logger.error(f"Requests upload error: {requests_response.text()}")
-                raise Exception(f"Failed to upload request data: {requests_response.text()}")
+                raise Exception(f"Failed to upload request JSON: {requests_response.text()}")
         except Exception as e:
             raise Exception(f"Error uploading request JSON file {request_json}: {e}")
-
+
         if not manifest_url:
             raise ValueError("No manifest_url returned from server for manifest JSON upload")
 
+        # Upload manifest JSON data directly (no temporary file needed)
         try:
             manifest_response = await self._upload_json_data(manifest_groups, manifest_url, use_gzip=False)
             if manifest_response.status not in [200, 201, 204]:
@@ -511,6 +519,7 @@ class MassStats:
         except Exception as e:
             raise Exception(f"Error uploading manifest JSON: {e}")
 
+        # Start the job
         start_job_task_id = await self.start_job(upload_result.get("id"))
         return start_job_task_id
 
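Usage note (not part of the released diff): in 0.4.7 execute_job only accepts a path to a request JSON file, and the manifest is derived from it in memory. A hedged sketch follows; the "group"/"file"/"request" layout mirrors the tile requests built in helper/tiles.py, while the exact set of request fields beyond "feature", "in_crs", and "expr" is an assumption.

import asyncio
import json

async def run_mass_stats_job(client):
    # One illustrative request entry; a real job would hold one entry per tile or feature.
    feature = {
        "type": "Feature",
        "geometry": {
            "type": "Polygon",
            "coordinates": [[[149.0, -35.4], [149.1, -35.4], [149.1, -35.3],
                             [149.0, -35.3], [149.0, -35.4]]],
        },
    }
    requests = [{
        "group": "tiles",
        "file": "demo_00_00",
        "request": {
            "feature": feature,
            "in_crs": "epsg:4326",
            "expr": "red=S2v2#(year,median).red@(year =2024) \n red",
        },
    }]
    with open("requests.json", "w") as f:
        json.dump(requests, f)

    task = await client.mass_stats.execute_job(
        name="demo-job",
        output="netcdf",
        config={},
        request_json="requests.json",   # must be a file path in 0.4.7
        region="aus",                   # new optional region parameter
        overwrite=True,
    )
    task_id = task["task_id"]
    while True:
        info = await client.mass_stats.track_job([task_id])
        status = info[task_id]["status"]
        if status in ("Completed", "Failed", "Cancelled", "Error"):
            return status
        await asyncio.sleep(30)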
@@ -625,7 +634,7 @@ class MassStats:
         return self._client._terrakio_request("POST", "pyramids/create", json=payload)
 
     @require_api_key
-    async def combine_tiles(self, data_name: str, overwrite: bool = True, output: str = "netcdf") -> Dict[str, Any]:
+    async def combine_tiles(self, data_name: str, overwrite: bool = True, output: str = "netcdf", max_file_size_mb = 5120) -> Dict[str, Any]:
         """
         Combine tiles for a dataset.
 
@@ -642,7 +651,130 @@ class MassStats:
         """
         payload = {
             'data_name': data_name,
+            'folder': "file-gen",
             'output': output,
-            'overwrite': str(overwrite).lower()
+            'overwrite': str(overwrite).lower(),
+            'max_file_size_mb': max_file_size_mb
         }
-        return await self._client._terrakio_request("POST", "mass_stats/combine_tiles", json=payload)
+        return await self._client._terrakio_request("POST", "mass_stats/combine_tiles", json=payload)
+
+    @require_api_key
+    async def load_zonal_stats(self, job_id: str, max_files: int = 5, poll_interval: int = 30):
+        """
+        Load zonal stats results from a completed mass stats job.
+
+        Args:
+            job_id: The job ID returned from the mass stats execution
+            max_files: Maximum number of files to download (default: 5)
+            poll_interval: Seconds to wait between status checks (default: 30)
+
+        Returns:
+            GeoDataFrame with geometry and dataset columns, or None if failed
+        """
+        try:
+            while True:
+                try:
+                    track_info = await self.track_job([job_id])
+                    job_info = track_info[job_id]
+                    status = job_info['status']
+
+                    self._client.logger.info(f"Job {job_id} status: {status}")
+
+                    if status == 'Completed':
+                        self._client.logger.info('Job completed successfully!')
+                        break
+                    elif status in ['Failed', 'Cancelled', 'Error']:
+                        raise RuntimeError(f"Job {job_id} failed with status: {status}")
+
+                    await asyncio.sleep(poll_interval)
+
+                except KeyboardInterrupt:
+                    self._client.logger.info(f"\nInterrupted! Job {job_id} is still running.")
+                    raise
+
+            async with aiohttp.ClientSession() as session:
+                payload = {
+                    "job_name": job_info['name'],
+                    "file_type": "raw",
+                    "bucket": job_info['bucket']
+                }
+
+                result = await self._client._terrakio_request("POST", "mass_stats/download_files", json=payload)
+                download_urls = result['download_urls'][:max_files]
+
+                self._client.logger.info(f"Downloading {len(download_urls)} dataset files...")
+
+                datasets = []
+                for i, url in enumerate(download_urls):
+                    try:
+                        self._client.logger.info(f"Downloading dataset {i+1}/{len(download_urls)}...")
+                        async with session.get(url) as response:
+                            if response.status == 200:
+                                content = await response.read()
+                                dataset = xr.open_dataset(BytesIO(content))
+                                datasets.append(dataset)
+                                self._client.logger.info(f"Successfully processed dataset {i+1}")
+                            else:
+                                self._client.logger.warning(f"Failed to download dataset {i+1}: HTTP {response.status}")
+                    except Exception as e:
+                        self._client.logger.error(f"Error downloading dataset {i+1}: {e}")
+                        continue
+
+                if not datasets:
+                    self._client.logger.warning("No datasets were successfully downloaded")
+                    return gpd.GeoDataFrame({'geometry': [], 'dataset': []})
+
+                try:
+                    json_response = await self._client._terrakio_request(
+                        "POST", "mass_stats/download_json",
+                        params={"job_name": job_info['name']}
+                    )
+                    json_url = json_response["download_url"]
+
+                    async with session.get(json_url) as response:
+                        if response.status == 200:
+                            json_data = await response.json()
+                            self._client.logger.info("Successfully downloaded geometry data")
+
+                            geometries = []
+                            max_geometries = min(max_files, len(json_data), len(datasets))
+
+                            for i in range(max_geometries):
+                                try:
+                                    geom_dict = json_data[i]["request"]["feature"]["geometry"]
+                                    shapely_geom = shape(geom_dict)
+                                    geometries.append(shapely_geom)
+                                except (KeyError, ValueError) as e:
+                                    self._client.logger.warning(f"Error parsing geometry {i}: {e}")
+                                    continue
+
+                            min_length = min(len(datasets), len(geometries))
+                            if min_length == 0:
+                                self._client.logger.warning("No matching datasets and geometries found")
+                                return gpd.GeoDataFrame({'geometry': [], 'dataset': []})
+
+                            gdf = gpd.GeoDataFrame({
+                                'geometry': geometries[:min_length],
+                                'dataset': datasets[:min_length]
+                            })
+
+                            self._client.logger.info(f"Created GeoDataFrame with {len(gdf)} rows")
+
+                            try:
+                                expanded_gdf = expand_on_variables_and_time(gdf)
+                                return expanded_gdf
+                            except NameError:
+                                self._client.logger.warning("expand_on_variables_and_time function not found, returning raw GeoDataFrame")
+                                return gdf
+
+                        else:
+                            self._client.logger.warning(f"Failed to download geometry data: HTTP {response.status}")
+                            return gpd.GeoDataFrame({'geometry': [], 'dataset': []})
+
+                except Exception as e:
+                    self._client.logger.error(f"Error downloading geometry data: {e}")
+                    return gpd.GeoDataFrame({'geometry': [], 'dataset': []})
+
+        except Exception as e:
+            self._client.logger.error(f"Failed to load zonal stats for job {job_id}: {e}")
+            return None
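
Usage note (not part of the released diff): a short sketch of the new load_zonal_stats helper. The job id is a placeholder and the client construction is assumed; the method itself polls the job, downloads up to max_files result files, and returns a GeoDataFrame with geometry and dataset columns as shown above.

import asyncio
from terrakio_core import AsyncClient

async def main():
    client = AsyncClient()  # construction details assumed, not shown in this diff
    # "your-job-id" is a placeholder for the task id returned by mass_stats.execute_job
    gdf = await client.mass_stats.load_zonal_stats("your-job-id", max_files=5, poll_interval=30)
    if gdf is not None:
        print(len(gdf), list(gdf.columns))

asyncio.run(main())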
terrakio_core/helper/tiles.py

@@ -16,20 +16,21 @@ def get_bounds(aoi, crs, to_crs = None):
     bounds = aoi.geometry[0].bounds
     return *bounds, aoi
 
-def tile_generator(x_min, y_min, x_max, y_max, aoi, crs, res, tile_size, expression, output, fully_cover=True):
-    i_max = int((x_max-x_min)/(tile_size*res))
-    j_max = int((y_max-y_min)/(tile_size*res))
-    if fully_cover:
-        i_max += 1
-        j_max += 1
+def tile_generator(x_min, y_min, x_max, y_max, aoi, crs, res, tile_size, expression, output, mask):
+    i_max = int((x_max-x_min)/(tile_size*res)) + 1
+    j_max = int((y_max-y_min)/(tile_size*res)) + 1
     for j in range(0, int(j_max)):
         for i in range(0, int(i_max)):
             x = x_min + i*(tile_size*res)
             y = y_max - j*(tile_size*res)
-            bbox = shapely.geometry.box(x, y-(tile_size*res), x + (tile_size*res), y)
-            if not aoi.geometry[0].intersects(bbox):
+            geom = shapely.geometry.box(x, y-(tile_size*res), x + (tile_size*res), y)
+            if not aoi.geometry[0].intersects(geom):
                 continue
-            feat = {"type": "Feature", "geometry": bbox.__geo_interface__}
+            if mask:
+                geom = geom.intersection(aoi.geometry[0])
+                if geom.is_empty:
+                    continue
+            feat = {"type": "Feature", "geometry": geom.__geo_interface__}
             data = {
                 "feature": feat,
                 "in_crs": crs,
@@ -46,15 +47,15 @@ def tiles(
     aoi : str,
     expression: str = "red=S2v2#(year,median).red@(year =2024) \n red",
     output: str = "netcdf",
-    tile_size : float = 512,
+    tile_size : float = 1024,
     crs : str = "epsg:3577",
     res: float = 10,
     region : str = "eu",
     to_crs: str = None,
-    fully_cover: bool = True,
     overwrite: bool = False,
     skip_existing: bool = False,
     non_interactive: bool = False,
+    mask: bool = True,
 ):
 
     reqs = []
@@ -62,7 +63,7 @@ def tiles(
 
     if to_crs is None:
         to_crs = crs
-    for tile_req, i, j in tile_generator(x_min, y_min, x_max, y_max, aoi, to_crs, res, tile_size, expression, output, fully_cover):
+    for tile_req, i, j in tile_generator(x_min, y_min, x_max, y_max, aoi, to_crs, res, tile_size, expression, output, mask):
         req_name = f"{name}_{i:02d}_{j:02d}"
         reqs.append({"group": "tiles", "file": req_name, "request": tile_req})
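
To make the new tiling behaviour concrete, here is a standalone sketch that mirrors the updated tile_generator: the grid now always extends one extra row and column so the bounds are fully covered, and mask=True clips each tile box to the AOI instead of keeping the full bounding box. The helper function and AOI below are illustrative, not part of the package.

import shapely.geometry

def tile_boxes(aoi_geom, x_min, y_min, x_max, y_max, res, tile_size, mask=True):
    step = tile_size * res
    i_max = int((x_max - x_min) / step) + 1   # always +1 in 0.4.7 (was optional via fully_cover)
    j_max = int((y_max - y_min) / step) + 1
    for j in range(j_max):
        for i in range(i_max):
            x = x_min + i * step
            y = y_max - j * step
            geom = shapely.geometry.box(x, y - step, x + step, y)
            if not aoi_geom.intersects(geom):
                continue
            if mask:
                geom = geom.intersection(aoi_geom)  # clip the tile to the AOI
                if geom.is_empty:
                    continue
            yield i, j, geom

# Example: a 0.25 x 0.25 degree AOI at 0.0001 degree resolution with 1024-pixel tiles gives a 3 x 3 grid
aoi = shapely.geometry.box(149.0, -35.4, 149.25, -35.15)
print(sum(1 for _ in tile_boxes(aoi, 149.0, -35.4, 149.25, -35.15, 0.0001, 1024)))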
terrakio_core/sync_client.py

@@ -643,6 +643,7 @@ class SyncClient:
 
     def create_dataset_file(
         self,
+        name: str,
         aoi: str,
         expression: str,
         output: str,
@@ -655,6 +656,9 @@ class SyncClient:
         non_interactive: bool = True,
         poll_interval: int = 30,
         download_path: str = "/home/user/Downloads",
+        mask = True,
+        max_file_size_mb: int = 5120,  # Default to 5GB
+        tile_size: int = 1024,
     ) -> dict:
         """Create a dataset file using mass stats operations (synchronous version)."""
         coro = self._async_client.create_dataset_file(
@@ -670,9 +674,14 @@ class SyncClient:
             non_interactive=non_interactive,
             poll_interval=poll_interval,
             download_path=download_path,
+            name=name,
+            mask=mask,
+            max_file_size_mb=max_file_size_mb,
+            tile_size=tile_size
         )
         return self._run_async(coro)
 
+
     def geo_queries(
         self,
         queries: list[dict],
terrakio_core.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: terrakio-core
-Version: 0.4.6
+Version: 0.4.7
 Summary: Core components for Terrakio API clients
 Author-email: Yupeng Chao <yupeng@haizea.com.au>
 Project-URL: Homepage, https://github.com/HaizeaAnalytics/terrakio-python-api
@@ -28,6 +28,7 @@ Requires-Dist: onnxruntime>=1.10.0
 Requires-Dist: psutil>=5.0.0
 Requires-Dist: h5netcdf>=1.0.0
 Requires-Dist: netcdf4>=1.5.0
+Requires-Dist: aiofiles>=24.1.0
 Provides-Extra: ml
 Requires-Dist: torch>=2.7.1; extra == "ml"
 Requires-Dist: scikit-learn>=1.7.0; extra == "ml"
terrakio_core.egg-info/requires.txt

@@ -11,6 +11,7 @@ onnxruntime>=1.10.0
 psutil>=5.0.0
 h5netcdf>=1.0.0
 netcdf4>=1.5.0
+aiofiles>=24.1.0
 
 [ml]
 torch>=2.7.1