terrakio-core 0.4.5.tar.gz → 0.4.6.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (27)
  1. {terrakio_core-0.4.5 → terrakio_core-0.4.6}/PKG-INFO +4 -1
  2. {terrakio_core-0.4.5 → terrakio_core-0.4.6}/pyproject.toml +4 -1
  3. {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/__init__.py +1 -1
  4. {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/async_client.py +6 -0
  5. {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/convenience_functions/convenience_functions.py +146 -8
  6. {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/endpoints/mass_stats.py +28 -33
  7. {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/sync_client.py +193 -140
  8. {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core.egg-info/PKG-INFO +4 -1
  9. {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core.egg-info/requires.txt +3 -0
  10. {terrakio_core-0.4.5 → terrakio_core-0.4.6}/README.md +0 -0
  11. {terrakio_core-0.4.5 → terrakio_core-0.4.6}/setup.cfg +0 -0
  12. {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/accessors.py +0 -0
  13. {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/client.py +0 -0
  14. {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/config.py +0 -0
  15. {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/endpoints/auth.py +0 -0
  16. {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/endpoints/dataset_management.py +0 -0
  17. {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/endpoints/group_management.py +0 -0
  18. {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/endpoints/model_management.py +0 -0
  19. {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/endpoints/space_management.py +0 -0
  20. {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/endpoints/user_management.py +0 -0
  21. {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/exceptions.py +0 -0
  22. {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/helper/bounded_taskgroup.py +0 -0
  23. {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/helper/decorators.py +0 -0
  24. {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/helper/tiles.py +0 -0
  25. {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core.egg-info/SOURCES.txt +0 -0
  26. {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core.egg-info/dependency_links.txt +0 -0
  27. {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core.egg-info/top_level.txt +0 -0
--- terrakio_core-0.4.5/PKG-INFO
+++ terrakio_core-0.4.6/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: terrakio-core
-Version: 0.4.5
+Version: 0.4.6
 Summary: Core components for Terrakio API clients
 Author-email: Yupeng Chao <yupeng@haizea.com.au>
 Project-URL: Homepage, https://github.com/HaizeaAnalytics/terrakio-python-api
@@ -25,6 +25,9 @@ Requires-Dist: google-cloud-storage>=2.0.0
 Requires-Dist: scipy>=1.7.0
 Requires-Dist: nest_asyncio
 Requires-Dist: onnxruntime>=1.10.0
+Requires-Dist: psutil>=5.0.0
+Requires-Dist: h5netcdf>=1.0.0
+Requires-Dist: netcdf4>=1.5.0
 Provides-Extra: ml
 Requires-Dist: torch>=2.7.1; extra == "ml"
 Requires-Dist: scikit-learn>=1.7.0; extra == "ml"
--- terrakio_core-0.4.5/pyproject.toml
+++ terrakio_core-0.4.6/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "terrakio-core"
-version = "0.4.5"
+version = "0.4.6"
 authors = [
     {name = "Yupeng Chao", email = "yupeng@haizea.com.au"},
 ]
@@ -32,6 +32,9 @@ dependencies = [
     "scipy>=1.7.0",
     "nest_asyncio",
     "onnxruntime>=1.10.0",
+    "psutil>=5.0.0",
+    "h5netcdf>=1.0.0",
+    "netcdf4>=1.5.0",
 ]
 
 [project.optional-dependencies]
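
The three new runtime dependencies track the new mass-stats workflow: psutil presumably feeds the local-vs-remote sizing heuristics, while h5netcdf and netcdf4 provide NetCDF engines for the output="netcdf" jobs started by handle_mass_stats below. A minimal sketch of opening such a result, assuming xarray is available; the file name is a placeholder:

    # Hedged sketch: read a NetCDF job result with one of the newly pinned engines.
    import xarray as xr

    ds = xr.open_dataset("zonal_stats_job.nc", engine="h5netcdf")  # placeholder path
    print(ds.data_vars)
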
--- terrakio_core-0.4.5/terrakio_core/__init__.py
+++ terrakio_core-0.4.6/terrakio_core/__init__.py
@@ -5,7 +5,7 @@ Terrakio Core
 Core components for Terrakio API clients.
 """
 
-__version__ = "0.4.5"
+__version__ = "0.4.6"
 
 from .async_client import AsyncClient
 from .sync_client import SyncClient as Client
--- terrakio_core-0.4.5/terrakio_core/async_client.py
+++ terrakio_core-0.4.6/terrakio_core/async_client.py
@@ -196,6 +196,8 @@ class AsyncClient(BaseClient):
         out_crs: str = "epsg:4326",
         resolution: int = -1,
         geom_fix: bool = False,
+        mass_stats: bool = False,
+        id_column: Optional[str] = None,
     ):
         """
         Compute zonal statistics for all geometries in a GeoDataFrame.
@@ -208,6 +210,8 @@ class AsyncClient(BaseClient):
             out_crs (str): Output coordinate reference system
             resolution (int): Resolution parameter
             geom_fix (bool): Whether to fix the geometry (default False)
+            mass_stats (bool): Whether to use mass stats for processing (default False)
+            id_column (Optional[str]): Name of the ID column to use (default None)
 
         Returns:
             geopandas.GeoDataFrame: GeoDataFrame with added columns for results
@@ -225,6 +229,8 @@ class AsyncClient(BaseClient):
             out_crs=out_crs,
             resolution=resolution,
             geom_fix=geom_fix,
+            mass_stats=mass_stats,
+            id_column=id_column,
         )
 
     async def create_dataset_file(
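
With mass_stats=True the convenience layer (see convenience_functions below) submits a server-side mass-stats job and returns its job id rather than a populated GeoDataFrame. A hedged async sketch; the input file, expression, and column name are placeholders:

    import asyncio
    import geopandas as gpd
    from terrakio_core import AsyncClient

    async def main():
        gdf = gpd.read_file("sites.geojson")      # placeholder input
        async with AsyncClient(api_key="...") as client:
            job_id = await client.zonal_stats(
                gdf=gdf,
                expr="mean(ndvi)",                # placeholder expression
                mass_stats=True,                  # new in 0.4.6
                id_column="site_id",              # new in 0.4.6, optional
            )
            print(job_id)

    asyncio.run(main())
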
--- terrakio_core-0.4.5/terrakio_core/convenience_functions/convenience_functions.py
+++ terrakio_core-0.4.6/terrakio_core/convenience_functions/convenience_functions.py
@@ -24,6 +24,8 @@ import pyproj
 import pandas as pd
 import geopandas as gpd
 
+from typing import Optional
+
 def expand_on_time(gdf):
     """
     Expand datasets on time dimension - each time becomes a new row.
@@ -359,18 +361,153 @@ async def local_or_remote(
         "local_or_remote": "local",
         "reason": "The number of the requests is not too large, and the time taking for making these requests is not too long, and the size of the dataset is not too large",
     }
+
+def gdf_to_json(
+    gdf: GeoDataFrame,
+    expr: str,
+    in_crs: str = "epsg:4326",
+    out_crs: str = "epsg:4326",
+    resolution: int = -1,
+    geom_fix: bool = False,
+    id_column: Optional[str] = None,
+):
+    """
+    Convert a GeoDataFrame to a list of JSON requests for mass_stats processing.
+
+    Args:
+        gdf: GeoDataFrame containing geometries and optional metadata
+        expr: Expression to evaluate
+        in_crs: Input coordinate reference system
+        out_crs: Output coordinate reference system
+        resolution: Resolution parameter
+        geom_fix: Whether to fix geometry issues
+        id_column: Optional column name to use for group and file names
+
+    Returns:
+        list: List of dictionaries formatted for mass_stats requests
+    """
+    mass_stats_requests = []
+
+    # Loop through each row in the GeoDataFrame
+    for idx, row in gdf.iterrows():
+        # Create the request feature
+        request_feature = {
+            "expr": expr,
+            "feature": {
+                "type": "Feature",
+                "geometry": mapping(gdf.geometry.iloc[idx]),
+                "properties": {}
+            },
+            "in_crs": in_crs,
+            "out_crs": out_crs,
+            "resolution": resolution,
+            "geom_fix": geom_fix,
+        }
+
+        # Determine group name and file name based on id_column
+        if id_column is not None and id_column in gdf.columns:
+            # Use the value from the specified column as group and file name
+            identifier = str(row[id_column])
+            group_name = f"group_{identifier}"
+            file_name = f"file_{identifier}"
+        else:
+            # Use the index as group and file name
+            group_name = f"group_{idx}"
+            file_name = f"file_{idx}"
+
+        # Create the complete request entry
+        request_entry = {
+            "group": group_name,
+            "file": file_name,
+            "request": request_feature,
+        }
+
+        mass_stats_requests.append(request_entry)
+
+    return mass_stats_requests
+
+async def handle_mass_stats(
+    client,
+    gdf: GeoDataFrame,
+    expr: str,
+    in_crs: str = "epsg:4326",
+    out_crs: str = "epsg:4326",
+    resolution: int = -1,
+    geom_fix: bool = False,
+    id_column: Optional[str] = None,
+):
+    # Build the per-geometry request list (falling back to row indices for
+    # group/file names when no id_column is given), then start the job.
+    request_json = gdf_to_json(gdf=gdf, expr=expr, in_crs=in_crs, out_crs=out_crs, resolution=resolution, geom_fix=geom_fix, id_column=id_column)
+    job_id = await client.mass_stats.execute_job(
+        name="zonal_stats_job",
+        output="netcdf",
+        config={},
+        request_json=request_json,
+        overwrite=True,
+    )
+    return job_id
 
 async def zonal_stats(
-    client,
-    gdf: GeoDataFrame,
-    expr: str,
-    conc: int = 20,
-    in_crs: str = "epsg:4326",
-    out_crs: str = "epsg:4326",
-    resolution: int = -1,
-    geom_fix: bool = False,
+    client,
+    gdf: GeoDataFrame,
+    expr: str,
+    conc: int = 20,
+    in_crs: str = "epsg:4326",
+    out_crs: str = "epsg:4326",
+    resolution: int = -1,
+    geom_fix: bool = False,
+    mass_stats: bool = False,
+    id_column: Optional[str] = None,
 ):
     """Compute zonal statistics for all geometries in a GeoDataFrame."""
+
+    if mass_stats:
+        mass_stats_id = await handle_mass_stats(
+            client=client,
+            gdf=gdf,
+            expr=expr,
+            in_crs=in_crs,
+            out_crs=out_crs,
+            resolution=resolution,
+            geom_fix=geom_fix,
+            id_column=id_column,
+        )
+        # When a mass-stats job is started, return the job id instead of results.
+        return mass_stats_id
     quries = []
     for i in range(len(gdf)):
         quries.append({
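
Each entry emitted by gdf_to_json pairs a group/file name with a standard geoquery request body. A sketch of a single entry with hypothetical values (an id_column value of "42" and a small polygon):

    {
        "group": "group_42",
        "file": "file_42",
        "request": {
            "expr": "mean(ndvi)",                 # hypothetical expression
            "feature": {
                "type": "Feature",
                "geometry": {"type": "Polygon", "coordinates": [[[149.1, -35.3],
                             [149.2, -35.3], [149.2, -35.2], [149.1, -35.3]]]},
                "properties": {},
            },
            "in_crs": "epsg:4326",
            "out_crs": "epsg:4326",
            "resolution": -1,
            "geom_fix": False,
        },
    }
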
@@ -385,6 +522,7 @@ async def zonal_stats(
             "resolution": resolution,
             "geom_fix": geom_fix,
         })
+
     local_or_remote_result = await local_or_remote(client= client, quries = quries)
     if local_or_remote_result["local_or_remote"] == "remote":
         raise ValueError(local_or_remote_result["reason"])
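
Taken together, the new path is: gdf_to_json builds the per-geometry request list, handle_mass_stats hands it to execute_job, and zonal_stats short-circuits to return the job id. A hedged sketch of driving the helper directly, from inside an async function; `client`, `gdf`, the expression, and the column name are assumed/placeholder values:

    job_id = await handle_mass_stats(
        client=client,
        gdf=gdf,
        expr="mean(ndvi)",
        id_column="site_id",
    )
    print(f"mass-stats job started: {job_id}")
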
--- terrakio_core-0.4.5/terrakio_core/endpoints/mass_stats.py
+++ terrakio_core-0.4.6/terrakio_core/endpoints/mass_stats.py
@@ -328,7 +328,7 @@ class MassStats:
             resp.raise_for_status()
             import aiofiles
             async with aiofiles.open(file_save_path, 'wb') as file:
-                async for chunk in resp.content.iter_chunked(1048576): # 1 MB
+                async for chunk in resp.content.iter_chunked(1048576):
                     if chunk:
                         await file.write(chunk)
 
@@ -363,7 +363,7 @@ class MassStats:
                 if total_files is not None and downloaded_files >= total_files:
                     break
                 if len(download_urls) < page_size:
-                    break # Last page
+                    break
                 page += 1
             return output_files
         except Exception as e:
@@ -389,7 +389,6 @@ class MassStats:
                 raise ValueError("Request must be a dictionary")
             if not isinstance(request["file"], (str, int, list)):
                 raise ValueError("'file' must be a string or a list of strings")
-            # Only check the first 3 requests
             if i == 3:
                 break
 
@@ -397,10 +396,9 @@ class MassStats:
     async def execute_job(
         self,
         name: str,
-        # region: str,
         output: str,
         config: Dict[str, Any],
-        request_json: str, # Path to request JSON file
+        request_json: Union[str, list[Dict[str, Any]]],
         overwrite: bool = False,
         skip_existing: bool = False,
         location: str = None,
@@ -427,7 +425,6 @@ class MassStats:
         Raises:
             APIError: If the API request fails
         """
-
         def extract_manifest_from_request(request_data: List[Dict[str, Any]]) -> List[str]:
             """Extract unique group names from request data to create manifest list."""
             groups = []
@@ -447,34 +444,35 @@ class MassStats:
 
             return groups
 
-        # Load and validate request JSON
-        try:
-            with open(request_json, 'r') as file:
-                request_data = json.load(file)
-            if isinstance(request_data, list):
-                size = len(request_data)
-            else:
-                raise ValueError(f"Request JSON file {request_json} should contain a list of dictionaries")
-        except FileNotFoundError as e:
-            return e
-        except json.JSONDecodeError as e:
-            return e
+        if isinstance(request_json, str):
+            try:
+                with open(request_json, 'r') as file:
+                    request_data = json.load(file)
+                if isinstance(request_data, list):
+                    size = len(request_data)
+                else:
+                    raise ValueError(f"Request JSON file {request_json} should contain a list of dictionaries")
+            except FileNotFoundError as e:
+                return e
+            except json.JSONDecodeError as e:
+                return e
+            request_json_path = request_json
+        else:
+            request_data = request_json
+            size = len(request_data)
+            request_json_path = None
 
-        # Generate manifest from request data (kept in memory)
         try:
             manifest_groups = extract_manifest_from_request(request_data)
         except Exception as e:
             raise ValueError(f"Error extracting manifest from request JSON: {e}")
 
-        # Extract the first expression
-        first_request = request_data[0] # Changed from data[0] to request_data[0]
+        first_request = request_data[0]
         first_expression = first_request["request"]["expr"]
 
-        # Get upload URLs
         upload_result = await self._upload_request(
             name=name,
             size=size,
-            # region=region,
             sample = first_expression,
             output=output,
             config=config,
@@ -490,21 +488,21 @@ class MassStats:
 
         if not requests_url:
             raise ValueError("No requests_url returned from server for request JSON upload")
-
-        # Upload request JSON file
         try:
-            self.validate_request(request_json)
-            requests_response = await self._upload_file(request_json, requests_url, use_gzip=True)
+            if request_json_path:
+                self.validate_request(request_json_path)
+                requests_response = await self._upload_file(request_json_path, requests_url, use_gzip=True)
+            else:
+                requests_response = await self._upload_json_data(request_data, requests_url, use_gzip=True)
             if requests_response.status not in [200, 201, 204]:
                 self._client.logger.error(f"Requests upload error: {requests_response.text()}")
-                raise Exception(f"Failed to upload request JSON: {requests_response.text()}")
+                raise Exception(f"Failed to upload request data: {requests_response.text()}")
         except Exception as e:
             raise Exception(f"Error uploading request JSON file {request_json}: {e}")
 
         if not manifest_url:
             raise ValueError("No manifest_url returned from server for manifest JSON upload")
 
-        # Upload manifest JSON data directly (no temporary file needed)
         try:
             manifest_response = await self._upload_json_data(manifest_groups, manifest_url, use_gzip=False)
             if manifest_response.status not in [200, 201, 204]:
@@ -513,7 +511,6 @@ class MassStats:
         except Exception as e:
             raise Exception(f"Error uploading manifest JSON: {e}")
 
-        # Start the job
         start_job_task_id = await self.start_job(upload_result.get("id"))
         return start_job_task_id
 
@@ -557,7 +554,6 @@ class MassStats:
         tile_size: int,
         res: float,
         output: str,
-        # region: str,
         year_range: list[int] = None,
         overwrite: bool = False,
         server: str = None,
@@ -600,7 +596,6 @@ class MassStats:
         payload_mapping = {
             "year_range": year_range,
             "server": server,
-            # "region": region,
             "bucket": bucket,
         }
         for key, value in payload_mapping.items():
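
execute_job now accepts either a path to a request JSON file or an in-memory list of request dictionaries; the list form is what handle_mass_stats uses, and it is uploaded via _upload_json_data instead of _upload_file. A hedged sketch from inside an async function, reusing gdf_to_json from the convenience layer; `client`, `gdf`, and the expression are assumed/placeholder values:

    requests = gdf_to_json(gdf=gdf, expr="mean(ndvi)")
    task_id = await client.mass_stats.execute_job(
        name="zonal_stats_job",
        output="netcdf",
        config={},
        request_json=requests,   # in-memory list, new in 0.4.6; a file path still works
        overwrite=True,
    )
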
--- terrakio_core-0.4.5/terrakio_core/sync_client.py
+++ terrakio_core-0.4.6/terrakio_core/sync_client.py
@@ -339,38 +339,41 @@
 
 
 import asyncio
-import functools
 import concurrent.futures
-from typing import Optional, Dict, Any, Union
+import threading
+import functools
+import inspect
+from typing import Optional, Dict, Any, Union, TYPE_CHECKING
 from geopandas import GeoDataFrame
 from shapely.geometry.base import BaseGeometry as ShapelyGeometry
 from .async_client import AsyncClient
 
+# Add type checking imports for better IDE support
+if TYPE_CHECKING:
+    from .endpoints.dataset_management import DatasetManagement
+    from .endpoints.user_management import UserManagement
+    from .endpoints.mass_stats import MassStats
+    from .endpoints.group_management import GroupManagement
+    from .endpoints.space_management import SpaceManagement
+    from .endpoints.model_management import ModelManagement
+    from .endpoints.auth import AuthClient
+
 
 class SyncWrapper:
-    """
-    Generic synchronous wrapper with __dir__ support for runtime autocomplete.
-    """
+    """Generic synchronous wrapper with __dir__ support for runtime autocomplete."""
 
     def __init__(self, async_obj, sync_client):
         self._async_obj = async_obj
         self._sync_client = sync_client
 
     def __dir__(self):
-        """
-        Return list of attributes for autocomplete in interactive environments.
-        This enables autocomplete in Jupyter/iPython after instantiation.
-        """
+        """Return list of attributes for autocomplete in interactive environments."""
         async_attrs = [attr for attr in dir(self._async_obj) if not attr.startswith('_')]
-
         wrapper_attrs = [attr for attr in object.__dir__(self) if not attr.startswith('_')]
-
         return list(set(async_attrs + wrapper_attrs))
 
     def __getattr__(self, name):
-        """
-        Dynamically wrap any method call to convert async to sync.
-        """
+        """Dynamically wrap any method call to convert async to sync."""
         attr = getattr(self._async_obj, name)
 
         if callable(attr):
@@ -387,15 +390,32 @@ class SyncWrapper:
 
 
 class SyncClient:
     """
-    Synchronous wrapper with __dir__ support for runtime autocomplete.
-    Works best in interactive environments like Jupyter/iPython.
+    Thread-safe synchronous wrapper for AsyncClient.
+    Uses a persistent event loop in a dedicated thread to avoid event loop conflicts.
     """
+
+    # Add explicit type annotations for endpoint managers
+    datasets: 'DatasetManagement'
+    users: 'UserManagement'
+    mass_stats: 'MassStats'
+    groups: 'GroupManagement'
+    space: 'SpaceManagement'
+    model: 'ModelManagement'
+    auth: 'AuthClient'
 
     def __init__(self, url: Optional[str] = None, api_key: Optional[str] = None, verbose: bool = False):
         self._async_client = AsyncClient(url=url, api_key=api_key, verbose=verbose)
         self._context_entered = False
         self._closed = False
 
+        # Thread and event loop management
+        self._loop = None
+        self._thread = None
+        self._loop_ready = None
+        self._loop_exception = None
+
+        # Initialize endpoint managers with proper typing
+
         self.datasets = SyncWrapper(self._async_client.datasets, self)
         self.users = SyncWrapper(self._async_client.users, self)
         self.mass_stats = SyncWrapper(self._async_client.mass_stats, self)
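
The TYPE_CHECKING block plus string annotations is the standard way to hand IDEs concrete types without runtime imports (and without risking import cycles); a small self-contained sketch of the pattern, using a stand-in class rather than the package's endpoint managers:

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        from decimal import Decimal   # seen only by static analyzers, never at runtime

    class Holder:
        value: 'Decimal'              # string annotation, resolved lazily by type checkers

    h = Holder()                      # runs fine even though Decimal was never imported
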
@@ -407,21 +427,150 @@ class SyncClient:
         import atexit
         atexit.register(self._cleanup)
 
-    def __dir__(self):
+    def _ensure_event_loop(self) -> None:
+        """Ensure we have a persistent event loop in a dedicated thread."""
+        if self._loop is None or self._loop.is_closed():
+            self._loop_ready = threading.Event()
+            self._loop_exception = None
+
+            def run_loop():
+                """Run the event loop in a dedicated thread."""
+                try:
+                    # Create a new event loop for this thread
+                    self._loop = asyncio.new_event_loop()
+                    asyncio.set_event_loop(self._loop)
+
+                    # Signal that the loop is ready
+                    self._loop_ready.set()
+
+                    # Run the loop forever (until stopped)
+                    self._loop.run_forever()
+                except Exception as e:
+                    self._loop_exception = e
+                    self._loop_ready.set()
+                finally:
+                    # Clean up when the loop stops
+                    if self._loop and not self._loop.is_closed():
+                        self._loop.close()
+
+            # Start the thread
+            self._thread = threading.Thread(target=run_loop, daemon=True)
+            self._thread.start()
+
+            # Wait for the loop to be ready
+            self._loop_ready.wait(timeout=10)
+
+            if self._loop_exception:
+                raise self._loop_exception
+
+            if not self._loop_ready.is_set():
+                raise RuntimeError("Event loop failed to start within timeout")
+
+    def _run_async(self, coro):
         """
-        Return list of attributes for autocomplete in interactive environments.
-        This includes all methods from the async client plus the endpoint managers.
+        Run async coroutine using persistent event loop.
+        This is the core method that makes everything work.
         """
-        default_attrs = [attr for attr in object.__dir__(self) if not attr.startswith('_')]
+        # Ensure we have an event loop
+        self._ensure_event_loop()
 
-        async_client_attrs = [attr for attr in dir(self._async_client) if not attr.startswith('_')]
+        if self._loop.is_closed():
+            raise RuntimeError("Event loop is closed")
 
-        endpoint_attrs = ['datasets', 'users', 'mass_stats', 'groups', 'space', 'model', 'auth']
+        # Create a future to get the result back from the event loop thread
+        future = concurrent.futures.Future()
 
-        all_attrs = default_attrs + async_client_attrs + endpoint_attrs
+        async def run_with_context():
+            """Run the coroutine with proper context management."""
+            try:
+                # Ensure the async client is properly initialized
+                await self._ensure_context()
+
+                # Run the actual coroutine
+                result = await coro
+
+                # Set the result on the future
+                future.set_result(result)
+            except Exception as e:
+                # Set the exception on the future
+                future.set_exception(e)
+
+        # Schedule the coroutine on the persistent event loop
+        self._loop.call_soon_threadsafe(
+            lambda: asyncio.create_task(run_with_context())
+        )
 
+        # Wait for the result (with timeout to avoid hanging)
+        try:
+            return future.result(timeout=300)  # 5 minute timeout
+        except concurrent.futures.TimeoutError:
+            raise RuntimeError("Async operation timed out after 5 minutes")
+
+    async def _ensure_context(self) -> None:
+        """Ensure the async client context is entered."""
+        if not self._context_entered and not self._closed:
+            await self._async_client.__aenter__()
+            self._context_entered = True
+
+    async def _exit_context(self) -> None:
+        """Exit the async client context."""
+        if self._context_entered and not self._closed:
+            await self._async_client.__aexit__(None, None, None)
+            self._context_entered = False
+
+    def close(self) -> None:
+        """Close the underlying async client session and stop the event loop."""
+        if not self._closed:
+            if self._loop and not self._loop.is_closed():
+                # Schedule cleanup on the event loop
+                future = concurrent.futures.Future()
+
+                async def cleanup():
+                    """Clean up the async client."""
+                    try:
+                        await self._exit_context()
+                        future.set_result(None)
+                    except Exception as e:
+                        future.set_exception(e)
+
+                # Run cleanup
+                self._loop.call_soon_threadsafe(
+                    lambda: asyncio.create_task(cleanup())
+                )
+
+                # Wait for cleanup to complete
+                try:
+                    future.result(timeout=10)
+                except:
+                    pass  # Ignore cleanup errors
+
+                # Stop the event loop
+                self._loop.call_soon_threadsafe(self._loop.stop)
+
+                # Wait for thread to finish
+                if self._thread and self._thread.is_alive():
+                    self._thread.join(timeout=5)
+
+            self._closed = True
+
+    def _cleanup(self) -> None:
+        """Internal cleanup method called by atexit."""
+        if not self._closed:
+            try:
+                self.close()
+            except Exception:
+                pass  # Ignore cleanup errors
+
+    def __dir__(self):
+        """Return list of attributes for autocomplete in interactive environments."""
+        default_attrs = [attr for attr in object.__dir__(self) if not attr.startswith('_')]
+        async_client_attrs = [attr for attr in dir(self._async_client) if not attr.startswith('_')]
+        endpoint_attrs = ['datasets', 'users', 'mass_stats', 'groups', 'space', 'model', 'auth']
+        all_attrs = default_attrs + async_client_attrs + endpoint_attrs
         return list(set(all_attrs))
 
     def geoquery(
         self,
         expr: str,
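
The _ensure_event_loop/_run_async pair is the classic background-thread event loop pattern. For reference, the submission half can also be expressed with the standard asyncio.run_coroutine_threadsafe, which returns a concurrent.futures.Future just like the hand-rolled version above; a minimal self-contained sketch:

    import asyncio
    import threading

    loop = asyncio.new_event_loop()
    threading.Thread(target=loop.run_forever, daemon=True).start()

    async def work():
        await asyncio.sleep(0.1)
        return 42

    future = asyncio.run_coroutine_threadsafe(work(), loop)  # submit from this thread
    print(future.result(timeout=5))                          # blocks; prints 42
    loop.call_soon_threadsafe(loop.stop)                     # shut the loop down
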
@@ -446,15 +595,18 @@ class SyncClient:
         return self._run_async(coro)
 
     def zonal_stats(
-        self,
-        gdf: GeoDataFrame,
-        expr: str,
-        conc: int = 20,
-        in_crs: str = "epsg:4326",
-        out_crs: str = "epsg:4326",
-        resolution: int = -1,
-        geom_fix: bool = False,
-    ):
+        self,
+        gdf: GeoDataFrame,
+        expr: str,
+        conc: int = 20,
+        in_crs: str = "epsg:4326",
+        out_crs: str = "epsg:4326",
+        resolution: int = -1,
+        geom_fix: bool = False,
+        mass_stats: bool = False,
+        id_column: Optional[str] = None,
+    ) -> GeoDataFrame:
+
         """
         Compute zonal statistics for all geometries in a GeoDataFrame (synchronous version).
@@ -466,6 +618,8 @@ class SyncClient:
             out_crs (str): Output coordinate reference system
             resolution (int): Resolution parameter
             geom_fix (bool): Whether to fix the geometry (default False)
+            mass_stats (bool): Whether to use mass stats for processing (default False)
+            id_column (Optional[str]): Name of the ID column to use (default None)
 
         Returns:
             geopandas.GeoDataFrame: GeoDataFrame with added columns for results
@@ -482,6 +636,8 @@ class SyncClient:
             out_crs=out_crs,
             resolution=resolution,
             geom_fix=geom_fix,
+            mass_stats=mass_stats,
+            id_column=id_column,
         )
         return self._run_async(coro)
 
@@ -521,7 +677,7 @@ class SyncClient:
         self,
         queries: list[dict],
         conc: int = 20,
-    ):
+    ) -> Union[float, GeoDataFrame]:
         """
         Execute multiple geo queries concurrently (synchronous version).
 
@@ -559,120 +715,17 @@ class SyncClient:
             conc=conc,
         )
         return self._run_async(coro)
-
-    async def _ensure_context(self):
-        """Ensure the async client context is entered."""
-        if not self._context_entered and not self._closed:
-            await self._async_client.__aenter__()
-            self._context_entered = True
 
-    async def _exit_context(self):
-        """Exit the async client context."""
-        if self._context_entered and not self._closed:
-            await self._async_client.__aexit__(None, None, None)
-            self._context_entered = False
-
-    def _run_async(self, coro):
-        """
-        Run an async coroutine and return the result synchronously.
-        This version handles both Jupyter notebook environments and regular Python environments.
-        """
-        async def run_with_context():
-            await self._ensure_context()
-            return await coro
-
-        try:
-            # Check if we're in a running event loop (like Jupyter)
-            loop = asyncio.get_running_loop()
-
-            # Method 1: Try using nest_asyncio if available
-            try:
-                import nest_asyncio
-                nest_asyncio.apply()
-                return asyncio.run(run_with_context())
-            except ImportError:
-                pass
-
-            # Method 2: Use ThreadPoolExecutor to run in a separate thread
-            def run_in_thread():
-                return asyncio.run(run_with_context())
-
-            with concurrent.futures.ThreadPoolExecutor() as executor:
-                future = executor.submit(run_in_thread)
-                return future.result()
-
-        except RuntimeError:
-            # No running loop, safe to use asyncio.run()
-            return asyncio.run(run_with_context())
-
-    def close(self):
-        """Close the underlying async client session."""
-        if not self._closed:
-            async def close_async():
-                await self._exit_context()
-
-            try:
-                loop = asyncio.get_running_loop()
-
-                # Try nest_asyncio first
-                try:
-                    import nest_asyncio
-                    nest_asyncio.apply()
-                    asyncio.run(close_async())
-                except ImportError:
-                    # Fall back to ThreadPoolExecutor
-                    def run_in_thread():
-                        return asyncio.run(close_async())
-
-                    with concurrent.futures.ThreadPoolExecutor() as executor:
-                        future = executor.submit(run_in_thread)
-                        future.result()
-
-            except RuntimeError:
-                asyncio.run(close_async())
-
-            self._closed = True
-
-    def _cleanup(self):
-        """Internal cleanup method called by atexit."""
-        if not self._closed:
-            try:
-                self.close()
-            except Exception:
-                pass
-
-    def __enter__(self):
+    # Context manager support
+    def __enter__(self) -> 'SyncClient':
         """Context manager entry."""
-        async def enter_async():
-            await self._ensure_context()
-
-        try:
-            loop = asyncio.get_running_loop()
-
-            # Try nest_asyncio first
-            try:
-                import nest_asyncio
-                nest_asyncio.apply()
-                asyncio.run(enter_async())
-            except ImportError:
-                # Fall back to ThreadPoolExecutor
-                def run_in_thread():
-                    return asyncio.run(enter_async())
-
-                with concurrent.futures.ThreadPoolExecutor() as executor:
-                    future = executor.submit(run_in_thread)
-                    future.result()
-
-        except RuntimeError:
-            asyncio.run(enter_async())
-
         return self
 
-    def __exit__(self, exc_type, exc_val, exc_tb):
+    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
         """Context manager exit."""
         self.close()
 
-    def __del__(self):
+    def __del__(self) -> None:
         """Destructor to ensure session is closed."""
         if not self._closed:
             try:
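
With the persistent loop, the synchronous client behaves the same in plain scripts and inside Jupyter, and the context manager drives setup and teardown. A hypothetical usage sketch; the URL, key, input file, and expression are placeholders:

    import geopandas as gpd
    from terrakio_core import Client

    gdf = gpd.read_file("sites.geojson")  # placeholder input

    with Client(url="https://api.example.com", api_key="...") as client:
        # In-process path: returns a GeoDataFrame with result columns.
        result = client.zonal_stats(gdf, expr="mean(ndvi)")
        # Mass-stats path (new in 0.4.6): returns a job id instead.
        job_id = client.zonal_stats(gdf, expr="mean(ndvi)",
                                    mass_stats=True, id_column="site_id")
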
--- terrakio_core-0.4.5/terrakio_core.egg-info/PKG-INFO
+++ terrakio_core-0.4.6/terrakio_core.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: terrakio-core
-Version: 0.4.5
+Version: 0.4.6
 Summary: Core components for Terrakio API clients
 Author-email: Yupeng Chao <yupeng@haizea.com.au>
 Project-URL: Homepage, https://github.com/HaizeaAnalytics/terrakio-python-api
@@ -25,6 +25,9 @@ Requires-Dist: google-cloud-storage>=2.0.0
 Requires-Dist: scipy>=1.7.0
 Requires-Dist: nest_asyncio
 Requires-Dist: onnxruntime>=1.10.0
+Requires-Dist: psutil>=5.0.0
+Requires-Dist: h5netcdf>=1.0.0
+Requires-Dist: netcdf4>=1.5.0
 Provides-Extra: ml
 Requires-Dist: torch>=2.7.1; extra == "ml"
 Requires-Dist: scikit-learn>=1.7.0; extra == "ml"

--- terrakio_core-0.4.5/terrakio_core.egg-info/requires.txt
+++ terrakio_core-0.4.6/terrakio_core.egg-info/requires.txt
@@ -8,6 +8,9 @@ google-cloud-storage>=2.0.0
 scipy>=1.7.0
 nest_asyncio
 onnxruntime>=1.10.0
+psutil>=5.0.0
+h5netcdf>=1.0.0
+netcdf4>=1.5.0
 
 [ml]
 torch>=2.7.1