terrakio-core 0.4.5__tar.gz → 0.4.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of terrakio-core might be problematic.
- {terrakio_core-0.4.5 → terrakio_core-0.4.6}/PKG-INFO +4 -1
- {terrakio_core-0.4.5 → terrakio_core-0.4.6}/pyproject.toml +4 -1
- {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/__init__.py +1 -1
- {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/async_client.py +6 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/convenience_functions/convenience_functions.py +146 -8
- {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/endpoints/mass_stats.py +28 -33
- {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/sync_client.py +193 -140
- {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core.egg-info/PKG-INFO +4 -1
- {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core.egg-info/requires.txt +3 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.6}/README.md +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.6}/setup.cfg +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/accessors.py +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/client.py +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/config.py +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/endpoints/auth.py +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/endpoints/dataset_management.py +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/endpoints/group_management.py +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/endpoints/model_management.py +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/endpoints/space_management.py +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/endpoints/user_management.py +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/exceptions.py +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/helper/bounded_taskgroup.py +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/helper/decorators.py +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/helper/tiles.py +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core.egg-info/SOURCES.txt +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core.egg-info/dependency_links.txt +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core.egg-info/top_level.txt +0 -0
{terrakio_core-0.4.5 → terrakio_core-0.4.6}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: terrakio-core
-Version: 0.4.5
+Version: 0.4.6
 Summary: Core components for Terrakio API clients
 Author-email: Yupeng Chao <yupeng@haizea.com.au>
 Project-URL: Homepage, https://github.com/HaizeaAnalytics/terrakio-python-api
@@ -25,6 +25,9 @@ Requires-Dist: google-cloud-storage>=2.0.0
 Requires-Dist: scipy>=1.7.0
 Requires-Dist: nest_asyncio
 Requires-Dist: onnxruntime>=1.10.0
+Requires-Dist: psutil>=5.0.0
+Requires-Dist: h5netcdf>=1.0.0
+Requires-Dist: netcdf4>=1.5.0
 Provides-Extra: ml
 Requires-Dist: torch>=2.7.1; extra == "ml"
 Requires-Dist: scikit-learn>=1.7.0; extra == "ml"
```
{terrakio_core-0.4.5 → terrakio_core-0.4.6}/pyproject.toml

```diff
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "terrakio-core"
-version = "0.4.5"
+version = "0.4.6"
 authors = [
   {name = "Yupeng Chao", email = "yupeng@haizea.com.au"},
 ]
@@ -32,6 +32,9 @@ dependencies = [
     "scipy>=1.7.0",
     "nest_asyncio",
     "onnxruntime>=1.10.0",
+    "psutil>=5.0.0",
+    "h5netcdf>=1.0.0",
+    "netcdf4>=1.5.0",
 ]
 
 [project.optional-dependencies]
```
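The three new runtime dependencies line up with the mass-stats changes later in this diff: `handle_mass_stats` submits jobs with `output = "netcdf"`, and h5netcdf/netcdf4 are the usual engines for reading that format back. A minimal sketch of what consuming such a result might look like (the use of xarray and the file name are assumptions for illustration, not something this diff shows):

```python
# Sketch only: assumes a NetCDF file "result.nc" already downloaded from a
# finished mass-stats job, and that xarray is installed alongside the package.
import xarray as xr

# Either of the two backends added in 0.4.6 can serve as the reader engine.
ds = xr.open_dataset("result.nc", engine="h5netcdf")
print(ds)
```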
{terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/async_client.py

```diff
@@ -196,6 +196,8 @@ class AsyncClient(BaseClient):
         out_crs: str = "epsg:4326",
         resolution: int = -1,
         geom_fix: bool = False,
+        mass_stats: bool = False,
+        id_column: Optional[str] = None,
     ):
         """
         Compute zonal statistics for all geometries in a GeoDataFrame.
@@ -208,6 +210,8 @@ class AsyncClient(BaseClient):
             out_crs (str): Output coordinate reference system
             resolution (int): Resolution parameter
             geom_fix (bool): Whether to fix the geometry (default False)
+            mass_stats (bool): Whether to use mass stats for processing (default False)
+            id_column (Optional[str]): Name of the ID column to use (default None)
 
         Returns:
             geopandas.GeoDataFrame: GeoDataFrame with added columns for results
@@ -225,6 +229,8 @@ class AsyncClient(BaseClient):
             out_crs=out_crs,
             resolution=resolution,
             geom_fix=geom_fix,
+            mass_stats=mass_stats,
+            id_column=id_column,
         )
 
     async def create_dataset_file(
```
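These three hunks thread the two new keyword arguments from the public `zonal_stats` signature through its docstring and into the underlying call. A minimal usage sketch (the expression, file name, and column name are placeholders, not values from the diff):

```python
# Sketch only: assumes a valid API key and a GeoDataFrame with a
# "region_id" column; the expression string is a made-up placeholder.
import asyncio
import geopandas as gpd
from terrakio_core.async_client import AsyncClient

async def main():
    gdf = gpd.read_file("regions.geojson")  # hypothetical input
    async with AsyncClient(api_key="my-key") as client:
        # With mass_stats=True, 0.4.6 submits a server-side mass-stats job
        # and returns its job id instead of computing results locally.
        job_id = await client.zonal_stats(
            gdf=gdf,
            expr="mean(dataset.band)",  # placeholder expression
            mass_stats=True,
            id_column="region_id",      # names output groups/files per row
        )
        print(job_id)

asyncio.run(main())
```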
{terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/convenience_functions/convenience_functions.py

```diff
@@ -24,6 +24,8 @@ import pyproj
 import pandas as pd
 import geopandas as gpd
 
+from typing import Optional
+
 def expand_on_time(gdf):
     """
     Expand datasets on time dimension - each time becomes a new row.
@@ -359,18 +361,153 @@ async def local_or_remote(
             "local_or_remote": "local",
             "reason": "The number of the requests is not too large, and the time taking for making these requests is not too long, and the size of the dataset is not too large",
         }
+
+def gdf_to_json(
+    gdf: GeoDataFrame,
+    expr: str,
+    in_crs: str = "epsg:4326",
+    out_crs: str = "epsg:4326",
+    resolution: int = -1,
+    geom_fix: bool = False,
+    id_column: Optional[str] = None,
+):
+    """
+    Convert a GeoDataFrame to a list of JSON requests for mass_stats processing.
+
+    Args:
+        gdf: GeoDataFrame containing geometries and optional metadata
+        expr: Expression to evaluate
+        in_crs: Input coordinate reference system
+        out_crs: Output coordinate reference system
+        resolution: Resolution parameter
+        geom_fix: Whether to fix geometry issues
+        id_column: Optional column name to use for group and file names
+
+    Returns:
+        list: List of dictionaries formatted for mass_stats requests
+    """
+    mass_stats_requests = []
+
+    # Loop through each row in the GeoDataFrame
+    for idx, row in gdf.iterrows():
+        # Create the request feature
+        request_feature = {
+            "expr": expr,
+            "feature": {
+                "type": "Feature",
+                "geometry": mapping(gdf.geometry.iloc[idx]),
+                "properties": {}
+            },
+            "in_crs": in_crs,
+            "out_crs": out_crs,
+            "resolution": resolution,
+            "geom_fix": geom_fix,
+        }
+
+        # Determine group name and file name based on id_column
+        if id_column is not None and id_column in gdf.columns:
+            # Use the value from the specified column as group and file name
+            identifier = str(row[id_column])
+            group_name = f"group_{identifier}"
+            file_name = f"file_{identifier}"
+        else:
+            # Use the index as group and file name
+            group_name = f"group_{idx}"
+            file_name = f"file_{idx}"
+
+        # Create the complete request entry
+        request_entry = {
+            "group": group_name,
+            "file": file_name,
+            "request": request_feature,
+        }
+
+        # Add the request to our list
+        mass_stats_requests.append(request_entry)
+
+    return mass_stats_requests
+
+async def handle_mass_stats(
+    client,
+    gdf: GeoDataFrame,
+    expr: str,
+    in_crs: str = "epsg:4326",
+    out_crs: str = "epsg:4326",
+    resolution: int = -1,
+    geom_fix: bool = False,
+    id_column: Optional[str] = None,
+
+):
+    # we have the handle mass stats function, we need to have the list of quries, and we need to pass the quries to the mass stats function
+    # we have the three different variables
+
+    # Check if id_column is provided
+    # if id_column is None:
+    # Handle case where no ID column is specified
+    # this means that the id column is none, so we could just use the default value of 1 2 3 4
+    request_json = gdf_to_json(gdf = gdf, expr = expr, in_crs = in_crs, out_crs = out_crs, resolution = resolution, geom_fix = geom_fix, id_column = id_column)
+    # we need to call the execute job function
+    job_id =await client.mass_stats.execute_job(
+        name = "zonal_stats_job",
+        output = "netcdf",
+        config = {},
+        request_json = request_json,
+        overwrite = True,
+    )
+    return job_id
+    # async def test_regular_async_mass_stats(regular_async_client):
+    #     """Test mass statistics with regular client async"""
+    #     start_result = await regular_async_client.mass_stats.execute_job(
+    #         name="test_regular_mass_stats_test",
+    #         region="aus",
+    #         output="csv",
+    #         config={},
+    #         request_json = "./test_config.json",
+    #         manifest_json = "./test_manifest.json",
+    #         overwrite=True,
+    #     )
+    #     assert isinstance(start_result, dict)
+    #     assert 'task_id' in start_result
+
+    # return
+    # else:
+    #     # Handle case where ID column is specified
+    #     # Verify the column exists in the GeoDataFrame
+
+    # if id_column not in gdf.columns:
+    #     raise ValueError(f"ID column '{id_column}' not found in GeoDataFrame columns: {list(gdf.columns)}")
+    # pass
+    # the second case is that we have an id_column, we need to use the id_column to create the group name
 
+# we have the mass stats as one of the parameters, so that when a user wants a mass
+# for both cases we need to have the list of quries
 async def zonal_stats(
-    client,
-    gdf: GeoDataFrame,
-    expr: str,
-    conc: int = 20,
-    in_crs: str = "epsg:4326",
-    out_crs: str = "epsg:4326",
-    resolution: int = -1,
-    geom_fix: bool = False,
+    client,
+    gdf: GeoDataFrame,
+    expr: str,
+    conc: int = 20,
+    in_crs: str = "epsg:4326",
+    out_crs: str = "epsg:4326",
+    resolution: int = -1,
+    geom_fix: bool = False,
+    mass_stats: bool = False,
+    id_column: Optional[str] = None,
 ):
     """Compute zonal statistics for all geometries in a GeoDataFrame."""
+
+    if mass_stats:
+        mass_stats_id = await handle_mass_stats(
+            client = client,
+            gdf = gdf,
+            expr = expr,
+            in_crs = in_crs,
+            out_crs = out_crs,
+            resolution = resolution,
+            geom_fix = geom_fix,
+            id_column = id_column
+        )
+        # if we started the mass stats job, we need to return the job id
+        return mass_stats_id
     quries = []
     for i in range(len(gdf)):
         quries.append({
@@ -385,6 +522,7 @@ async def zonal_stats(
             "resolution": resolution,
             "geom_fix": geom_fix,
         })
+
     local_or_remote_result = await local_or_remote(client= client, quries = quries)
     if local_or_remote_result["local_or_remote"] == "remote":
         raise ValueError(local_or_remote_result["reason"])
```
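Each GeoDataFrame row therefore becomes one mass-stats request entry, keyed by `group_*`/`file_*` names. A sketch of the shape `gdf_to_json` produces for a single row (toy geometry and expression, not captured output):

```python
# Sketch: one entry as built by gdf_to_json for a row whose id_column
# value is "NSW"; the geometry and expression here are toy placeholders.
entry = {
    "group": "group_NSW",
    "file": "file_NSW",
    "request": {
        "expr": "mean(dataset.band)",  # placeholder expression
        "feature": {
            "type": "Feature",
            "geometry": {"type": "Point", "coordinates": [149.1, -35.3]},
            "properties": {},
        },
        "in_crs": "epsg:4326",
        "out_crs": "epsg:4326",
        "resolution": -1,
        "geom_fix": False,
    },
}
```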
{terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/endpoints/mass_stats.py

```diff
@@ -328,7 +328,7 @@ class MassStats:
                 resp.raise_for_status()
                 import aiofiles
                 async with aiofiles.open(file_save_path, 'wb') as file:
-                    async for chunk in resp.content.iter_chunked(1048576):
+                    async for chunk in resp.content.iter_chunked(1048576):
                         if chunk:
                             await file.write(chunk)
 
@@ -363,7 +363,7 @@ class MassStats:
                     if total_files is not None and downloaded_files >= total_files:
                         break
                     if len(download_urls) < page_size:
-                        break
+                        break
                     page += 1
             return output_files
         except Exception as e:
@@ -389,7 +389,6 @@ class MassStats:
                 raise ValueError("Request must be a dictionary")
             if not isinstance(request["file"], (str, int, list)):
                 raise ValueError("'file' must be a string or a list of strings")
-            # Only check the first 3 requests
             if i == 3:
                 break
 
@@ -397,10 +396,9 @@ class MassStats:
     async def execute_job(
         self,
         name: str,
-        # region: str,
         output: str,
         config: Dict[str, Any],
-        request_json: str,
+        request_json: Union[str, list[Dict[str, Any]]],
         overwrite: bool = False,
         skip_existing: bool = False,
         location: str = None,
@@ -427,7 +425,6 @@ class MassStats:
         Raises:
             APIError: If the API request fails
         """
-
         def extract_manifest_from_request(request_data: List[Dict[str, Any]]) -> List[str]:
             """Extract unique group names from request data to create manifest list."""
             groups = []
@@ -447,34 +444,35 @@ class MassStats:
 
             return groups
 
-
-
-
-
-
-
-
-
-
-
-
-
+        if isinstance(request_json, str):
+            try:
+                with open(request_json, 'r') as file:
+                    request_data = json.load(file)
+                if isinstance(request_data, list):
+                    size = len(request_data)
+                else:
+                    raise ValueError(f"Request JSON file {request_json} should contain a list of dictionaries")
+            except FileNotFoundError as e:
+                return e
+            except json.JSONDecodeError as e:
+                return e
+            request_json_path = request_json
+        else:
+            request_data = request_json
+            size = len(request_data)
+            request_json_path = None
 
-        # Generate manifest from request data (kept in memory)
         try:
             manifest_groups = extract_manifest_from_request(request_data)
         except Exception as e:
             raise ValueError(f"Error extracting manifest from request JSON: {e}")
 
-
-        first_request = request_data[0] # Changed from data[0] to request_data[0]
+        first_request = request_data[0]
         first_expression = first_request["request"]["expr"]
 
-        # Get upload URLs
         upload_result = await self._upload_request(
             name=name,
             size=size,
-            # region=region,
             sample = first_expression,
             output=output,
             config=config,
@@ -490,21 +488,21 @@ class MassStats:
 
         if not requests_url:
             raise ValueError("No requests_url returned from server for request JSON upload")
-
-        # Upload request JSON file
         try:
-
-
+            if request_json_path:
+                self.validate_request(request_json_path)
+                requests_response = await self._upload_file(request_json_path, requests_url, use_gzip=True)
+            else:
+                requests_response = await self._upload_json_data(request_data, requests_url, use_gzip=True)
             if requests_response.status not in [200, 201, 204]:
                 self._client.logger.error(f"Requests upload error: {requests_response.text()}")
-                raise Exception(f"Failed to upload request
+                raise Exception(f"Failed to upload request data: {requests_response.text()}")
         except Exception as e:
             raise Exception(f"Error uploading request JSON file {request_json}: {e}")
-
+
         if not manifest_url:
             raise ValueError("No manifest_url returned from server for manifest JSON upload")
 
-        # Upload manifest JSON data directly (no temporary file needed)
         try:
             manifest_response = await self._upload_json_data(manifest_groups, manifest_url, use_gzip=False)
             if manifest_response.status not in [200, 201, 204]:
@@ -513,7 +511,6 @@ class MassStats:
         except Exception as e:
             raise Exception(f"Error uploading manifest JSON: {e}")
 
-        # Start the job
         start_job_task_id = await self.start_job(upload_result.get("id"))
         return start_job_task_id
 
@@ -557,7 +554,6 @@ class MassStats:
         tile_size: int,
         res: float,
         output: str,
-        # region: str,
         year_range: list[int] = None,
         overwrite: bool = False,
         server: str = None,
@@ -600,7 +596,6 @@ class MassStats:
         payload_mapping = {
             "year_range": year_range,
             "server": server,
-            # "region": region,
             "bucket": bucket,
         }
         for key, value in payload_mapping.items():
```
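The widened `request_json: Union[str, list[Dict[str, Any]]]` type is the hinge of this file's changes: `execute_job` now accepts either a path to a requests file on disk (validated and uploaded via `_upload_file`) or an in-memory list of request dictionaries (uploaded via `_upload_json_data`), which is what the new `handle_mass_stats` passes. A minimal sketch of both call styles inside an async context (job names and the file path are placeholders):

```python
# Sketch only: assumes `client` is an entered AsyncClient and `entries`
# is a list shaped like the gdf_to_json output shown earlier.
async def submit(client, entries):
    # New in 0.4.6: pass the request list directly, no temp file needed.
    task_a = await client.mass_stats.execute_job(
        name="zonal_stats_job",   # placeholder name
        output="netcdf",
        config={},
        request_json=entries,
        overwrite=True,
    )

    # Pre-0.4.6 style still works: point at a JSON file on disk.
    task_b = await client.mass_stats.execute_job(
        name="zonal_stats_job_from_file",
        output="csv",
        config={},
        request_json="./requests.json",  # hypothetical path
        overwrite=True,
    )
    return task_a, task_b
```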
{terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core/sync_client.py

```diff
@@ -339,38 +339,41 @@
 
 
 import asyncio
-import functools
 import concurrent.futures
-
+import threading
+import functools
+import inspect
+from typing import Optional, Dict, Any, Union, TYPE_CHECKING
 from geopandas import GeoDataFrame
 from shapely.geometry.base import BaseGeometry as ShapelyGeometry
 from .async_client import AsyncClient
 
+# Add type checking imports for better IDE support
+if TYPE_CHECKING:
+    from .endpoints.dataset_management import DatasetManagement
+    from .endpoints.user_management import UserManagement
+    from .endpoints.mass_stats import MassStats
+    from .endpoints.group_management import GroupManagement
+    from .endpoints.space_management import SpaceManagement
+    from .endpoints.model_management import ModelManagement
+    from .endpoints.auth import AuthClient
+
 
 class SyncWrapper:
-    """
-    Generic synchronous wrapper with __dir__ support for runtime autocomplete.
-    """
+    """Generic synchronous wrapper with __dir__ support for runtime autocomplete."""
 
     def __init__(self, async_obj, sync_client):
         self._async_obj = async_obj
         self._sync_client = sync_client
 
     def __dir__(self):
-        """
-        Return list of attributes for autocomplete in interactive environments.
-        This enables autocomplete in Jupyter/iPython after instantiation.
-        """
+        """Return list of attributes for autocomplete in interactive environments."""
         async_attrs = [attr for attr in dir(self._async_obj) if not attr.startswith('_')]
-
         wrapper_attrs = [attr for attr in object.__dir__(self) if not attr.startswith('_')]
-
         return list(set(async_attrs + wrapper_attrs))
 
     def __getattr__(self, name):
-        """
-        Dynamically wrap any method call to convert async to sync.
-        """
+        """Dynamically wrap any method call to convert async to sync."""
         attr = getattr(self._async_obj, name)
 
         if callable(attr):
@@ -387,15 +390,32 @@ class SyncWrapper:
 
 class SyncClient:
     """
-
-
+    Thread-safe synchronous wrapper for AsyncClient.
+    Uses a persistent event loop in a dedicated thread to avoid event loop conflicts.
     """
+
+    # Add explicit type annotations for endpoint managers
+    datasets: 'DatasetManagement'
+    users: 'UserManagement'
+    mass_stats: 'MassStats'
+    groups: 'GroupManagement'
+    space: 'SpaceManagement'
+    model: 'ModelManagement'
+    auth: 'AuthClient'
 
     def __init__(self, url: Optional[str] = None, api_key: Optional[str] = None, verbose: bool = False):
         self._async_client = AsyncClient(url=url, api_key=api_key, verbose=verbose)
         self._context_entered = False
         self._closed = False
 
+        # Thread and event loop management
+        self._loop = None
+        self._thread = None
+        self._loop_ready = None
+        self._loop_exception = None
+
+        # Initialize endpoint managers with proper typing
+
         self.datasets = SyncWrapper(self._async_client.datasets, self)
         self.users = SyncWrapper(self._async_client.users, self)
         self.mass_stats = SyncWrapper(self._async_client.mass_stats, self)
```
```diff
@@ -407,21 +427,150 @@ class SyncClient:
         import atexit
         atexit.register(self._cleanup)
 
-    def
+    def _ensure_event_loop(self) -> None:
+        """Ensure we have a persistent event loop in a dedicated thread."""
+        if self._loop is None or self._loop.is_closed():
+            self._loop_ready = threading.Event()
+            self._loop_exception = None
+
+            def run_loop():
+                """Run the event loop in a dedicated thread."""
+                try:
+                    # Create a new event loop for this thread
+                    self._loop = asyncio.new_event_loop()
+                    asyncio.set_event_loop(self._loop)
+
+                    # Signal that the loop is ready
+                    self._loop_ready.set()
+
+                    # Run the loop forever (until stopped)
+                    self._loop.run_forever()
+                except Exception as e:
+                    self._loop_exception = e
+                    self._loop_ready.set()
+                finally:
+                    # Clean up when the loop stops
+                    if self._loop and not self._loop.is_closed():
+                        self._loop.close()
+
+            # Start the thread
+            self._thread = threading.Thread(target=run_loop, daemon=True)
+            self._thread.start()
+
+            # Wait for the loop to be ready
+            self._loop_ready.wait(timeout=10)
+
+            if self._loop_exception:
+                raise self._loop_exception
+
+            if not self._loop_ready.is_set():
+                raise RuntimeError("Event loop failed to start within timeout")
+
+    def _run_async(self, coro):
         """
-
-        This
+        Run async coroutine using persistent event loop.
+        This is the core method that makes everything work.
         """
-
+        # Ensure we have an event loop
+        self._ensure_event_loop()
 
-
+        if self._loop.is_closed():
+            raise RuntimeError("Event loop is closed")
 
-
+        # Create a future to get the result back from the event loop thread
+        future = concurrent.futures.Future()
 
-
+        async def run_with_context():
+            """Run the coroutine with proper context management."""
+            try:
+                # Ensure the async client is properly initialized
+                await self._ensure_context()
+
+                # Run the actual coroutine
+                result = await coro
+
+                # Set the result on the future
+                future.set_result(result)
+            except Exception as e:
+                # Set the exception on the future
+                future.set_exception(e)
+
+        # Schedule the coroutine on the persistent event loop
+        self._loop.call_soon_threadsafe(
+            lambda: asyncio.create_task(run_with_context())
+        )
+
 
+        # Wait for the result (with timeout to avoid hanging)
+        try:
+            return future.result(timeout=300) # 5 minute timeout
+        except concurrent.futures.TimeoutError:
+            raise RuntimeError("Async operation timed out after 5 minutes")
+
+    async def _ensure_context(self) -> None:
+        """Ensure the async client context is entered."""
+        if not self._context_entered and not self._closed:
+            await self._async_client.__aenter__()
+            self._context_entered = True
+
+    async def _exit_context(self) -> None:
+        """Exit the async client context."""
+        if self._context_entered and not self._closed:
+            await self._async_client.__aexit__(None, None, None)
+            self._context_entered = False
+
+    def close(self) -> None:
+        """Close the underlying async client session and stop the event loop."""
+        if not self._closed:
+            if self._loop and not self._loop.is_closed():
+                # Schedule cleanup on the event loop
+                future = concurrent.futures.Future()
+
+                async def cleanup():
+                    """Clean up the async client."""
+                    try:
+                        await self._exit_context()
+                        future.set_result(None)
+                    except Exception as e:
+                        future.set_exception(e)
+
+                # Run cleanup
+                self._loop.call_soon_threadsafe(
+                    lambda: asyncio.create_task(cleanup())
+                )
+
+                # Wait for cleanup to complete
+                try:
+                    future.result(timeout=10)
+                except:
+                    pass # Ignore cleanup errors
+
+                # Stop the event loop
+                self._loop.call_soon_threadsafe(self._loop.stop)
+
+                # Wait for thread to finish
+                if self._thread and self._thread.is_alive():
+                    self._thread.join(timeout=5)
+
+            self._closed = True
+
+    def _cleanup(self) -> None:
+        """Internal cleanup method called by atexit."""
+        if not self._closed:
+            try:
+                self.close()
+            except Exception:
+                pass # Ignore cleanup errors
+
+    def __dir__(self):
+        """Return list of attributes for autocomplete in interactive environments."""
+        default_attrs = [attr for attr in object.__dir__(self) if not attr.startswith('_')]
+        async_client_attrs = [attr for attr in dir(self._async_client) if not attr.startswith('_')]
+        endpoint_attrs = ['datasets', 'users', 'mass_stats', 'groups', 'space', 'model', 'auth']
+        all_attrs = default_attrs + async_client_attrs + endpoint_attrs
         return list(set(all_attrs))
 
+    # Your existing methods with proper type annotations
     def geoquery(
         self,
         expr: str,
```
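The replacement `_run_async` is the heart of the rewrite: rather than calling `asyncio.run()` per invocation (and falling back to nest_asyncio or a throwaway thread pool, as the removed code in the last hunk below does), the client now parks one event loop on a daemon thread and hands every coroutine to it with `call_soon_threadsafe`, reading the result back through a `concurrent.futures.Future`. A standalone sketch of that pattern (toy code, not the package's API):

```python
# Toy sketch of the persistent-loop pattern used above; not terrakio-core code.
import asyncio
import concurrent.futures
import threading

loop = asyncio.new_event_loop()
threading.Thread(target=loop.run_forever, daemon=True).start()

def run_sync(coro, timeout=30):
    """Block until `coro` finishes on the background loop."""
    future = concurrent.futures.Future()

    async def runner():
        try:
            future.set_result(await coro)
        except Exception as e:
            future.set_exception(e)

    # The callback runs inside the loop thread, where create_task is legal;
    # the Future carries the result back to the calling thread.
    loop.call_soon_threadsafe(lambda: asyncio.create_task(runner()))
    return future.result(timeout=timeout)

async def hello():
    await asyncio.sleep(0.1)
    return "done"

print(run_sync(hello()))  # -> done
```

The stdlib's `asyncio.run_coroutine_threadsafe` collapses the future plumbing into a single call; the hand-rolled version here simply mirrors the structure visible in the diff.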
```diff
@@ -446,15 +595,18 @@ class SyncClient:
         return self._run_async(coro)
 
     def zonal_stats(
-
-
-
-
-
-
-
-
-
+        self,
+        gdf: GeoDataFrame,
+        expr: str,
+        conc: int = 20,
+        in_crs: str = "epsg:4326",
+        out_crs: str = "epsg:4326",
+        resolution: int = -1,
+        geom_fix: bool = False,
+        mass_stats: bool = False,
+        id_column: Optional[str] = None,
+    ) -> GeoDataFrame:
+
         """
         Compute zonal statistics for all geometries in a GeoDataFrame (synchronous version).
 
@@ -466,6 +618,8 @@ class SyncClient:
             out_crs (str): Output coordinate reference system
             resolution (int): Resolution parameter
             geom_fix (bool): Whether to fix the geometry (default False)
+            mass_stats (bool): Whether to use mass stats for processing (default False)
+            id_column (Optional[str]): Name of the ID column to use (default None)
 
         Returns:
             geopandas.GeoDataFrame: GeoDataFrame with added columns for results
@@ -482,6 +636,8 @@ class SyncClient:
             out_crs=out_crs,
             resolution=resolution,
             geom_fix=geom_fix,
+            mass_stats=mass_stats,
+            id_column=id_column,
         )
         return self._run_async(coro)
 
@@ -521,7 +677,7 @@ class SyncClient:
         self,
         queries: list[dict],
         conc: int = 20,
-    ):
+    ) -> Union[float, GeoDataFrame]:
         """
         Execute multiple geo queries concurrently (synchronous version).
 
@@ -559,120 +715,17 @@ class SyncClient:
             conc=conc,
         )
         return self._run_async(coro)
-
-    async def _ensure_context(self):
-        """Ensure the async client context is entered."""
-        if not self._context_entered and not self._closed:
-            await self._async_client.__aenter__()
-            self._context_entered = True
 
-
-    async def _exit_context(self):
-        """Exit the async client context."""
-        if self._context_entered and not self._closed:
-            await self._async_client.__aexit__(None, None, None)
-            self._context_entered = False
-
-    def _run_async(self, coro):
-        """
-        Run an async coroutine and return the result synchronously.
-        This version handles both Jupyter notebook environments and regular Python environments.
-        """
-        async def run_with_context():
-            await self._ensure_context()
-            return await coro
-
-        try:
-            # Check if we're in a running event loop (like Jupyter)
-            loop = asyncio.get_running_loop()
-
-            # Method 1: Try using nest_asyncio if available
-            try:
-                import nest_asyncio
-                nest_asyncio.apply()
-                return asyncio.run(run_with_context())
-            except ImportError:
-                pass
-
-            # Method 2: Use ThreadPoolExecutor to run in a separate thread
-            def run_in_thread():
-                return asyncio.run(run_with_context())
-
-            with concurrent.futures.ThreadPoolExecutor() as executor:
-                future = executor.submit(run_in_thread)
-                return future.result()
-
-        except RuntimeError:
-            # No running loop, safe to use asyncio.run()
-            return asyncio.run(run_with_context())
-
-    def close(self):
-        """Close the underlying async client session."""
-        if not self._closed:
-            async def close_async():
-                await self._exit_context()
-
-            try:
-                loop = asyncio.get_running_loop()
-
-                # Try nest_asyncio first
-                try:
-                    import nest_asyncio
-                    nest_asyncio.apply()
-                    asyncio.run(close_async())
-                except ImportError:
-                    # Fall back to ThreadPoolExecutor
-                    def run_in_thread():
-                        return asyncio.run(close_async())
-
-                    with concurrent.futures.ThreadPoolExecutor() as executor:
-                        future = executor.submit(run_in_thread)
-                        future.result()
-
-            except RuntimeError:
-                asyncio.run(close_async())
-
-            self._closed = True
-
-    def _cleanup(self):
-        """Internal cleanup method called by atexit."""
-        if not self._closed:
-            try:
-                self.close()
-            except Exception:
-                pass
-
-    def __enter__(self):
+    # Context manager support
+    def __enter__(self) -> 'SyncClient':
         """Context manager entry."""
-        async def enter_async():
-            await self._ensure_context()
-
-        try:
-            loop = asyncio.get_running_loop()
-
-            # Try nest_asyncio first
-            try:
-                import nest_asyncio
-                nest_asyncio.apply()
-                asyncio.run(enter_async())
-            except ImportError:
-                # Fall back to ThreadPoolExecutor
-                def run_in_thread():
-                    return asyncio.run(enter_async())
-
-                with concurrent.futures.ThreadPoolExecutor() as executor:
-                    future = executor.submit(run_in_thread)
-                    future.result()
-
-        except RuntimeError:
-            asyncio.run(enter_async())
-
         return self
 
-    def __exit__(self, exc_type, exc_val, exc_tb):
+    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
         """Context manager exit."""
         self.close()
 
-    def __del__(self):
+    def __del__(self) -> None:
         """Destructor to ensure session is closed."""
         if not self._closed:
             try:
```
{terrakio_core-0.4.5 → terrakio_core-0.4.6}/terrakio_core.egg-info/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: terrakio-core
-Version: 0.4.5
+Version: 0.4.6
 Summary: Core components for Terrakio API clients
 Author-email: Yupeng Chao <yupeng@haizea.com.au>
 Project-URL: Homepage, https://github.com/HaizeaAnalytics/terrakio-python-api
@@ -25,6 +25,9 @@ Requires-Dist: google-cloud-storage>=2.0.0
 Requires-Dist: scipy>=1.7.0
 Requires-Dist: nest_asyncio
 Requires-Dist: onnxruntime>=1.10.0
+Requires-Dist: psutil>=5.0.0
+Requires-Dist: h5netcdf>=1.0.0
+Requires-Dist: netcdf4>=1.5.0
 Provides-Extra: ml
 Requires-Dist: torch>=2.7.1; extra == "ml"
 Requires-Dist: scikit-learn>=1.7.0; extra == "ml"
```
All other files listed above are unchanged between 0.4.5 and 0.4.6.