terrakio-core 0.4.4__tar.gz → 0.4.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of terrakio-core might be problematic.
- {terrakio_core-0.4.4 → terrakio_core-0.4.6}/PKG-INFO +4 -1
- {terrakio_core-0.4.4 → terrakio_core-0.4.6}/pyproject.toml +4 -1
- {terrakio_core-0.4.4 → terrakio_core-0.4.6}/terrakio_core/__init__.py +1 -1
- {terrakio_core-0.4.4 → terrakio_core-0.4.6}/terrakio_core/async_client.py +50 -1
- {terrakio_core-0.4.4 → terrakio_core-0.4.6}/terrakio_core/convenience_functions/convenience_functions.py +146 -8
- {terrakio_core-0.4.4 → terrakio_core-0.4.6}/terrakio_core/endpoints/mass_stats.py +28 -33
- {terrakio_core-0.4.4 → terrakio_core-0.4.6}/terrakio_core/sync_client.py +234 -97
- {terrakio_core-0.4.4 → terrakio_core-0.4.6}/terrakio_core.egg-info/PKG-INFO +4 -1
- {terrakio_core-0.4.4 → terrakio_core-0.4.6}/terrakio_core.egg-info/requires.txt +3 -0
- {terrakio_core-0.4.4 → terrakio_core-0.4.6}/README.md +0 -0
- {terrakio_core-0.4.4 → terrakio_core-0.4.6}/setup.cfg +0 -0
- {terrakio_core-0.4.4 → terrakio_core-0.4.6}/terrakio_core/accessors.py +0 -0
- {terrakio_core-0.4.4 → terrakio_core-0.4.6}/terrakio_core/client.py +0 -0
- {terrakio_core-0.4.4 → terrakio_core-0.4.6}/terrakio_core/config.py +0 -0
- {terrakio_core-0.4.4 → terrakio_core-0.4.6}/terrakio_core/endpoints/auth.py +0 -0
- {terrakio_core-0.4.4 → terrakio_core-0.4.6}/terrakio_core/endpoints/dataset_management.py +0 -0
- {terrakio_core-0.4.4 → terrakio_core-0.4.6}/terrakio_core/endpoints/group_management.py +0 -0
- {terrakio_core-0.4.4 → terrakio_core-0.4.6}/terrakio_core/endpoints/model_management.py +0 -0
- {terrakio_core-0.4.4 → terrakio_core-0.4.6}/terrakio_core/endpoints/space_management.py +0 -0
- {terrakio_core-0.4.4 → terrakio_core-0.4.6}/terrakio_core/endpoints/user_management.py +0 -0
- {terrakio_core-0.4.4 → terrakio_core-0.4.6}/terrakio_core/exceptions.py +0 -0
- {terrakio_core-0.4.4 → terrakio_core-0.4.6}/terrakio_core/helper/bounded_taskgroup.py +0 -0
- {terrakio_core-0.4.4 → terrakio_core-0.4.6}/terrakio_core/helper/decorators.py +0 -0
- {terrakio_core-0.4.4 → terrakio_core-0.4.6}/terrakio_core/helper/tiles.py +0 -0
- {terrakio_core-0.4.4 → terrakio_core-0.4.6}/terrakio_core.egg-info/SOURCES.txt +0 -0
- {terrakio_core-0.4.4 → terrakio_core-0.4.6}/terrakio_core.egg-info/dependency_links.txt +0 -0
- {terrakio_core-0.4.4 → terrakio_core-0.4.6}/terrakio_core.egg-info/top_level.txt +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: terrakio-core
-Version: 0.4.4
+Version: 0.4.6
 Summary: Core components for Terrakio API clients
 Author-email: Yupeng Chao <yupeng@haizea.com.au>
 Project-URL: Homepage, https://github.com/HaizeaAnalytics/terrakio-python-api
@@ -25,6 +25,9 @@ Requires-Dist: google-cloud-storage>=2.0.0
 Requires-Dist: scipy>=1.7.0
 Requires-Dist: nest_asyncio
 Requires-Dist: onnxruntime>=1.10.0
+Requires-Dist: psutil>=5.0.0
+Requires-Dist: h5netcdf>=1.0.0
+Requires-Dist: netcdf4>=1.5.0
 Provides-Extra: ml
 Requires-Dist: torch>=2.7.1; extra == "ml"
 Requires-Dist: scikit-learn>=1.7.0; extra == "ml"
pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "terrakio-core"
-version = "0.4.4"
+version = "0.4.6"
 authors = [
     {name = "Yupeng Chao", email = "yupeng@haizea.com.au"},
 ]
@@ -32,6 +32,9 @@ dependencies = [
     "scipy>=1.7.0",
     "nest_asyncio",
     "onnxruntime>=1.10.0",
+    "psutil>=5.0.0",
+    "h5netcdf>=1.0.0",
+    "netcdf4>=1.5.0",
 ]
 
 [project.optional-dependencies]
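The three new runtime dependencies track the features added further down. psutil is the usual tool for the kind of memory and size heuristics that local_or_remote performs (an assumption — the diff does not show where psutil is called), while h5netcdf and netcdf4 are two of the standard NetCDF engines xarray can write with, which the new mass-stats "netcdf" output path needs on the client side. A minimal sketch of what each enables; the variable names and file name are illustrative, not from terrakio-core:

import numpy as np
import psutil
import xarray as xr

# psutil: inspect available memory before deciding to process locally
available_gb = psutil.virtual_memory().available / 1e9
print(f"Available RAM: {available_gb:.1f} GB")

# h5netcdf/netcdf4: serialize an xarray dataset of the kind zonal_stats
# works with, as a mass-stats job with output="netcdf" would produce
ds = xr.Dataset(
    {"wcf": ("time", np.random.rand(3))},
    coords={"time": [2020, 2021, 2022]},
)
ds.to_netcdf("example.nc", engine="h5netcdf")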
terrakio_core/async_client.py

@@ -17,7 +17,7 @@ from .endpoints.group_management import GroupManagement
 from .endpoints.space_management import SpaceManagement
 from .endpoints.model_management import ModelManagement
 from .endpoints.auth import AuthClient
-from .convenience_functions.convenience_functions import zonal_stats as _zonal_stats, create_dataset_file as _create_dataset_file
+from .convenience_functions.convenience_functions import zonal_stats as _zonal_stats, create_dataset_file as _create_dataset_file, request_geoquery_list as _request_geoquery_list
 
 class AsyncClient(BaseClient):
     def __init__(self, url: Optional[str] = None, api_key: Optional[str] = None, verbose: bool = False, session: Optional[aiohttp.ClientSession] = None):
@@ -196,6 +196,8 @@
         out_crs: str = "epsg:4326",
         resolution: int = -1,
         geom_fix: bool = False,
+        mass_stats: bool = False,
+        id_column: Optional[str] = None,
     ):
         """
         Compute zonal statistics for all geometries in a GeoDataFrame.
@@ -208,6 +210,8 @@
             out_crs (str): Output coordinate reference system
             resolution (int): Resolution parameter
             geom_fix (bool): Whether to fix the geometry (default False)
+            mass_stats (bool): Whether to use mass stats for processing (default False)
+            id_column (Optional[str]): Name of the ID column to use (default None)
 
         Returns:
             geopandas.GeoDataFrame: GeoDataFrame with added columns for results
@@ -225,6 +229,8 @@
             out_crs=out_crs,
             resolution=resolution,
             geom_fix=geom_fix,
+            mass_stats=mass_stats,
+            id_column=id_column,
         )
 
     async def create_dataset_file(
@@ -282,6 +288,49 @@
             download_path=download_path,
         )
 
+    async def geo_queries(
+        self,
+        queries: list[dict],
+        conc: int = 20,
+    ):
+        """
+        Execute multiple geo queries concurrently.
+
+        Args:
+            queries (list[dict]): List of dictionaries containing query parameters.
+                Each query must have 'expr', 'feature', and 'in_crs' keys.
+            conc (int): Number of concurrent requests to make (default 20, max 100)
+
+        Returns:
+            Union[float, geopandas.GeoDataFrame]:
+                - float: Average of all results if results are integers
+                - GeoDataFrame: GeoDataFrame with geometry and dataset columns if results are xarray datasets
+
+        Raises:
+            ValueError: If queries list is empty, concurrency is too high, or queries are malformed
+            APIError: If the API request fails
+
+        Example:
+            queries = [
+                {
+                    'expr': 'WCF.wcf',
+                    'feature': {'type': 'Feature', 'geometry': {...}, 'properties': {}},
+                    'in_crs': 'epsg:4326'
+                },
+                {
+                    'expr': 'NDVI.ndvi',
+                    'feature': {'type': 'Feature', 'geometry': {...}, 'properties': {}},
+                    'in_crs': 'epsg:4326'
+                }
+            ]
+            result = await client.geo_queries(queries)
+        """
+        return await _request_geoquery_list(
+            client=self,
+            quries=queries,  # Note: keeping original parameter name for compatibility
+            conc=conc,
+        )
+
     async def __aenter__(self):
         if self._session is None:
             headers = {
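For context on the new method, a minimal usage sketch. It assumes AsyncClient is importable from terrakio_core.async_client (the module shown in the file list above) and that an API key is available; the expressions and coordinates are illustrative:

import asyncio
from shapely.geometry import Point, mapping
from terrakio_core.async_client import AsyncClient

async def main():
    # Two point queries against the same expression; geo_queries fans them
    # out with the requested concurrency
    queries = [
        {
            "expr": "WCF.wcf",
            "feature": {"type": "Feature", "geometry": mapping(Point(149.1, -35.3)), "properties": {}},
            "in_crs": "epsg:4326",
        },
        {
            "expr": "WCF.wcf",
            "feature": {"type": "Feature", "geometry": mapping(Point(150.0, -34.0)), "properties": {}},
            "in_crs": "epsg:4326",
        },
    ]
    async with AsyncClient(api_key="...") as client:
        result = await client.geo_queries(queries, conc=10)
        print(result)

asyncio.run(main())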
terrakio_core/convenience_functions/convenience_functions.py

@@ -24,6 +24,8 @@ import pyproj
 import pandas as pd
 import geopandas as gpd
 
+from typing import Optional
+
 def expand_on_time(gdf):
     """
     Expand datasets on time dimension - each time becomes a new row.
@@ -359,18 +361,153 @@ async def local_or_remote(
         "local_or_remote": "local",
         "reason": "The number of the requests is not too large, and the time taking for making these requests is not too long, and the size of the dataset is not too large",
     }
+
+def gdf_to_json(
+    gdf: GeoDataFrame,
+    expr: str,
+    in_crs: str = "epsg:4326",
+    out_crs: str = "epsg:4326",
+    resolution: int = -1,
+    geom_fix: bool = False,
+    id_column: Optional[str] = None,
+):
+    """
+    Convert a GeoDataFrame to a list of JSON requests for mass_stats processing.
+
+    Args:
+        gdf: GeoDataFrame containing geometries and optional metadata
+        expr: Expression to evaluate
+        in_crs: Input coordinate reference system
+        out_crs: Output coordinate reference system
+        resolution: Resolution parameter
+        geom_fix: Whether to fix geometry issues
+        id_column: Optional column name to use for group and file names
+
+    Returns:
+        list: List of dictionaries formatted for mass_stats requests
+    """
+    mass_stats_requests = []
+
+    # Loop through each row in the GeoDataFrame
+    for idx, row in gdf.iterrows():
+        # Create the request feature
+        request_feature = {
+            "expr": expr,
+            "feature": {
+                "type": "Feature",
+                "geometry": mapping(gdf.geometry.iloc[idx]),
+                "properties": {}
+            },
+            "in_crs": in_crs,
+            "out_crs": out_crs,
+            "resolution": resolution,
+            "geom_fix": geom_fix,
+        }
+
+        # Determine group name and file name based on id_column
+        if id_column is not None and id_column in gdf.columns:
+            # Use the value from the specified column as group and file name
+            identifier = str(row[id_column])
+            group_name = f"group_{identifier}"
+            file_name = f"file_{identifier}"
+        else:
+            # Use the index as group and file name
+            group_name = f"group_{idx}"
+            file_name = f"file_{idx}"
+
+        # Create the complete request entry
+        request_entry = {
+            "group": group_name,
+            "file": file_name,
+            "request": request_feature,
+        }
+
+        # Add the request to our list
+        mass_stats_requests.append(request_entry)
+
+    return mass_stats_requests
+
+async def handle_mass_stats(
+    client,
+    gdf: GeoDataFrame,
+    expr: str,
+    in_crs: str = "epsg:4326",
+    out_crs: str = "epsg:4326",
+    resolution: int = -1,
+    geom_fix: bool = False,
+    id_column: Optional[str] = None,
+
+):
+    # we have the handle mass stats function, we need to have the list of quries, and we need to pass the quries to the mass stats function
+    # we have the three different variables
+
+    # Check if id_column is provided
+    # if id_column is None:
+    # Handle case where no ID column is specified
+    # this means that the id column is none, so we could just use the default value of 1 2 3 4
+    request_json = gdf_to_json(gdf = gdf, expr = expr, in_crs = in_crs, out_crs = out_crs, resolution = resolution, geom_fix = geom_fix, id_column = id_column)
+    # we need to call the execute job function
+    job_id = await client.mass_stats.execute_job(
+        name = "zonal_stats_job",
+        output = "netcdf",
+        config = {},
+        request_json = request_json,
+        overwrite = True,
+    )
+    return job_id
+    # async def test_regular_async_mass_stats(regular_async_client):
+    #     """Test mass statistics with regular client async"""
+    #     start_result = await regular_async_client.mass_stats.execute_job(
+    #         name="test_regular_mass_stats_test",
+    #         region="aus",
+    #         output="csv",
+    #         config={},
+    #         request_json = "./test_config.json",
+    #         manifest_json = "./test_manifest.json",
+    #         overwrite=True,
+    #     )
+    #     assert isinstance(start_result, dict)
+    #     assert 'task_id' in start_result
+
+    #     return
+    # else:
+    #     # Handle case where ID column is specified
+    #     # Verify the column exists in the GeoDataFrame
+
+    #     if id_column not in gdf.columns:
+    #         raise ValueError(f"ID column '{id_column}' not found in GeoDataFrame columns: {list(gdf.columns)}")
+    #     pass
+    # the second case is that we have an id_column, we need to use the id_column to create the group name
 
+# we have the mass stats as one of the parameters, so that when a user wants a mass
+# for both cases we need to have the list of quries
 async def zonal_stats(
-    client,
-    gdf: GeoDataFrame,
-    expr: str,
-    conc: int = 20,
-    in_crs: str = "epsg:4326",
-    out_crs: str = "epsg:4326",
-    resolution: int = -1,
-    geom_fix: bool = False,
+    client,
+    gdf: GeoDataFrame,
+    expr: str,
+    conc: int = 20,
+    in_crs: str = "epsg:4326",
+    out_crs: str = "epsg:4326",
+    resolution: int = -1,
+    geom_fix: bool = False,
+    mass_stats: bool = False,
+    id_column: Optional[str] = None,
 ):
     """Compute zonal statistics for all geometries in a GeoDataFrame."""
+
+    if mass_stats:
+        mass_stats_id = await handle_mass_stats(
+            client = client,
+            gdf = gdf,
+            expr = expr,
+            in_crs = in_crs,
+            out_crs = out_crs,
+            resolution = resolution,
+            geom_fix = geom_fix,
+            id_column = id_column
+        )
+        # if we started the mass stats job, we need to return the job id
+        return mass_stats_id
     quries = []
     for i in range(len(gdf)):
         quries.append({
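To make the shape of gdf_to_json's output concrete, a small sketch on a two-row frame with an id_column. The import assumes the module path from the file list; the "paddock" column and expression are illustrative:

import geopandas as gpd
from shapely.geometry import Point
from terrakio_core.convenience_functions.convenience_functions import gdf_to_json

gdf = gpd.GeoDataFrame(
    {"paddock": ["north", "south"]},
    geometry=[Point(149.1, -35.3), Point(150.0, -34.0)],
    crs="epsg:4326",
)
requests = gdf_to_json(gdf, expr="WCF.wcf", id_column="paddock")
# requests[0] is:
# {
#     "group": "group_north",
#     "file": "file_north",
#     "request": {
#         "expr": "WCF.wcf",
#         "feature": {"type": "Feature", "geometry": {...}, "properties": {}},
#         "in_crs": "epsg:4326", "out_crs": "epsg:4326",
#         "resolution": -1, "geom_fix": False,
#     },
# }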
@@ -385,6 +522,7 @@ async def zonal_stats(
             "resolution": resolution,
             "geom_fix": geom_fix,
         })
+
     local_or_remote_result = await local_or_remote(client= client, quries = quries)
     if local_or_remote_result["local_or_remote"] == "remote":
         raise ValueError(local_or_remote_result["reason"])
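The upshot is that zonal_stats now has two paths: the default path runs the per-geometry queries directly (subject to the local_or_remote check) and returns a GeoDataFrame, while mass_stats=True submits a server-side job and returns its id. A hedged sketch of both calls, assuming client is an AsyncClient and gdf is the frame from the previous sketch:

async def demo(client, gdf):
    # Direct path: concurrent per-geometry queries, results merged locally
    result_gdf = await client.zonal_stats(gdf=gdf, expr="WCF.wcf", conc=20)

    # Mass-stats path: submits a "zonal_stats_job" with netcdf output via
    # client.mass_stats.execute_job and returns the job id immediately
    job_id = await client.zonal_stats(
        gdf=gdf, expr="WCF.wcf", mass_stats=True, id_column="paddock"
    )
    return result_gdf, job_id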
terrakio_core/endpoints/mass_stats.py

@@ -328,7 +328,7 @@ class MassStats:
         resp.raise_for_status()
         import aiofiles
         async with aiofiles.open(file_save_path, 'wb') as file:
-            async for chunk in resp.content.iter_chunked(1048576):
+            async for chunk in resp.content.iter_chunked(1048576):
                 if chunk:
                     await file.write(chunk)
@@ -363,7 +363,7 @@ class MassStats:
             if total_files is not None and downloaded_files >= total_files:
                 break
             if len(download_urls) < page_size:
-                break
+                break
             page += 1
         return output_files
     except Exception as e:

(The -/+ pairs in the two hunks above are whitespace-only changes; exact indentation is not preserved in this extracted view.)

@@ -389,7 +389,6 @@ class MassStats:
                 raise ValueError("Request must be a dictionary")
             if not isinstance(request["file"], (str, int, list)):
                 raise ValueError("'file' must be a string or a list of strings")
-            # Only check the first 3 requests
             if i == 3:
                 break
 
@@ -397,10 +396,9 @@
     async def execute_job(
         self,
         name: str,
-        # region: str,
         output: str,
         config: Dict[str, Any],
-        request_json: str,
+        request_json: Union[str, list[Dict[str, Any]]],
         overwrite: bool = False,
         skip_existing: bool = False,
         location: str = None,
@@ -427,7 +425,6 @@
         Raises:
             APIError: If the API request fails
         """
-
         def extract_manifest_from_request(request_data: List[Dict[str, Any]]) -> List[str]:
             """Extract unique group names from request data to create manifest list."""
             groups = []
@@ -447,34 +444,35 @@
 
         return groups
 
-        [12 removed lines; their content is not preserved in this extracted view]
+        if isinstance(request_json, str):
+            try:
+                with open(request_json, 'r') as file:
+                    request_data = json.load(file)
+                if isinstance(request_data, list):
+                    size = len(request_data)
+                else:
+                    raise ValueError(f"Request JSON file {request_json} should contain a list of dictionaries")
+            except FileNotFoundError as e:
+                return e
+            except json.JSONDecodeError as e:
+                return e
+            request_json_path = request_json
+        else:
+            request_data = request_json
+            size = len(request_data)
+            request_json_path = None
 
-        # Generate manifest from request data (kept in memory)
         try:
             manifest_groups = extract_manifest_from_request(request_data)
         except Exception as e:
            raise ValueError(f"Error extracting manifest from request JSON: {e}")
 
-
-        first_request = request_data[0]  # Changed from data[0] to request_data[0]
+        first_request = request_data[0]
         first_expression = first_request["request"]["expr"]
 
-        # Get upload URLs
         upload_result = await self._upload_request(
             name=name,
             size=size,
-            # region=region,
             sample = first_expression,
             output=output,
             config=config,
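execute_job therefore accepts either a path to a request JSON file or an in-memory list like the one gdf_to_json builds. Note that the file branch returns the FileNotFoundError or json.JSONDecodeError object rather than raising it, so callers passing a path should check the return value. A sketch of both call styles; the job names and file path are illustrative, and `feature` is assumed to be a GeoJSON feature dict:

async def submit(client, feature):
    # 1) Path to a JSON file containing a list of request dicts
    task_id = await client.mass_stats.execute_job(
        name="my_file_job",
        output="csv",
        config={},
        request_json="./requests.json",
        overwrite=True,
    )

    # 2) In-memory list, e.g. as produced by gdf_to_json(...)
    task_id = await client.mass_stats.execute_job(
        name="my_inline_job",
        output="netcdf",
        config={},
        request_json=[{
            "group": "group_0",
            "file": "file_0",
            "request": {"expr": "WCF.wcf", "feature": feature, "in_crs": "epsg:4326"},
        }],
        overwrite=True,
    )
    return task_id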
@@ -490,21 +488,21 @@
 
         if not requests_url:
             raise ValueError("No requests_url returned from server for request JSON upload")
-
-        # Upload request JSON file
         try:
-            self.validate_request(request_json)
-            requests_response = await self._upload_file(request_json, requests_url, use_gzip=True)
+            if request_json_path:
+                self.validate_request(request_json_path)
+                requests_response = await self._upload_file(request_json_path, requests_url, use_gzip=True)
+            else:
+                requests_response = await self._upload_json_data(request_data, requests_url, use_gzip=True)
             if requests_response.status not in [200, 201, 204]:
                 self._client.logger.error(f"Requests upload error: {requests_response.text()}")
-                raise Exception(f"Failed to upload request
+                raise Exception(f"Failed to upload request data: {requests_response.text()}")
         except Exception as e:
             raise Exception(f"Error uploading request JSON file {request_json}: {e}")
 
         if not manifest_url:
             raise ValueError("No manifest_url returned from server for manifest JSON upload")
-        # Upload manifest JSON data directly (no temporary file needed)
         try:
             manifest_response = await self._upload_json_data(manifest_groups, manifest_url, use_gzip=False)
             if manifest_response.status not in [200, 201, 204]:
@@ -513,7 +511,6 @@
         except Exception as e:
             raise Exception(f"Error uploading manifest JSON: {e}")
 
-        # Start the job
         start_job_task_id = await self.start_job(upload_result.get("id"))
         return start_job_task_id
 
@@ -557,7 +554,6 @@
         tile_size: int,
         res: float,
         output: str,
-        # region: str,
         year_range: list[int] = None,
         overwrite: bool = False,
         server: str = None,
@@ -600,7 +596,6 @@
         payload_mapping = {
             "year_range": year_range,
             "server": server,
-            # "region": region,
             "bucket": bucket,
         }
         for key, value in payload_mapping.items():
terrakio_core/sync_client.py

@@ -339,37 +339,41 @@
 
 
 import asyncio
+import concurrent.futures
+import threading
 import functools
-from typing import Optional, Dict, Any, Union
+import inspect
+from typing import Optional, Dict, Any, Union, TYPE_CHECKING
 from geopandas import GeoDataFrame
 from shapely.geometry.base import BaseGeometry as ShapelyGeometry
 from .async_client import AsyncClient
 
+# Add type checking imports for better IDE support
+if TYPE_CHECKING:
+    from .endpoints.dataset_management import DatasetManagement
+    from .endpoints.user_management import UserManagement
+    from .endpoints.mass_stats import MassStats
+    from .endpoints.group_management import GroupManagement
+    from .endpoints.space_management import SpaceManagement
+    from .endpoints.model_management import ModelManagement
+    from .endpoints.auth import AuthClient
+
 
 class SyncWrapper:
-    """
-    Generic synchronous wrapper with __dir__ support for runtime autocomplete.
-    """
+    """Generic synchronous wrapper with __dir__ support for runtime autocomplete."""
 
     def __init__(self, async_obj, sync_client):
         self._async_obj = async_obj
         self._sync_client = sync_client
 
     def __dir__(self):
-        """
-        Return list of attributes for autocomplete in interactive environments.
-        This enables autocomplete in Jupyter/iPython after instantiation.
-        """
+        """Return list of attributes for autocomplete in interactive environments."""
         async_attrs = [attr for attr in dir(self._async_obj) if not attr.startswith('_')]
-
         wrapper_attrs = [attr for attr in object.__dir__(self) if not attr.startswith('_')]
-
         return list(set(async_attrs + wrapper_attrs))
 
     def __getattr__(self, name):
-        """
-        Dynamically wrap any method call to convert async to sync.
-        """
+        """Dynamically wrap any method call to convert async to sync."""
         attr = getattr(self._async_obj, name)
 
         if callable(attr):
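The diff shows only the head of SyncWrapper.__getattr__, so the following self-contained sketch illustrates the general async-to-sync wrapping idea rather than the package's exact code; in terrakio-core the wrapper hands coroutines to SyncClient._run_async, whereas asyncio.run stands in here:

import asyncio
import functools
import inspect

class FakeAsyncAPI:
    # Hypothetical stand-in so the sketch runs on its own
    async def ping(self):
        await asyncio.sleep(0)
        return "pong"

class SyncWrapperSketch:
    def __init__(self, async_obj):
        self._async_obj = async_obj

    def __getattr__(self, name):
        attr = getattr(self._async_obj, name)
        if callable(attr):
            @functools.wraps(attr)
            def sync_method(*args, **kwargs):
                result = attr(*args, **kwargs)
                if inspect.iscoroutine(result):
                    # The real wrapper submits to a persistent event loop
                    # via SyncClient._run_async; asyncio.run stands in here
                    return asyncio.run(result)
                return result
            return sync_method
        return attr

print(SyncWrapperSketch(FakeAsyncAPI()).ping())  # -> "pong"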
@@ -386,15 +390,32 @@ class SyncWrapper:
 
 class SyncClient:
     """
-    [two lines of the previous docstring; content not preserved in this extracted view]
+    Thread-safe synchronous wrapper for AsyncClient.
+    Uses a persistent event loop in a dedicated thread to avoid event loop conflicts.
     """
+
+    # Add explicit type annotations for endpoint managers
+    datasets: 'DatasetManagement'
+    users: 'UserManagement'
+    mass_stats: 'MassStats'
+    groups: 'GroupManagement'
+    space: 'SpaceManagement'
+    model: 'ModelManagement'
+    auth: 'AuthClient'
 
     def __init__(self, url: Optional[str] = None, api_key: Optional[str] = None, verbose: bool = False):
         self._async_client = AsyncClient(url=url, api_key=api_key, verbose=verbose)
         self._context_entered = False
         self._closed = False
 
+        # Thread and event loop management
+        self._loop = None
+        self._thread = None
+        self._loop_ready = None
+        self._loop_exception = None
+
+        # Initialize endpoint managers with proper typing
+
         self.datasets = SyncWrapper(self._async_client.datasets, self)
         self.users = SyncWrapper(self._async_client.users, self)
         self.mass_stats = SyncWrapper(self._async_client.mass_stats, self)
@@ -406,21 +427,150 @@ class SyncClient:
         import atexit
         atexit.register(self._cleanup)
 
-    def [previous helper implementation; old lines 409-421 are not preserved in this extracted view]
+    def _ensure_event_loop(self) -> None:
+        """Ensure we have a persistent event loop in a dedicated thread."""
+        if self._loop is None or self._loop.is_closed():
+            self._loop_ready = threading.Event()
+            self._loop_exception = None
+
+            def run_loop():
+                """Run the event loop in a dedicated thread."""
+                try:
+                    # Create a new event loop for this thread
+                    self._loop = asyncio.new_event_loop()
+                    asyncio.set_event_loop(self._loop)
+
+                    # Signal that the loop is ready
+                    self._loop_ready.set()
+
+                    # Run the loop forever (until stopped)
+                    self._loop.run_forever()
+                except Exception as e:
+                    self._loop_exception = e
+                    self._loop_ready.set()
+                finally:
+                    # Clean up when the loop stops
+                    if self._loop and not self._loop.is_closed():
+                        self._loop.close()
+
+            # Start the thread
+            self._thread = threading.Thread(target=run_loop, daemon=True)
+            self._thread.start()
+
+            # Wait for the loop to be ready
+            self._loop_ready.wait(timeout=10)
+
+            if self._loop_exception:
+                raise self._loop_exception
+
+            if not self._loop_ready.is_set():
+                raise RuntimeError("Event loop failed to start within timeout")
+
+    def _run_async(self, coro):
+        """
+        Run async coroutine using persistent event loop.
+        This is the core method that makes everything work.
+        """
+        # Ensure we have an event loop
+        self._ensure_event_loop()
+
+        if self._loop.is_closed():
+            raise RuntimeError("Event loop is closed")
+
+        # Create a future to get the result back from the event loop thread
+        future = concurrent.futures.Future()
+
+        async def run_with_context():
+            """Run the coroutine with proper context management."""
+            try:
+                # Ensure the async client is properly initialized
+                await self._ensure_context()
+
+                # Run the actual coroutine
+                result = await coro
+
+                # Set the result on the future
+                future.set_result(result)
+            except Exception as e:
+                # Set the exception on the future
+                future.set_exception(e)
+
+        # Schedule the coroutine on the persistent event loop
+        self._loop.call_soon_threadsafe(
+            lambda: asyncio.create_task(run_with_context())
+        )
+
+        # Wait for the result (with timeout to avoid hanging)
+        try:
+            return future.result(timeout=300)  # 5 minute timeout
+        except concurrent.futures.TimeoutError:
+            raise RuntimeError("Async operation timed out after 5 minutes")
+
+    async def _ensure_context(self) -> None:
+        """Ensure the async client context is entered."""
+        if not self._context_entered and not self._closed:
+            await self._async_client.__aenter__()
+            self._context_entered = True
+
+    async def _exit_context(self) -> None:
+        """Exit the async client context."""
+        if self._context_entered and not self._closed:
+            await self._async_client.__aexit__(None, None, None)
+            self._context_entered = False
+
+    def close(self) -> None:
+        """Close the underlying async client session and stop the event loop."""
+        if not self._closed:
+            if self._loop and not self._loop.is_closed():
+                # Schedule cleanup on the event loop
+                future = concurrent.futures.Future()
+
+                async def cleanup():
+                    """Clean up the async client."""
+                    try:
+                        await self._exit_context()
+                        future.set_result(None)
+                    except Exception as e:
+                        future.set_exception(e)
+
+                # Run cleanup
+                self._loop.call_soon_threadsafe(
+                    lambda: asyncio.create_task(cleanup())
+                )
+
+                # Wait for cleanup to complete
+                try:
+                    future.result(timeout=10)
+                except:
+                    pass  # Ignore cleanup errors
+
+                # Stop the event loop
+                self._loop.call_soon_threadsafe(self._loop.stop)
+
+                # Wait for thread to finish
+                if self._thread and self._thread.is_alive():
+                    self._thread.join(timeout=5)
+
+            self._closed = True
+
+    def _cleanup(self) -> None:
+        """Internal cleanup method called by atexit."""
+        if not self._closed:
+            try:
+                self.close()
+            except Exception:
+                pass  # Ignore cleanup errors
+
+    def __dir__(self):
+        """Return list of attributes for autocomplete in interactive environments."""
+        default_attrs = [attr for attr in object.__dir__(self) if not attr.startswith('_')]
+        async_client_attrs = [attr for attr in dir(self._async_client) if not attr.startswith('_')]
+        endpoint_attrs = ['datasets', 'users', 'mass_stats', 'groups', 'space', 'model', 'auth']
+        all_attrs = default_attrs + async_client_attrs + endpoint_attrs
         return list(set(all_attrs))
 
+    # Your existing methods with proper type annotations
     def geoquery(
         self,
         expr: str,
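The core pattern above — one long-lived event loop in a daemon thread, with synchronous callers submitting coroutines to it — can be distilled into a few lines. This standalone sketch uses asyncio.run_coroutine_threadsafe, the standard-library shortcut for the call_soon_threadsafe-plus-Future plumbing written out in the diff:

import asyncio
import threading

class LoopThread:
    """A persistent event loop running in a dedicated daemon thread."""

    def __init__(self):
        self._loop = asyncio.new_event_loop()
        self._thread = threading.Thread(target=self._run, daemon=True)
        self._thread.start()

    def _run(self):
        # Bind the loop to this thread and run it until stop() is called
        asyncio.set_event_loop(self._loop)
        self._loop.run_forever()

    def run_sync(self, coro, timeout=300):
        # Thread-safe submission; blocks the caller until the result is ready
        future = asyncio.run_coroutine_threadsafe(coro, self._loop)
        return future.result(timeout=timeout)

    def close(self):
        self._loop.call_soon_threadsafe(self._loop.stop)
        self._thread.join(timeout=5)

async def demo():
    await asyncio.sleep(0.1)
    return 42

lt = LoopThread()
print(lt.run_sync(demo()))  # 42, obtained from ordinary synchronous code
lt.close()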
@@ -445,15 +595,18 @@ class SyncClient:
         return self._run_async(coro)
 
     def zonal_stats(
-        self,
-        gdf: GeoDataFrame,
-        expr: str,
-        conc: int = 20,
-        in_crs: str = "epsg:4326",
-        out_crs: str = "epsg:4326",
-        resolution: int = -1,
-        geom_fix: bool = False,
-    ):
+        self,
+        gdf: GeoDataFrame,
+        expr: str,
+        conc: int = 20,
+        in_crs: str = "epsg:4326",
+        out_crs: str = "epsg:4326",
+        resolution: int = -1,
+        geom_fix: bool = False,
+        mass_stats: bool = False,
+        id_column: Optional[str] = None,
+    ) -> GeoDataFrame:
+
         """
         Compute zonal statistics for all geometries in a GeoDataFrame (synchronous version).
 
@@ -465,6 +618,8 @@ class SyncClient:
             out_crs (str): Output coordinate reference system
             resolution (int): Resolution parameter
             geom_fix (bool): Whether to fix the geometry (default False)
+            mass_stats (bool): Whether to use mass stats for processing (default False)
+            id_column (Optional[str]): Name of the ID column to use (default None)
 
         Returns:
             geopandas.GeoDataFrame: GeoDataFrame with added columns for results
@@ -481,6 +636,8 @@ class SyncClient:
             out_crs=out_crs,
             resolution=resolution,
             geom_fix=geom_fix,
+            mass_stats=mass_stats,
+            id_column=id_column,
         )
         return self._run_async(coro)
 
@@ -516,79 +673,59 @@ class SyncClient:
         )
         return self._run_async(coro)
 
-        [body of the previous close() implementation, old lines 519-560; content not preserved in this extracted view]
-        self._closed = True
-
-    def _cleanup(self):
-        """Internal cleanup method called by atexit."""
-        if not self._closed:
-            try:
-                self.close()
-            except Exception:
-                pass
+    def geo_queries(
+        self,
+        queries: list[dict],
+        conc: int = 20,
+    ) -> Union[float, GeoDataFrame]:
+        """
+        Execute multiple geo queries concurrently (synchronous version).
+
+        Args:
+            queries (list[dict]): List of dictionaries containing query parameters.
+                Each query must have 'expr', 'feature', and 'in_crs' keys.
+            conc (int): Number of concurrent requests to make (default 20, max 100)
+
+        Returns:
+            Union[float, geopandas.GeoDataFrame]:
+                - float: Average of all results if results are integers
+                - GeoDataFrame: GeoDataFrame with geometry and dataset columns if results are xarray datasets
+
+        Raises:
+            ValueError: If queries list is empty, concurrency is too high, or queries are malformed
+            APIError: If the API request fails
+
+        Example:
+            queries = [
+                {
+                    'expr': 'WCF.wcf',
+                    'feature': {'type': 'Feature', 'geometry': {...}, 'properties': {}},
+                    'in_crs': 'epsg:4326'
+                },
+                {
+                    'expr': 'NDVI.ndvi',
+                    'feature': {'type': 'Feature', 'geometry': {...}, 'properties': {}},
+                    'in_crs': 'epsg:4326'
+                }
+            ]
+            result = client.geo_queries(queries)
+        """
+        coro = self._async_client.geo_queries(
+            queries=queries,
+            conc=conc,
+        )
+        return self._run_async(coro)
 
-    def __enter__(self):
+    # Context manager support
+    def __enter__(self) -> 'SyncClient':
         """Context manager entry."""
-        async def enter_async():
-            await self._ensure_context()
-
-        try:
-            loop = asyncio.get_running_loop()
-            import concurrent.futures
-            with concurrent.futures.ThreadPoolExecutor() as executor:
-                future = executor.submit(asyncio.run, enter_async())
-                future.result()
-        except RuntimeError:
-            asyncio.run(enter_async())
-
         return self
 
-    def __exit__(self, exc_type, exc_val, exc_tb):
+    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
         """Context manager exit."""
         self.close()
 
-    def __del__(self):
+    def __del__(self) -> None:
         """Destructor to ensure session is closed."""
         if not self._closed:
             try:
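With __enter__ reduced to returning self, SyncClient behaves as an ordinary context manager: the persistent loop is created lazily by the first call that needs it and torn down in close() on exit. A usage sketch, assuming the import path from the file list and reusing queries and gdf from the earlier sketches:

from terrakio_core.sync_client import SyncClient

with SyncClient(api_key="...") as client:
    # Both calls block until the background loop delivers the result
    result = client.geo_queries(queries, conc=10)
    stats_gdf = client.zonal_stats(gdf=gdf, expr="WCF.wcf")
# __exit__ calls close(), which stops the loop and joins the thread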
terrakio_core.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: terrakio-core
-Version: 0.4.4
+Version: 0.4.6
 Summary: Core components for Terrakio API clients
 Author-email: Yupeng Chao <yupeng@haizea.com.au>
 Project-URL: Homepage, https://github.com/HaizeaAnalytics/terrakio-python-api
@@ -25,6 +25,9 @@ Requires-Dist: google-cloud-storage>=2.0.0
 Requires-Dist: scipy>=1.7.0
 Requires-Dist: nest_asyncio
 Requires-Dist: onnxruntime>=1.10.0
+Requires-Dist: psutil>=5.0.0
+Requires-Dist: h5netcdf>=1.0.0
+Requires-Dist: netcdf4>=1.5.0
 Provides-Extra: ml
 Requires-Dist: torch>=2.7.1; extra == "ml"
 Requires-Dist: scikit-learn>=1.7.0; extra == "ml"
All remaining files listed above (+0 -0) are unchanged between 0.4.4 and 0.4.6.