terrakio-core 0.4.5.tar.gz → 0.4.7.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of terrakio-core might be problematic.
- {terrakio_core-0.4.5 → terrakio_core-0.4.7}/PKG-INFO +5 -1
- {terrakio_core-0.4.5 → terrakio_core-0.4.7}/pyproject.toml +5 -1
- {terrakio_core-0.4.5 → terrakio_core-0.4.7}/terrakio_core/__init__.py +1 -1
- {terrakio_core-0.4.5 → terrakio_core-0.4.7}/terrakio_core/async_client.py +14 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.7}/terrakio_core/convenience_functions/convenience_functions.py +137 -35
- {terrakio_core-0.4.5 → terrakio_core-0.4.7}/terrakio_core/endpoints/mass_stats.py +143 -16
- {terrakio_core-0.4.5 → terrakio_core-0.4.7}/terrakio_core/helper/tiles.py +13 -12
- {terrakio_core-0.4.5 → terrakio_core-0.4.7}/terrakio_core/sync_client.py +202 -140
- {terrakio_core-0.4.5 → terrakio_core-0.4.7}/terrakio_core.egg-info/PKG-INFO +5 -1
- {terrakio_core-0.4.5 → terrakio_core-0.4.7}/terrakio_core.egg-info/requires.txt +4 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.7}/README.md +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.7}/setup.cfg +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.7}/terrakio_core/accessors.py +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.7}/terrakio_core/client.py +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.7}/terrakio_core/config.py +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.7}/terrakio_core/endpoints/auth.py +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.7}/terrakio_core/endpoints/dataset_management.py +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.7}/terrakio_core/endpoints/group_management.py +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.7}/terrakio_core/endpoints/model_management.py +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.7}/terrakio_core/endpoints/space_management.py +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.7}/terrakio_core/endpoints/user_management.py +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.7}/terrakio_core/exceptions.py +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.7}/terrakio_core/helper/bounded_taskgroup.py +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.7}/terrakio_core/helper/decorators.py +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.7}/terrakio_core.egg-info/SOURCES.txt +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.7}/terrakio_core.egg-info/dependency_links.txt +0 -0
- {terrakio_core-0.4.5 → terrakio_core-0.4.7}/terrakio_core.egg-info/top_level.txt +0 -0
{terrakio_core-0.4.5 → terrakio_core-0.4.7}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: terrakio-core
-Version: 0.4.5
+Version: 0.4.7
 Summary: Core components for Terrakio API clients
 Author-email: Yupeng Chao <yupeng@haizea.com.au>
 Project-URL: Homepage, https://github.com/HaizeaAnalytics/terrakio-python-api
@@ -25,6 +25,10 @@ Requires-Dist: google-cloud-storage>=2.0.0
 Requires-Dist: scipy>=1.7.0
 Requires-Dist: nest_asyncio
 Requires-Dist: onnxruntime>=1.10.0
+Requires-Dist: psutil>=5.0.0
+Requires-Dist: h5netcdf>=1.0.0
+Requires-Dist: netcdf4>=1.5.0
+Requires-Dist: aiofiles>=24.1.0
 Provides-Extra: ml
 Requires-Dist: torch>=2.7.1; extra == "ml"
 Requires-Dist: scikit-learn>=1.7.0; extra == "ml"
```
{terrakio_core-0.4.5 → terrakio_core-0.4.7}/pyproject.toml

```diff
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "terrakio-core"
-version = "0.4.5"
+version = "0.4.7"
 authors = [
     {name = "Yupeng Chao", email = "yupeng@haizea.com.au"},
 ]
@@ -32,6 +32,10 @@ dependencies = [
     "scipy>=1.7.0",
     "nest_asyncio",
     "onnxruntime>=1.10.0",
+    "psutil>=5.0.0",
+    "h5netcdf>=1.0.0",
+    "netcdf4>=1.5.0",
+    "aiofiles>=24.1.0"
 ]
 
 [project.optional-dependencies]
```
{terrakio_core-0.4.5 → terrakio_core-0.4.7}/terrakio_core/async_client.py

```diff
@@ -196,6 +196,8 @@ class AsyncClient(BaseClient):
         out_crs: str = "epsg:4326",
         resolution: int = -1,
         geom_fix: bool = False,
+        mass_stats: bool = False,
+        id_column: Optional[str] = None,
     ):
         """
         Compute zonal statistics for all geometries in a GeoDataFrame.
@@ -208,6 +210,8 @@ class AsyncClient(BaseClient):
             out_crs (str): Output coordinate reference system
             resolution (int): Resolution parameter
             geom_fix (bool): Whether to fix the geometry (default False)
+            mass_stats (bool): Whether to use mass stats for processing (default False)
+            id_column (Optional[str]): Name of the ID column to use (default None)
 
         Returns:
             geopandas.GeoDataFrame: GeoDataFrame with added columns for results
@@ -225,10 +229,13 @@ class AsyncClient(BaseClient):
             out_crs=out_crs,
             resolution=resolution,
             geom_fix=geom_fix,
+            mass_stats=mass_stats,
+            id_column=id_column,
         )
 
     async def create_dataset_file(
         self,
+        name: str,
         aoi: str,
         expression: str,
         output: str,
@@ -241,6 +248,9 @@ class AsyncClient(BaseClient):
         non_interactive: bool = True,
         poll_interval: int = 30,
         download_path: str = "/home/user/Downloads",
+        mask = True,
+        max_file_size_mb: int = 5120,  # Default to 5GB
+        tile_size: int = 1024,
     ) -> dict:
         """
         Create a dataset file using mass stats operations.
@@ -280,6 +290,10 @@ class AsyncClient(BaseClient):
             non_interactive=non_interactive,
             poll_interval=poll_interval,
             download_path=download_path,
+            name=name,
+            mask=mask,
+            max_file_size_mb=max_file_size_mb,
+            tile_size=tile_size
         )
 
     async def geo_queries(
```
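`create_dataset_file` now takes a required job `name` and exposes `mask`, `max_file_size_mb`, and `tile_size`. A minimal usage sketch, assuming a valid API key; the AOI path, download directory, and job name below are placeholders, and the expression is the default from `helper/tiles.py`:

```python
# Hypothetical call showing the parameters added to create_dataset_file in 0.4.7.
# The AOI path, API key, job name, and download directory are placeholders.
import asyncio
from terrakio_core.async_client import AsyncClient

async def main():
    async with AsyncClient(api_key="YOUR_API_KEY") as client:
        result = await client.create_dataset_file(
            name="ndvi-tiles-2024",          # caller-chosen job name (now required)
            aoi="aoi.geojson",
            expression="red=S2v2#(year,median).red@(year =2024) \n red",
            output="netcdf",
            download_path="/tmp/terrakio",
            mask=True,                        # clip each tile to the AOI
            max_file_size_mb=5120,            # cap passed through to combine_tiles
            tile_size=1024,                   # tile edge length forwarded to tiles()
        )
        # Per the convenience function, the result holds both task ids:
        # {"generation_task_id": ..., "combine_task_id": ...}
        print(result)

asyncio.run(main())
```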
{terrakio_core-0.4.5 → terrakio_core-0.4.7}/terrakio_core/convenience_functions/convenience_functions.py

```diff
@@ -24,6 +24,8 @@ import pyproj
 import pandas as pd
 import geopandas as gpd
 
+from typing import Optional
+
 def expand_on_time(gdf):
     """
     Expand datasets on time dimension - each time becomes a new row.
@@ -359,18 +361,119 @@ async def local_or_remote(
         "local_or_remote": "local",
         "reason": "The number of the requests is not too large, and the time taking for making these requests is not too long, and the size of the dataset is not too large",
     }
+
+def gdf_to_json(
+    gdf: GeoDataFrame,
+    expr: str,
+    in_crs: str = "epsg:4326",
+    out_crs: str = "epsg:4326",
+    resolution: int = -1,
+    geom_fix: bool = False,
+    id_column: Optional[str] = None,
+):
+    """
+    Convert a GeoDataFrame to a list of JSON requests for mass_stats processing.
+
+    Args:
+        gdf: GeoDataFrame containing geometries and optional metadata
+        expr: Expression to evaluate
+        in_crs: Input coordinate reference system
+        out_crs: Output coordinate reference system
+        resolution: Resolution parameter
+        geom_fix: Whether to fix geometry issues
+        id_column: Optional column name to use for group and file names
+
+    Returns:
+        list: List of dictionaries formatted for mass_stats requests
+    """
+    mass_stats_requests = []
+
+    # Loop through each row in the GeoDataFrame
+    for idx, row in gdf.iterrows():
+        # Create the request feature
+        request_feature = {
+            "expr": expr,
+            "feature": {
+                "type": "Feature",
+                "geometry": mapping(gdf.geometry.iloc[idx]),
+                "properties": {}
+            },
+            "in_crs": in_crs,
+            "out_crs": out_crs,
+            "resolution": resolution,
+            "geom_fix": geom_fix,
+        }
+
+        # Determine group name and file name based on id_column
+        if id_column is not None and id_column in gdf.columns:
+            # Use the value from the specified column as group and file name
+            identifier = str(row[id_column])
+            group_name = f"group_{identifier}"
+            file_name = f"file_{identifier}"
+        else:
+            # Use the index as group and file name
+            group_name = f"group_{idx}"
+            file_name = f"file_{idx}"
+
+        # Create the complete request entry
+        request_entry = {
+            "group": group_name,
+            "file": file_name,
+            "request": request_feature,
+        }
+
+        # Add the request to our list
+        mass_stats_requests.append(request_entry)
+
+    return mass_stats_requests
+
+async def handle_mass_stats(
+    client,
+    gdf: GeoDataFrame,
+    expr: str,
+    in_crs: str = "epsg:4326",
+    out_crs: str = "epsg:4326",
+    resolution: int = -1,
+    geom_fix: bool = False,
+    id_column: Optional[str] = None,
+
+):
+    request_json = gdf_to_json(gdf = gdf, expr = expr, in_crs = in_crs, out_crs = out_crs, resolution = resolution, geom_fix = geom_fix, id_column = id_column)
+    job_id = await client.mass_stats.execute_job(
+        name = "zonal_stats_job",
+        output = "netcdf",
+        config = {},
+        request_json = request_json,
+        overwrite = True,
+    )
+    return job_id
 
 async def zonal_stats(
-    client,
-    gdf: GeoDataFrame,
-    expr: str,
-    conc: int = 20,
-    in_crs: str = "epsg:4326",
-    out_crs: str = "epsg:4326",
-    resolution: int = -1,
-    geom_fix: bool = False,
+    client,
+    gdf: GeoDataFrame,
+    expr: str,
+    conc: int = 20,
+    in_crs: str = "epsg:4326",
+    out_crs: str = "epsg:4326",
+    resolution: int = -1,
+    geom_fix: bool = False,
+    mass_stats: bool = False,
+    id_column: Optional[str] = None,
 ):
     """Compute zonal statistics for all geometries in a GeoDataFrame."""
+
+    if mass_stats:
+        mass_stats_id = await handle_mass_stats(
+            client = client,
+            gdf = gdf,
+            expr = expr,
+            in_crs = in_crs,
+            out_crs = out_crs,
+            resolution = resolution,
+            geom_fix = geom_fix,
+            id_column = id_column
+        )
+        return mass_stats_id
     quries = []
     for i in range(len(gdf)):
         quries.append({
@@ -385,6 +488,7 @@ async def zonal_stats(
             "resolution": resolution,
             "geom_fix": geom_fix,
         })
+
     local_or_remote_result = await local_or_remote(client= client, quries = quries)
     if local_or_remote_result["local_or_remote"] == "remote":
         raise ValueError(local_or_remote_result["reason"])
@@ -398,30 +502,35 @@ async def create_dataset_file(
     aoi: str,
     expression: str,
     output: str,
+    download_path: str,
     in_crs: str = "epsg:4326",
-    res: float = 0.0001,
-    region: str = "aus",
     to_crs: str = "epsg:4326",
-
+    res: float = 0.0001,
+    region: str = None,
+    overwrite: bool = False,
     skip_existing: bool = False,
     non_interactive: bool = True,
+    name: str | None = None,
     poll_interval: int = 30,
-
+    max_file_size_mb: int = 5120,
+    tile_size: int = 1024,
+    mask: bool = True
 ) -> dict:
 
-
+    if not name:
+        name = f"file-gen-{uuid.uuid4().hex[:8]}"
 
     body, reqs, groups = tiles(
         name = name,
         aoi = aoi,
         expression = expression,
         output = output,
-        tile_size =
+        tile_size = tile_size,
         crs = in_crs,
         res = res,
         region = region,
         to_crs = to_crs,
-
+        mask = mask,
         overwrite = overwrite,
         skip_existing = skip_existing,
         non_interactive = non_interactive
@@ -429,9 +538,6 @@ async def create_dataset_file(
     with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as tempreq:
         tempreq.write(reqs)
         tempreqname = tempreq.name
-    with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as tempmanifest:
-        tempmanifest.write(groups)
-        tempmanifestname = tempmanifest.name
 
     task_id = await client.mass_stats.execute_job(
         name=body["name"],
@@ -441,28 +547,24 @@ async def create_dataset_file(
         overwrite=body["overwrite"],
         skip_existing=body["skip_existing"],
         request_json=tempreqname,
-        manifest_json=tempmanifestname,
     )
 
     start_time = time.time()
     status = None
-
+    client.logger.info(f"Tracking data generation job {task_id['task_id']}...")
     while True:
         try:
             taskid = task_id['task_id']
             trackinfo = await client.mass_stats.track_job([taskid])
-            client.logger.info("the trackinfo is: ", trackinfo)
             status = trackinfo[taskid]['status']
-
             if status == 'Completed':
-                client.logger.info('
+                client.logger.info('Data generated successfully!')
                 break
             elif status in ['Failed', 'Cancelled', 'Error']:
                 raise RuntimeError(f"Job {taskid} failed with status: {status}")
             else:
                 elapsed_time = time.time() - start_time
-                client.logger.info(f"Job status: {status} - Elapsed time: {elapsed_time:.1f}s"
-
+                client.logger.info(f"Job status: {status} - Elapsed time: {elapsed_time:.1f}s")
                 await asyncio.sleep(poll_interval)
 
 
@@ -474,16 +576,15 @@ async def create_dataset_file(
             raise
 
     os.unlink(tempreqname)
-    os.unlink(tempmanifestname)
 
-    combine_result = await client.mass_stats.combine_tiles(body["name"], body["overwrite"], body["output"])
+    combine_result = await client.mass_stats.combine_tiles(body["name"], body["overwrite"], body["output"], max_file_size_mb=max_file_size_mb)
     combine_task_id = combine_result.get("task_id")
 
     combine_start_time = time.time()
+    client.logger.info(f"Tracking file generation job {combine_task_id}...")
     while True:
         try:
             trackinfo = await client.mass_stats.track_job([combine_task_id])
-            client.logger.info('client create dataset file track info:', trackinfo)
             if body["output"] == "netcdf":
                 download_file_name = trackinfo[combine_task_id]['folder'] + '.nc'
             elif body["output"] == "geotiff":
@@ -491,19 +592,19 @@ async def create_dataset_file(
             bucket = trackinfo[combine_task_id]['bucket']
             combine_status = trackinfo[combine_task_id]['status']
             if combine_status == 'Completed':
-                client.logger.info('
+                client.logger.info('File/s generated successfully!')
                 break
             elif combine_status in ['Failed', 'Cancelled', 'Error']:
-                raise RuntimeError(f"
+                raise RuntimeError(f"File generation job {combine_task_id} failed with status: {combine_status}")
             else:
                 elapsed_time = time.time() - combine_start_time
-                client.logger.info(f"
+                client.logger.info(f"File generation job status: {combine_status} - Elapsed time: {elapsed_time:.1f}s")
                 time.sleep(poll_interval)
         except KeyboardInterrupt:
-            client.logger.info(f"\nInterrupted!
+            client.logger.info(f"\nInterrupted! File generation job {combine_task_id} is still running in the background.")
             raise
         except Exception as e:
-            client.logger.info(f"\nError tracking
+            client.logger.info(f"\nError tracking file generation job: {e}")
             raise
 
     if download_path:
@@ -511,11 +612,12 @@ async def create_dataset_file(
             job_name=body["name"],
             bucket=bucket,
             file_type='processed',
-
+            folder='file-gen',
+            page_size=100,
             output_path=download_path,
         )
     else:
        path = f"{body['name']}/outputs/merged/{download_file_name}"
-        client.logger.info(f"
+        client.logger.info(f"Dataset file/s is available at {path}")
 
    return {"generation_task_id": task_id, "combine_task_id": combine_task_id}
```
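For reference, a small sketch of the request entries that the new `gdf_to_json` helper builds. The GeoDataFrame contents and the `paddock_id` column are invented for illustration, and it is assumed the module's own imports (for example shapely's `mapping`, not shown in this diff) are in place:

```python
# Illustration of the request list produced by gdf_to_json (added in 0.4.7).
# The geometries and the "paddock_id" column are made-up sample data.
import geopandas as gpd
from shapely.geometry import box
from terrakio_core.convenience_functions.convenience_functions import gdf_to_json

gdf = gpd.GeoDataFrame(
    {"paddock_id": ["a1", "b2"]},
    geometry=[box(149.0, -35.3, 149.1, -35.2), box(149.1, -35.3, 149.2, -35.2)],
    crs="epsg:4326",
)

requests = gdf_to_json(gdf, expr="red=S2v2#(year,median).red@(year =2024) \n red", id_column="paddock_id")

# Each entry carries a group name, a file name, and the per-geometry request:
# {"group": "group_a1", "file": "file_a1",
#  "request": {"expr": ..., "feature": {...}, "in_crs": "epsg:4326",
#              "out_crs": "epsg:4326", "resolution": -1, "geom_fix": False}}
print(requests[0]["group"], requests[0]["file"])
```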
{terrakio_core-0.4.5 → terrakio_core-0.4.7}/terrakio_core/endpoints/mass_stats.py

```diff
@@ -7,6 +7,13 @@ from urllib.parse import urlparse
 from ..helper.decorators import require_token, require_api_key, require_auth
 import aiohttp
 from typing import Dict, Any, Optional, List, Union
+import asyncio
+import xarray as xr
+from io import BytesIO
+import geopandas as gpd
+from shapely.geometry import shape
+from ..convenience_functions.convenience_functions import expand_on_variables_and_time
+
 class MassStats:
     def __init__(self, client):
         self._client = client
@@ -19,6 +26,7 @@ class MassStats:
         sample: str,
         output: str,
         config: Dict[str, Any],
+        region: str = None,
         overwrite: bool = False,
         skip_existing: bool = False,
         location: Optional[str] = None,
@@ -55,7 +63,8 @@ class MassStats:
             "config": config,
             "overwrite": overwrite,
             "skip_existing": skip_existing,
-            "server": server
+            "server": server,
+            "region": region
         }
         payload_mapping = {
             "location": location,
@@ -66,7 +75,6 @@ class MassStats:
                 payload[key] = str(value).lower()
         return await self._client._terrakio_request("POST", "mass_stats/upload", json=payload)
 
-
     @require_api_key
     async def start_job(self, id: str) -> Dict[str, Any]:
         """
@@ -276,6 +284,7 @@ class MassStats:
         bucket: str,
         file_type: str,
         output_path: str,
+        folder: str = None,
         page_size: int = None,
     ) -> list:
         """
@@ -303,7 +312,8 @@ class MassStats:
         request_body = {
             "job_name": job_name,
             "bucket": bucket,
-            "file_type": file_type
+            "file_type": file_type,
+            "folder": folder
         }
 
         output_dir = Path(output_path)
@@ -311,8 +321,7 @@ class MassStats:
         output_files = []
 
         async def download_urls_batch(download_urls, session):
-            for url in download_urls:
-                self._client.logger.info(f"Processing download URL: {url}")
+            for i, url in enumerate(download_urls):
                 parsed = urlparse(url)
                 path_parts = Path(parsed.path).parts
                 try:
@@ -322,7 +331,7 @@ class MassStats:
                     subpath = Path(path_parts[-1])
                 file_save_path = output_dir / subpath
                 file_save_path.parent.mkdir(parents=True, exist_ok=True)
-                self._client.logger.info(f"Downloading file to {file_save_path}")
+                self._client.logger.info(f"Downloading file to {file_save_path} ({i+1}/{len(download_urls)})")
 
                 async with session.get(url) as resp:
                     resp.raise_for_status()
@@ -352,7 +361,6 @@ class MassStats:
             response = await self._client._terrakio_request("POST", "mass_stats/download_files", json=request_body, params=params)
             data = response
 
-            self._client.logger.info(f'processed, endpoint response is {data}')
             download_urls = data.get('download_urls')
             if not download_urls:
                 break
@@ -389,18 +397,16 @@ class MassStats:
                 raise ValueError("Request must be a dictionary")
             if not isinstance(request["file"], (str, int, list)):
                 raise ValueError("'file' must be a string or a list of strings")
-            # Only check the first 3 requests
             if i == 3:
                 break
 
-    @require_api_key
     async def execute_job(
         self,
         name: str,
-        # region: str,
         output: str,
         config: Dict[str, Any],
         request_json: str,  # Path to request JSON file
+        region: str = None,
         overwrite: bool = False,
         skip_existing: bool = False,
         location: str = None,
@@ -474,7 +480,7 @@ class MassStats:
         upload_result = await self._upload_request(
             name=name,
             size=size,
-
+            region=region,
             sample = first_expression,
             output=output,
             config=config,
@@ -557,7 +563,6 @@ class MassStats:
         tile_size: int,
         res: float,
         output: str,
-        # region: str,
         year_range: list[int] = None,
         overwrite: bool = False,
         server: str = None,
@@ -600,7 +605,6 @@ class MassStats:
         payload_mapping = {
             "year_range": year_range,
             "server": server,
-            # "region": region,
             "bucket": bucket,
         }
         for key, value in payload_mapping.items():
@@ -630,7 +634,7 @@ class MassStats:
         return self._client._terrakio_request("POST", "pyramids/create", json=payload)
 
     @require_api_key
-    async def combine_tiles(self, data_name: str, overwrite: bool = True, output: str = "netcdf") -> Dict[str, Any]:
+    async def combine_tiles(self, data_name: str, overwrite: bool = True, output: str = "netcdf", max_file_size_mb = 5120) -> Dict[str, Any]:
         """
         Combine tiles for a dataset.
 
@@ -647,7 +651,130 @@ class MassStats:
         """
         payload = {
             'data_name': data_name,
+            'folder': "file-gen",
             'output': output,
-            'overwrite': str(overwrite).lower()
+            'overwrite': str(overwrite).lower(),
+            'max_file_size_mb': max_file_size_mb
         }
-        return await self._client._terrakio_request("POST", "mass_stats/combine_tiles", json=payload)
+        return await self._client._terrakio_request("POST", "mass_stats/combine_tiles", json=payload)
+
+    @require_api_key
+    async def load_zonal_stats(self, job_id: str, max_files: int = 5, poll_interval: int = 30):
+        """
+        Load zonal stats results from a completed mass stats job.
+
+        Args:
+            job_id: The job ID returned from the mass stats execution
+            max_files: Maximum number of files to download (default: 5)
+            poll_interval: Seconds to wait between status checks (default: 30)
+
+        Returns:
+            GeoDataFrame with geometry and dataset columns, or None if failed
+        """
+        try:
+            while True:
+                try:
+                    track_info = await self.track_job([job_id])
+                    job_info = track_info[job_id]
+                    status = job_info['status']
+
+                    self._client.logger.info(f"Job {job_id} status: {status}")
+
+                    if status == 'Completed':
+                        self._client.logger.info('Job completed successfully!')
+                        break
+                    elif status in ['Failed', 'Cancelled', 'Error']:
+                        raise RuntimeError(f"Job {job_id} failed with status: {status}")
+
+                    await asyncio.sleep(poll_interval)
+
+                except KeyboardInterrupt:
+                    self._client.logger.info(f"\nInterrupted! Job {job_id} is still running.")
+                    raise
+
+            async with aiohttp.ClientSession() as session:
+                payload = {
+                    "job_name": job_info['name'],
+                    "file_type": "raw",
+                    "bucket": job_info['bucket']
+                }
+
+                result = await self._client._terrakio_request("POST", "mass_stats/download_files", json=payload)
+                download_urls = result['download_urls'][:max_files]
+
+                self._client.logger.info(f"Downloading {len(download_urls)} dataset files...")
+
+                datasets = []
+                for i, url in enumerate(download_urls):
+                    try:
+                        self._client.logger.info(f"Downloading dataset {i+1}/{len(download_urls)}...")
+                        async with session.get(url) as response:
+                            if response.status == 200:
+                                content = await response.read()
+                                dataset = xr.open_dataset(BytesIO(content))
+                                datasets.append(dataset)
+                                self._client.logger.info(f"Successfully processed dataset {i+1}")
+                            else:
+                                self._client.logger.warning(f"Failed to download dataset {i+1}: HTTP {response.status}")
+                    except Exception as e:
+                        self._client.logger.error(f"Error downloading dataset {i+1}: {e}")
+                        continue
+
+                if not datasets:
+                    self._client.logger.warning("No datasets were successfully downloaded")
+                    return gpd.GeoDataFrame({'geometry': [], 'dataset': []})
+
+                try:
+                    json_response = await self._client._terrakio_request(
+                        "POST", "mass_stats/download_json",
+                        params={"job_name": job_info['name']}
+                    )
+                    json_url = json_response["download_url"]
+
+                    async with session.get(json_url) as response:
+                        if response.status == 200:
+                            json_data = await response.json()
+                            self._client.logger.info("Successfully downloaded geometry data")
+
+                            geometries = []
+                            max_geometries = min(max_files, len(json_data), len(datasets))
+
+                            for i in range(max_geometries):
+                                try:
+                                    geom_dict = json_data[i]["request"]["feature"]["geometry"]
+                                    shapely_geom = shape(geom_dict)
+                                    geometries.append(shapely_geom)
+                                except (KeyError, ValueError) as e:
+                                    self._client.logger.warning(f"Error parsing geometry {i}: {e}")
+                                    continue
+
+                            min_length = min(len(datasets), len(geometries))
+                            if min_length == 0:
+                                self._client.logger.warning("No matching datasets and geometries found")
+                                return gpd.GeoDataFrame({'geometry': [], 'dataset': []})
+
+                            gdf = gpd.GeoDataFrame({
+                                'geometry': geometries[:min_length],
+                                'dataset': datasets[:min_length]
+                            })
+
+                            self._client.logger.info(f"Created GeoDataFrame with {len(gdf)} rows")
+
+                            try:
+                                expanded_gdf = expand_on_variables_and_time(gdf)
+                                return expanded_gdf
+                            except NameError:
+                                self._client.logger.warning("expand_on_variables_and_time function not found, returning raw GeoDataFrame")
+                                return gdf
+
+                        else:
+                            self._client.logger.warning(f"Failed to download geometry data: HTTP {response.status}")
+                            return gpd.GeoDataFrame({'geometry': [], 'dataset': []})
+
+                except Exception as e:
+                    self._client.logger.error(f"Error downloading geometry data: {e}")
+                    return gpd.GeoDataFrame({'geometry': [], 'dataset': []})
+
+        except Exception as e:
+            self._client.logger.error(f"Failed to load zonal stats for job {job_id}: {e}")
+            return None
```
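The new `load_zonal_stats` method pairs with the mass-stats path added to `zonal_stats`: one call submits the job, the other polls it and assembles a GeoDataFrame of geometries and downloaded xarray datasets. A hedged end-to-end sketch; the API key, file path, and expression are placeholders, and treating the value returned by `zonal_stats(mass_stats=True)` as a `{"task_id": ...}` mapping is an assumption based on how `create_dataset_file` reads `execute_job`'s result elsewhere in this diff:

```python
# Hedged sketch: submit a mass-stats zonal statistics job, then load its results.
import asyncio
import geopandas as gpd
from terrakio_core.async_client import AsyncClient

async def main():
    gdf = gpd.read_file("paddocks.geojson")  # placeholder input layer
    async with AsyncClient(api_key="YOUR_API_KEY") as client:
        job = await client.zonal_stats(
            gdf=gdf,
            expr="red=S2v2#(year,median).red@(year =2024) \n red",
            mass_stats=True,
        )
        job_id = job["task_id"] if isinstance(job, dict) else job  # assumption, see lead-in

        # Polls track_job until completion, downloads up to max_files raw NetCDF
        # outputs, and returns a GeoDataFrame with 'geometry' and 'dataset' columns.
        result = await client.mass_stats.load_zonal_stats(job_id, max_files=5, poll_interval=30)
        print(result)

asyncio.run(main())
```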
{terrakio_core-0.4.5 → terrakio_core-0.4.7}/terrakio_core/helper/tiles.py

```diff
@@ -16,20 +16,21 @@ def get_bounds(aoi, crs, to_crs = None):
     bounds = aoi.geometry[0].bounds
     return *bounds, aoi
 
-def tile_generator(x_min, y_min, x_max, y_max, aoi, crs, res, tile_size, expression, output,
-    i_max = int((x_max-x_min)/(tile_size*res))
-    j_max = int((y_max-y_min)/(tile_size*res))
-    if fully_cover:
-        i_max += 1
-        j_max += 1
+def tile_generator(x_min, y_min, x_max, y_max, aoi, crs, res, tile_size, expression, output, mask):
+    i_max = int((x_max-x_min)/(tile_size*res)) + 1
+    j_max = int((y_max-y_min)/(tile_size*res)) + 1
     for j in range(0, int(j_max)):
         for i in range(0, int(i_max)):
             x = x_min + i*(tile_size*res)
             y = y_max - j*(tile_size*res)
-
-            if not aoi.geometry[0].intersects(
+            geom = shapely.geometry.box(x, y-(tile_size*res), x + (tile_size*res), y)
+            if not aoi.geometry[0].intersects(geom):
                 continue
-
+            if mask:
+                geom = geom.intersection(aoi.geometry[0])
+                if geom.is_empty:
+                    continue
+            feat = {"type": "Feature", "geometry": geom.__geo_interface__}
             data = {
                 "feature": feat,
                 "in_crs": crs,
@@ -46,15 +47,15 @@ def tiles(
     aoi : str,
     expression: str = "red=S2v2#(year,median).red@(year =2024) \n red",
     output: str = "netcdf",
-    tile_size : float =
+    tile_size : float = 1024,
     crs : str = "epsg:3577",
     res: float = 10,
     region : str = "eu",
     to_crs: str = None,
-    fully_cover: bool = True,
     overwrite: bool = False,
     skip_existing: bool = False,
     non_interactive: bool = False,
+    mask: bool = True,
 ):
 
     reqs = []
@@ -62,7 +63,7 @@ def tiles(
 
     if to_crs is None:
         to_crs = crs
-    for tile_req, i, j in tile_generator(x_min, y_min, x_max, y_max, aoi, to_crs, res, tile_size, expression, output,
+    for tile_req, i, j in tile_generator(x_min, y_min, x_max, y_max, aoi, to_crs, res, tile_size, expression, output, mask):
         req_name = f"{name}_{i:02d}_{j:02d}"
         reqs.append({"group": "tiles", "file": req_name, "request": tile_req})
```
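The tiling change is easier to see in isolation: the grid now always rounds up by one tile in each direction (the old `fully_cover` flag is gone), and each tile can be clipped to the AOI when `mask=True`. A standalone sketch of that arithmetic using made-up bounds and resolution:

```python
# Standalone illustration of the 0.4.7 tiling behaviour (not library code).
import shapely.geometry

x_min, y_min, x_max, y_max = 0.0, 0.0, 2.5, 1.2   # AOI bounds (made up)
res, tile_size = 0.001, 1024                       # 1024-pixel tiles at 0.001 units/pixel
step = tile_size * res

i_max = int((x_max - x_min) / step) + 1            # +1 so the grid fully covers the AOI
j_max = int((y_max - y_min) / step) + 1

aoi = shapely.geometry.box(x_min, y_min, x_max, y_max)
tiles = []
for j in range(j_max):
    for i in range(i_max):
        x = x_min + i * step
        y = y_max - j * step
        geom = shapely.geometry.box(x, y - step, x + step, y)
        if not aoi.intersects(geom):
            continue
        geom = geom.intersection(aoi)              # mask=True behaviour: clip tile to the AOI
        if not geom.is_empty:
            tiles.append(geom)

print(len(tiles), "tiles cover the AOI")
```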
{terrakio_core-0.4.5 → terrakio_core-0.4.7}/terrakio_core/sync_client.py

```diff
@@ -339,38 +339,41 @@
 
 
 import asyncio
-import functools
 import concurrent.futures
-
+import threading
+import functools
+import inspect
+from typing import Optional, Dict, Any, Union, TYPE_CHECKING
 from geopandas import GeoDataFrame
 from shapely.geometry.base import BaseGeometry as ShapelyGeometry
 from .async_client import AsyncClient
 
+# Add type checking imports for better IDE support
+if TYPE_CHECKING:
+    from .endpoints.dataset_management import DatasetManagement
+    from .endpoints.user_management import UserManagement
+    from .endpoints.mass_stats import MassStats
+    from .endpoints.group_management import GroupManagement
+    from .endpoints.space_management import SpaceManagement
+    from .endpoints.model_management import ModelManagement
+    from .endpoints.auth import AuthClient
+
 
 class SyncWrapper:
-    """
-    Generic synchronous wrapper with __dir__ support for runtime autocomplete.
-    """
+    """Generic synchronous wrapper with __dir__ support for runtime autocomplete."""
 
     def __init__(self, async_obj, sync_client):
         self._async_obj = async_obj
         self._sync_client = sync_client
 
     def __dir__(self):
-        """
-        Return list of attributes for autocomplete in interactive environments.
-        This enables autocomplete in Jupyter/iPython after instantiation.
-        """
+        """Return list of attributes for autocomplete in interactive environments."""
         async_attrs = [attr for attr in dir(self._async_obj) if not attr.startswith('_')]
-
         wrapper_attrs = [attr for attr in object.__dir__(self) if not attr.startswith('_')]
-
         return list(set(async_attrs + wrapper_attrs))
 
     def __getattr__(self, name):
-        """
-        Dynamically wrap any method call to convert async to sync.
-        """
+        """Dynamically wrap any method call to convert async to sync."""
         attr = getattr(self._async_obj, name)
 
         if callable(attr):
@@ -387,15 +390,32 @@ class SyncWrapper:
 
 class SyncClient:
     """
-
-
+    Thread-safe synchronous wrapper for AsyncClient.
+    Uses a persistent event loop in a dedicated thread to avoid event loop conflicts.
     """
+
+    # Add explicit type annotations for endpoint managers
+    datasets: 'DatasetManagement'
+    users: 'UserManagement'
+    mass_stats: 'MassStats'
+    groups: 'GroupManagement'
+    space: 'SpaceManagement'
+    model: 'ModelManagement'
+    auth: 'AuthClient'
 
     def __init__(self, url: Optional[str] = None, api_key: Optional[str] = None, verbose: bool = False):
         self._async_client = AsyncClient(url=url, api_key=api_key, verbose=verbose)
         self._context_entered = False
         self._closed = False
 
+        # Thread and event loop management
+        self._loop = None
+        self._thread = None
+        self._loop_ready = None
+        self._loop_exception = None
+
+        # Initialize endpoint managers with proper typing
+
         self.datasets = SyncWrapper(self._async_client.datasets, self)
         self.users = SyncWrapper(self._async_client.users, self)
         self.mass_stats = SyncWrapper(self._async_client.mass_stats, self)
@@ -407,21 +427,150 @@ class SyncClient:
         import atexit
         atexit.register(self._cleanup)
 
-    def
+    def _ensure_event_loop(self) -> None:
+        """Ensure we have a persistent event loop in a dedicated thread."""
+        if self._loop is None or self._loop.is_closed():
+            self._loop_ready = threading.Event()
+            self._loop_exception = None
+
+            def run_loop():
+                """Run the event loop in a dedicated thread."""
+                try:
+                    # Create a new event loop for this thread
+                    self._loop = asyncio.new_event_loop()
+                    asyncio.set_event_loop(self._loop)
+
+                    # Signal that the loop is ready
+                    self._loop_ready.set()
+
+                    # Run the loop forever (until stopped)
+                    self._loop.run_forever()
+                except Exception as e:
+                    self._loop_exception = e
+                    self._loop_ready.set()
+                finally:
+                    # Clean up when the loop stops
+                    if self._loop and not self._loop.is_closed():
+                        self._loop.close()
+
+            # Start the thread
+            self._thread = threading.Thread(target=run_loop, daemon=True)
+            self._thread.start()
+
+            # Wait for the loop to be ready
+            self._loop_ready.wait(timeout=10)
+
+            if self._loop_exception:
+                raise self._loop_exception
+
+            if not self._loop_ready.is_set():
+                raise RuntimeError("Event loop failed to start within timeout")
+
+    def _run_async(self, coro):
         """
-
-        This
+        Run async coroutine using persistent event loop.
+        This is the core method that makes everything work.
         """
-
+        # Ensure we have an event loop
+        self._ensure_event_loop()
 
-
+        if self._loop.is_closed():
+            raise RuntimeError("Event loop is closed")
 
-
+        # Create a future to get the result back from the event loop thread
+        future = concurrent.futures.Future()
 
-
+        async def run_with_context():
+            """Run the coroutine with proper context management."""
+            try:
+                # Ensure the async client is properly initialized
+                await self._ensure_context()
+
+                # Run the actual coroutine
+                result = await coro
+
+                # Set the result on the future
+                future.set_result(result)
+            except Exception as e:
+                # Set the exception on the future
+                future.set_exception(e)
+
+        # Schedule the coroutine on the persistent event loop
+        self._loop.call_soon_threadsafe(
+            lambda: asyncio.create_task(run_with_context())
+        )
+
 
+        # Wait for the result (with timeout to avoid hanging)
+        try:
+            return future.result(timeout=300)  # 5 minute timeout
+        except concurrent.futures.TimeoutError:
+            raise RuntimeError("Async operation timed out after 5 minutes")
+
+    async def _ensure_context(self) -> None:
+        """Ensure the async client context is entered."""
+        if not self._context_entered and not self._closed:
+            await self._async_client.__aenter__()
+            self._context_entered = True
+
+    async def _exit_context(self) -> None:
+        """Exit the async client context."""
+        if self._context_entered and not self._closed:
+            await self._async_client.__aexit__(None, None, None)
+            self._context_entered = False
+
+    def close(self) -> None:
+        """Close the underlying async client session and stop the event loop."""
+        if not self._closed:
+            if self._loop and not self._loop.is_closed():
+                # Schedule cleanup on the event loop
+                future = concurrent.futures.Future()
+
+                async def cleanup():
+                    """Clean up the async client."""
+                    try:
+                        await self._exit_context()
+                        future.set_result(None)
+                    except Exception as e:
+                        future.set_exception(e)
+
+                # Run cleanup
+                self._loop.call_soon_threadsafe(
+                    lambda: asyncio.create_task(cleanup())
+                )
+
+                # Wait for cleanup to complete
+                try:
+                    future.result(timeout=10)
+                except:
+                    pass  # Ignore cleanup errors
+
+                # Stop the event loop
+                self._loop.call_soon_threadsafe(self._loop.stop)
+
+                # Wait for thread to finish
+                if self._thread and self._thread.is_alive():
+                    self._thread.join(timeout=5)
+
+            self._closed = True
+
+    def _cleanup(self) -> None:
+        """Internal cleanup method called by atexit."""
+        if not self._closed:
+            try:
+                self.close()
+            except Exception:
+                pass  # Ignore cleanup errors
+
+    def __dir__(self):
+        """Return list of attributes for autocomplete in interactive environments."""
+        default_attrs = [attr for attr in object.__dir__(self) if not attr.startswith('_')]
+        async_client_attrs = [attr for attr in dir(self._async_client) if not attr.startswith('_')]
+        endpoint_attrs = ['datasets', 'users', 'mass_stats', 'groups', 'space', 'model', 'auth']
+        all_attrs = default_attrs + async_client_attrs + endpoint_attrs
         return list(set(all_attrs))
 
+    # Your existing methods with proper type annotations
     def geoquery(
         self,
         expr: str,
@@ -446,15 +595,18 @@ class SyncClient:
         return self._run_async(coro)
 
     def zonal_stats(
-        self,
-        gdf: GeoDataFrame,
-        expr: str,
-        conc: int = 20,
-        in_crs: str = "epsg:4326",
-        out_crs: str = "epsg:4326",
-        resolution: int = -1,
-        geom_fix: bool = False,
-    ):
+        self,
+        gdf: GeoDataFrame,
+        expr: str,
+        conc: int = 20,
+        in_crs: str = "epsg:4326",
+        out_crs: str = "epsg:4326",
+        resolution: int = -1,
+        geom_fix: bool = False,
+        mass_stats: bool = False,
+        id_column: Optional[str] = None,
+    ) -> GeoDataFrame:
+
         """
         Compute zonal statistics for all geometries in a GeoDataFrame (synchronous version).
 
@@ -466,6 +618,8 @@ class SyncClient:
             out_crs (str): Output coordinate reference system
             resolution (int): Resolution parameter
             geom_fix (bool): Whether to fix the geometry (default False)
+            mass_stats (bool): Whether to use mass stats for processing (default False)
+            id_column (Optional[str]): Name of the ID column to use (default None)
 
         Returns:
             geopandas.GeoDataFrame: GeoDataFrame with added columns for results
@@ -482,11 +636,14 @@ class SyncClient:
             out_crs=out_crs,
             resolution=resolution,
             geom_fix=geom_fix,
+            mass_stats=mass_stats,
+            id_column=id_column,
         )
         return self._run_async(coro)
 
     def create_dataset_file(
         self,
+        name: str,
         aoi: str,
         expression: str,
         output: str,
@@ -499,6 +656,9 @@ class SyncClient:
         non_interactive: bool = True,
         poll_interval: int = 30,
         download_path: str = "/home/user/Downloads",
+        mask = True,
+        max_file_size_mb: int = 5120,  # Default to 5GB
+        tile_size: int = 1024,
     ) -> dict:
         """Create a dataset file using mass stats operations (synchronous version)."""
         coro = self._async_client.create_dataset_file(
@@ -514,14 +674,19 @@ class SyncClient:
             non_interactive=non_interactive,
             poll_interval=poll_interval,
             download_path=download_path,
+            name=name,
+            mask=mask,
+            max_file_size_mb=max_file_size_mb,
+            tile_size=tile_size
         )
         return self._run_async(coro)
 
+
     def geo_queries(
         self,
         queries: list[dict],
         conc: int = 20,
-    ):
+    ) -> Union[float, GeoDataFrame]:
         """
         Execute multiple geo queries concurrently (synchronous version).
 
@@ -559,120 +724,17 @@ class SyncClient:
             conc=conc,
         )
         return self._run_async(coro)
-
-    async def _ensure_context(self):
-        """Ensure the async client context is entered."""
-        if not self._context_entered and not self._closed:
-            await self._async_client.__aenter__()
-            self._context_entered = True
-
-    async def _exit_context(self):
-        """Exit the async client context."""
-        if self._context_entered and not self._closed:
-            await self._async_client.__aexit__(None, None, None)
-            self._context_entered = False
-
-    def _run_async(self, coro):
-        """
-        Run an async coroutine and return the result synchronously.
-        This version handles both Jupyter notebook environments and regular Python environments.
-        """
-        async def run_with_context():
-            await self._ensure_context()
-            return await coro
-
-        try:
-            # Check if we're in a running event loop (like Jupyter)
-            loop = asyncio.get_running_loop()
-
-            # Method 1: Try using nest_asyncio if available
-            try:
-                import nest_asyncio
-                nest_asyncio.apply()
-                return asyncio.run(run_with_context())
-            except ImportError:
-                pass
-
-            # Method 2: Use ThreadPoolExecutor to run in a separate thread
-            def run_in_thread():
-                return asyncio.run(run_with_context())
-
-            with concurrent.futures.ThreadPoolExecutor() as executor:
-                future = executor.submit(run_in_thread)
-                return future.result()
-
-        except RuntimeError:
-            # No running loop, safe to use asyncio.run()
-            return asyncio.run(run_with_context())
 
-
-
-        if not self._closed:
-            async def close_async():
-                await self._exit_context()
-
-            try:
-                loop = asyncio.get_running_loop()
-
-                # Try nest_asyncio first
-                try:
-                    import nest_asyncio
-                    nest_asyncio.apply()
-                    asyncio.run(close_async())
-                except ImportError:
-                    # Fall back to ThreadPoolExecutor
-                    def run_in_thread():
-                        return asyncio.run(close_async())
-
-                    with concurrent.futures.ThreadPoolExecutor() as executor:
-                        future = executor.submit(run_in_thread)
-                        future.result()
-
-            except RuntimeError:
-                asyncio.run(close_async())
-
-            self._closed = True
-
-    def _cleanup(self):
-        """Internal cleanup method called by atexit."""
-        if not self._closed:
-            try:
-                self.close()
-            except Exception:
-                pass
-
-    def __enter__(self):
+    # Context manager support
+    def __enter__(self) -> 'SyncClient':
         """Context manager entry."""
-        async def enter_async():
-            await self._ensure_context()
-
-        try:
-            loop = asyncio.get_running_loop()
-
-            # Try nest_asyncio first
-            try:
-                import nest_asyncio
-                nest_asyncio.apply()
-                asyncio.run(enter_async())
-            except ImportError:
-                # Fall back to ThreadPoolExecutor
-                def run_in_thread():
-                    return asyncio.run(enter_async())
-
-                with concurrent.futures.ThreadPoolExecutor() as executor:
-                    future = executor.submit(run_in_thread)
-                    future.result()
-
-        except RuntimeError:
-            asyncio.run(enter_async())
-
         return self
 
-    def __exit__(self, exc_type, exc_val, exc_tb):
+    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
         """Context manager exit."""
         self.close()
 
-    def __del__(self):
+    def __del__(self) -> None:
         """Destructor to ensure session is closed."""
         if not self._closed:
             try:
```
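The reworked `SyncClient` dispatches every call to a persistent event loop running in a background thread instead of applying nest_asyncio or spawning a thread pool per call. A hedged usage sketch; the API key, file path, and expression are placeholders:

```python
# Hypothetical usage of the 0.4.7 SyncClient (persistent background event loop).
import geopandas as gpd
from terrakio_core.sync_client import SyncClient

gdf = gpd.read_file("paddocks.geojson")  # placeholder input layer

with SyncClient(api_key="YOUR_API_KEY") as client:
    # Each synchronous call is scheduled onto the client's dedicated loop thread
    # via _run_async and returns once the coroutine completes (5-minute timeout).
    result = client.zonal_stats(
        gdf=gdf,
        expr="red=S2v2#(year,median).red@(year =2024) \n red",
        resolution=-1,
    )
    print(type(result))
# Leaving the context calls close(), which stops the loop; close() is also
# registered with atexit as a fallback.
```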
{terrakio_core-0.4.5 → terrakio_core-0.4.7}/terrakio_core.egg-info/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: terrakio-core
-Version: 0.4.5
+Version: 0.4.7
 Summary: Core components for Terrakio API clients
 Author-email: Yupeng Chao <yupeng@haizea.com.au>
 Project-URL: Homepage, https://github.com/HaizeaAnalytics/terrakio-python-api
@@ -25,6 +25,10 @@ Requires-Dist: google-cloud-storage>=2.0.0
 Requires-Dist: scipy>=1.7.0
 Requires-Dist: nest_asyncio
 Requires-Dist: onnxruntime>=1.10.0
+Requires-Dist: psutil>=5.0.0
+Requires-Dist: h5netcdf>=1.0.0
+Requires-Dist: netcdf4>=1.5.0
+Requires-Dist: aiofiles>=24.1.0
 Provides-Extra: ml
 Requires-Dist: torch>=2.7.1; extra == "ml"
 Requires-Dist: scikit-learn>=1.7.0; extra == "ml"
```