terrakio-core 0.4.6__py3-none-any.whl → 0.4.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of terrakio-core might be problematic.
- terrakio_core/__init__.py +1 -1
- terrakio_core/async_client.py +8 -0
- terrakio_core/convenience_functions/convenience_functions.py +25 -61
- terrakio_core/endpoints/mass_stats.py +171 -39
- terrakio_core/helper/tiles.py +13 -12
- terrakio_core/sync_client.py +9 -0
- {terrakio_core-0.4.6.dist-info → terrakio_core-0.4.7.dist-info}/METADATA +2 -1
- {terrakio_core-0.4.6.dist-info → terrakio_core-0.4.7.dist-info}/RECORD +10 -10
- {terrakio_core-0.4.6.dist-info → terrakio_core-0.4.7.dist-info}/WHEEL +0 -0
- {terrakio_core-0.4.6.dist-info → terrakio_core-0.4.7.dist-info}/top_level.txt +0 -0
terrakio_core/__init__.py
CHANGED
terrakio_core/async_client.py
CHANGED
@@ -235,6 +235,7 @@ class AsyncClient(BaseClient):
 
     async def create_dataset_file(
         self,
+        name: str,
         aoi: str,
         expression: str,
         output: str,
@@ -247,6 +248,9 @@ class AsyncClient(BaseClient):
         non_interactive: bool = True,
         poll_interval: int = 30,
         download_path: str = "/home/user/Downloads",
+        mask = True,
+        max_file_size_mb: int = 5120,  # Default to 5GB
+        tile_size: int = 1024,
     ) -> dict:
         """
         Create a dataset file using mass stats operations.
@@ -286,6 +290,10 @@ class AsyncClient(BaseClient):
             non_interactive=non_interactive,
             poll_interval=poll_interval,
             download_path=download_path,
+            name=name,
+            mask=mask,
+            max_file_size_mb=max_file_size_mb,
+            tile_size=tile_size
         )
 
     async def geo_queries(
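Taken together, these changes make the job name a caller-supplied argument and expose masking, tile size, and the combined-file size cap. A minimal usage sketch (the AOI path, expression, and client construction are illustrative assumptions, not part of the diff):

import asyncio
from terrakio_core.async_client import AsyncClient

async def main():
    client = AsyncClient()  # assumes credentials resolved from your existing config
    result = await client.create_dataset_file(
        name="demo-dataset",                 # new in 0.4.7: caller-supplied job name
        aoi="./aoi.geojson",                 # hypothetical AOI file
        expression="red=S2v2#(year,median).red@(year=2024)\nred",
        output="netcdf",
        mask=True,                           # new: clip tiles to the AOI outline
        max_file_size_mb=5120,               # new: cap each combined output file at 5 GB
        tile_size=1024,                      # new: tile edge length in pixels
    )
    print(result)  # {"generation_task_id": ..., "combine_task_id": ...}

asyncio.run(main())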
terrakio_core/convenience_functions/convenience_functions.py
CHANGED
@@ -438,15 +438,7 @@ async def handle_mass_stats(
     id_column: Optional[str] = None,
 
 ):
-    # we have the handle mass stats function, we need to have the list of quries, and we need to pass the quries to the mass stats function
-    # we have the three different variables
-
-    # Check if id_column is provided
-    # if id_column is None:
-    # Handle case where no ID column is specified
-    # this means that the id column is none, so we could just use the default value of 1 2 3 4
     request_json = gdf_to_json(gdf = gdf, expr = expr, in_crs = in_crs, out_crs = out_crs, resolution = resolution, geom_fix = geom_fix, id_column = id_column)
-    # we need to call the execute job function
     job_id =await client.mass_stats.execute_job(
         name = "zonal_stats_job",
         output = "netcdf",
@@ -455,32 +447,7 @@ async def handle_mass_stats(
         overwrite = True,
     )
     return job_id
-    # async def test_regular_async_mass_stats(regular_async_client):
-    #     """Test mass statistics with regular client async"""
-    #     start_result = await regular_async_client.mass_stats.execute_job(
-    #         name="test_regular_mass_stats_test",
-    #         region="aus",
-    #         output="csv",
-    #         config={},
-    #         request_json = "./test_config.json",
-    #         manifest_json = "./test_manifest.json",
-    #         overwrite=True,
-    #     )
-    #     assert isinstance(start_result, dict)
-    #     assert 'task_id' in start_result
-
-    #     return
-    # else:
-    #     # Handle case where ID column is specified
-    #     # Verify the column exists in the GeoDataFrame
-
-    #     if id_column not in gdf.columns:
-    #         raise ValueError(f"ID column '{id_column}' not found in GeoDataFrame columns: {list(gdf.columns)}")
-    # pass
-    # the second case is that we have an id_column, we need to use the id_column to create the group name
 
-    # we have the mass stats as one of the parameters, so that when a user wants a mass
-    # for both cases we need to have the list of quries
 async def zonal_stats(
     client,
     gdf: GeoDataFrame,
@@ -506,7 +473,6 @@ async def zonal_stats(
         geom_fix = geom_fix,
         id_column = id_column
     )
-    # if we started the mass stats job, we need to return the job id
     return mass_stats_id
     quries = []
     for i in range(len(gdf)):
@@ -536,30 +502,35 @@ async def create_dataset_file(
     aoi: str,
     expression: str,
     output: str,
+    download_path: str,
     in_crs: str = "epsg:4326",
-    res: float = 0.0001,
-    region: str = "aus",
     to_crs: str = "epsg:4326",
-
+    res: float = 0.0001,
+    region: str = None,
+    overwrite: bool = False,
     skip_existing: bool = False,
     non_interactive: bool = True,
+    name: str | None = None,
     poll_interval: int = 30,
-
+    max_file_size_mb: int = 5120,
+    tile_size: int = 1024,
+    mask: bool = True
 ) -> dict:
 
-
+    if not name:
+        name = f"file-gen-{uuid.uuid4().hex[:8]}"
 
     body, reqs, groups = tiles(
         name = name,
         aoi = aoi,
         expression = expression,
         output = output,
-        tile_size =
+        tile_size = tile_size,
         crs = in_crs,
         res = res,
         region = region,
         to_crs = to_crs,
-
+        mask = mask,
         overwrite = overwrite,
         skip_existing = skip_existing,
         non_interactive = non_interactive
@@ -567,9 +538,6 @@ async def create_dataset_file(
     with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as tempreq:
         tempreq.write(reqs)
         tempreqname = tempreq.name
-    with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as tempmanifest:
-        tempmanifest.write(groups)
-        tempmanifestname = tempmanifest.name
 
     task_id = await client.mass_stats.execute_job(
         name=body["name"],
@@ -579,28 +547,24 @@ async def create_dataset_file(
         overwrite=body["overwrite"],
         skip_existing=body["skip_existing"],
         request_json=tempreqname,
-        manifest_json=tempmanifestname,
     )
 
     start_time = time.time()
     status = None
-
+    client.logger.info(f"Tracking data generation job {task_id['task_id']}...")
     while True:
         try:
             taskid = task_id['task_id']
             trackinfo = await client.mass_stats.track_job([taskid])
-            client.logger.info("the trackinfo is: ", trackinfo)
             status = trackinfo[taskid]['status']
-
             if status == 'Completed':
-                client.logger.info('
+                client.logger.info('Data generated successfully!')
                 break
             elif status in ['Failed', 'Cancelled', 'Error']:
                 raise RuntimeError(f"Job {taskid} failed with status: {status}")
             else:
                 elapsed_time = time.time() - start_time
-                client.logger.info(f"Job status: {status} - Elapsed time: {elapsed_time:.1f}s"
-
+                client.logger.info(f"Job status: {status} - Elapsed time: {elapsed_time:.1f}s")
             await asyncio.sleep(poll_interval)
 
 
@@ -612,16 +576,15 @@ async def create_dataset_file(
             raise
 
     os.unlink(tempreqname)
-    os.unlink(tempmanifestname)
 
-    combine_result = await client.mass_stats.combine_tiles(body["name"], body["overwrite"], body["output"])
+    combine_result = await client.mass_stats.combine_tiles(body["name"], body["overwrite"], body["output"], max_file_size_mb=max_file_size_mb)
     combine_task_id = combine_result.get("task_id")
 
     combine_start_time = time.time()
+    client.logger.info(f"Tracking file generation job {combine_task_id}...")
     while True:
         try:
             trackinfo = await client.mass_stats.track_job([combine_task_id])
-            client.logger.info('client create dataset file track info:', trackinfo)
             if body["output"] == "netcdf":
                 download_file_name = trackinfo[combine_task_id]['folder'] + '.nc'
             elif body["output"] == "geotiff":
@@ -629,19 +592,19 @@ async def create_dataset_file(
             bucket = trackinfo[combine_task_id]['bucket']
             combine_status = trackinfo[combine_task_id]['status']
             if combine_status == 'Completed':
-                client.logger.info('
+                client.logger.info('File/s generated successfully!')
                 break
             elif combine_status in ['Failed', 'Cancelled', 'Error']:
-                raise RuntimeError(f"
+                raise RuntimeError(f"File generation job {combine_task_id} failed with status: {combine_status}")
             else:
                 elapsed_time = time.time() - combine_start_time
-                client.logger.info(f"
+                client.logger.info(f"File generation job status: {combine_status} - Elapsed time: {elapsed_time:.1f}s")
                 time.sleep(poll_interval)
         except KeyboardInterrupt:
-            client.logger.info(f"\nInterrupted!
+            client.logger.info(f"\nInterrupted! File generation job {combine_task_id} is still running in the background.")
             raise
         except Exception as e:
-            client.logger.info(f"\nError tracking
+            client.logger.info(f"\nError tracking file generation job: {e}")
             raise
 
     if download_path:
@@ -649,11 +612,12 @@ async def create_dataset_file(
             job_name=body["name"],
             bucket=bucket,
             file_type='processed',
-
+            folder='file-gen',
+            page_size=100,
             output_path=download_path,
         )
     else:
         path = f"{body['name']}/outputs/merged/{download_file_name}"
-        client.logger.info(f"
+        client.logger.info(f"Dataset file/s is available at {path}")
 
     return {"generation_task_id": task_id, "combine_task_id": combine_task_id}
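The rewritten create_dataset_file polls client.mass_stats.track_job with the same loop shape twice, once for tile generation and once for the combine step. A generic helper in that style (a sketch only; no such helper exists in the package, and the track_job result shape is taken from the hunks above):

import asyncio
import time

async def poll_until_complete(mass_stats, task_id: str, poll_interval: int = 30, logger=None):
    # Poll track_job until a terminal status; mirrors the loops in create_dataset_file.
    start = time.time()
    while True:
        trackinfo = await mass_stats.track_job([task_id])
        status = trackinfo[task_id]['status']
        if status == 'Completed':
            return trackinfo[task_id]
        if status in ['Failed', 'Cancelled', 'Error']:
            raise RuntimeError(f"Job {task_id} failed with status: {status}")
        if logger:
            logger.info(f"Job status: {status} - Elapsed time: {time.time() - start:.1f}s")
        await asyncio.sleep(poll_interval)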
terrakio_core/endpoints/mass_stats.py
CHANGED
@@ -7,6 +7,13 @@ from urllib.parse import urlparse
 from ..helper.decorators import require_token, require_api_key, require_auth
 import aiohttp
 from typing import Dict, Any, Optional, List, Union
+import asyncio
+import xarray as xr
+from io import BytesIO
+import geopandas as gpd
+from shapely.geometry import shape
+from ..convenience_functions.convenience_functions import expand_on_variables_and_time
+
 class MassStats:
     def __init__(self, client):
         self._client = client
@@ -19,6 +26,7 @@ class MassStats:
         sample: str,
         output: str,
         config: Dict[str, Any],
+        region: str = None,
         overwrite: bool = False,
         skip_existing: bool = False,
         location: Optional[str] = None,
@@ -55,7 +63,8 @@ class MassStats:
             "config": config,
             "overwrite": overwrite,
             "skip_existing": skip_existing,
-            "server": server
+            "server": server,
+            "region": region
         }
         payload_mapping = {
             "location": location,
@@ -66,7 +75,6 @@ class MassStats:
                 payload[key] = str(value).lower()
         return await self._client._terrakio_request("POST", "mass_stats/upload", json=payload)
 
-
     @require_api_key
     async def start_job(self, id: str) -> Dict[str, Any]:
         """
@@ -276,6 +284,7 @@ class MassStats:
         bucket: str,
         file_type: str,
         output_path: str,
+        folder: str = None,
         page_size: int = None,
     ) -> list:
         """
@@ -303,7 +312,8 @@ class MassStats:
         request_body = {
             "job_name": job_name,
             "bucket": bucket,
-            "file_type": file_type
+            "file_type": file_type,
+            "folder": folder
         }
 
         output_dir = Path(output_path)
@@ -311,8 +321,7 @@ class MassStats:
         output_files = []
 
         async def download_urls_batch(download_urls, session):
-            for url in download_urls:
-                self._client.logger.info(f"Processing download URL: {url}")
+            for i, url in enumerate(download_urls):
                 parsed = urlparse(url)
                 path_parts = Path(parsed.path).parts
                 try:
@@ -322,13 +331,13 @@ class MassStats:
                     subpath = Path(path_parts[-1])
                     file_save_path = output_dir / subpath
                     file_save_path.parent.mkdir(parents=True, exist_ok=True)
-                    self._client.logger.info(f"Downloading file to {file_save_path}")
+                    self._client.logger.info(f"Downloading file to {file_save_path} ({i+1}/{len(download_urls)})")
 
                     async with session.get(url) as resp:
                         resp.raise_for_status()
                         import aiofiles
                         async with aiofiles.open(file_save_path, 'wb') as file:
-                            async for chunk in resp.content.iter_chunked(1048576):
+                            async for chunk in resp.content.iter_chunked(1048576):  # 1 MB
                                 if chunk:
                                     await file.write(chunk)
 
@@ -352,7 +361,6 @@ class MassStats:
         response = await self._client._terrakio_request("POST", "mass_stats/download_files", json=request_body, params=params)
         data = response
 
-        self._client.logger.info(f'processed, endpoint response is {data}')
         download_urls = data.get('download_urls')
         if not download_urls:
             break
@@ -363,7 +371,7 @@ class MassStats:
             if total_files is not None and downloaded_files >= total_files:
                 break
             if len(download_urls) < page_size:
-                break
+                break  # Last page
             page += 1
         return output_files
     except Exception as e:
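The paginated downloader gains a folder filter and a progress counter. The method's name sits outside the hunks shown, so download_files below is an assumption; the parameter names come straight from the diff:

# Sketch: fetch processed outputs for a job, scoped by the new 'folder' filter.
files = await client.mass_stats.download_files(   # method name assumed, not shown in the diff
    job_name="demo-dataset",        # hypothetical job name
    bucket="terrakio-mass-stats",   # hypothetical; in practice comes from track_job
    file_type="processed",
    folder="file-gen",              # new in 0.4.7: server-side folder filter
    page_size=100,                  # new: page through download URLs in batches
    output_path="./downloads",
)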
@@ -392,13 +400,13 @@ class MassStats:
             if i == 3:
                 break
 
-    @require_api_key
     async def execute_job(
         self,
         name: str,
         output: str,
         config: Dict[str, Any],
-        request_json:
+        request_json: str,  # Path to request JSON file
+        region: str = None,
         overwrite: bool = False,
         skip_existing: bool = False,
         location: str = None,
@@ -425,6 +433,7 @@ class MassStats:
         Raises:
             APIError: If the API request fails
         """
+
         def extract_manifest_from_request(request_data: List[Dict[str, Any]]) -> List[str]:
             """Extract unique group names from request data to create manifest list."""
             groups = []
@@ -444,35 +453,34 @@ class MassStats:
 
             return groups
 
-
-
-
-
-
-
-
-
-
-
-
-
-            request_json_path = request_json
-        else:
-            request_data = request_json
-            size = len(request_data)
-            request_json_path = None
+        # Load and validate request JSON
+        try:
+            with open(request_json, 'r') as file:
+                request_data = json.load(file)
+            if isinstance(request_data, list):
+                size = len(request_data)
+            else:
+                raise ValueError(f"Request JSON file {request_json} should contain a list of dictionaries")
+        except FileNotFoundError as e:
+            return e
+        except json.JSONDecodeError as e:
+            return e
 
+        # Generate manifest from request data (kept in memory)
         try:
             manifest_groups = extract_manifest_from_request(request_data)
         except Exception as e:
             raise ValueError(f"Error extracting manifest from request JSON: {e}")
 
-
+        # Extract the first expression
+        first_request = request_data[0]  # Changed from data[0] to request_data[0]
         first_expression = first_request["request"]["expr"]
 
+        # Get upload URLs
         upload_result = await self._upload_request(
             name=name,
             size=size,
+            region=region,
             sample = first_expression,
             output=output,
             config=config,
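execute_job now reads the request JSON from a file path and derives the manifest in memory rather than accepting a separate manifest file. A sketch of the expected file shape and the group extraction the docstring describes (entry fields match those produced by helper/tiles.py further down; the extraction loop body itself is not shown in the diff):

# A request JSON file is a list of entries like those emitted by helper/tiles.py:
requests = [
    {"group": "tiles", "file": "demo_00_00", "request": {"expr": "red=S2v2#(year,median).red@(year=2024)\nred"}},
    {"group": "tiles", "file": "demo_00_01", "request": {"expr": "red=S2v2#(year,median).red@(year=2024)\nred"}},
]

def extract_manifest(request_data):
    # Unique group names in first-seen order, per the docstring in the diff.
    groups = []
    for entry in request_data:
        if entry["group"] not in groups:
            groups.append(entry["group"])
    return groups

print(extract_manifest(requests))  # ['tiles']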
@@ -488,21 +496,21 @@ class MassStats:
 
         if not requests_url:
             raise ValueError("No requests_url returned from server for request JSON upload")
+
+        # Upload request JSON file
         try:
-
-
-            requests_response = await self._upload_file(request_json_path, requests_url, use_gzip=True)
-        else:
-            requests_response = await self._upload_json_data(request_data, requests_url, use_gzip=True)
+            self.validate_request(request_json)
+            requests_response = await self._upload_file(request_json, requests_url, use_gzip=True)
             if requests_response.status not in [200, 201, 204]:
                 self._client.logger.error(f"Requests upload error: {requests_response.text()}")
-                raise Exception(f"Failed to upload request
+                raise Exception(f"Failed to upload request JSON: {requests_response.text()}")
         except Exception as e:
             raise Exception(f"Error uploading request JSON file {request_json}: {e}")
-
+
         if not manifest_url:
             raise ValueError("No manifest_url returned from server for manifest JSON upload")
 
+        # Upload manifest JSON data directly (no temporary file needed)
         try:
             manifest_response = await self._upload_json_data(manifest_groups, manifest_url, use_gzip=False)
             if manifest_response.status not in [200, 201, 204]:
@@ -511,6 +519,7 @@ class MassStats:
         except Exception as e:
             raise Exception(f"Error uploading manifest JSON: {e}")
 
+        # Start the job
         start_job_task_id = await self.start_job(upload_result.get("id"))
         return start_job_task_id
 
@@ -625,7 +634,7 @@ class MassStats:
         return self._client._terrakio_request("POST", "pyramids/create", json=payload)
 
     @require_api_key
-    async def combine_tiles(self, data_name: str, overwrite: bool = True, output: str = "netcdf") -> Dict[str, Any]:
+    async def combine_tiles(self, data_name: str, overwrite: bool = True, output: str = "netcdf", max_file_size_mb = 5120) -> Dict[str, Any]:
         """
         Combine tiles for a dataset.
 
@@ -642,7 +651,130 @@ class MassStats:
         """
         payload = {
             'data_name': data_name,
+            'folder': "file-gen",
             'output': output,
-            'overwrite': str(overwrite).lower()
+            'overwrite': str(overwrite).lower(),
+            'max_file_size_mb': max_file_size_mb
         }
-        return await self._client._terrakio_request("POST", "mass_stats/combine_tiles", json=payload)
+        return await self._client._terrakio_request("POST", "mass_stats/combine_tiles", json=payload)
+
+    @require_api_key
+    async def load_zonal_stats(self, job_id: str, max_files: int = 5, poll_interval: int = 30):
+        """
+        Load zonal stats results from a completed mass stats job.
+
+        Args:
+            job_id: The job ID returned from the mass stats execution
+            max_files: Maximum number of files to download (default: 5)
+            poll_interval: Seconds to wait between status checks (default: 30)
+
+        Returns:
+            GeoDataFrame with geometry and dataset columns, or None if failed
+        """
+        try:
+            while True:
+                try:
+                    track_info = await self.track_job([job_id])
+                    job_info = track_info[job_id]
+                    status = job_info['status']
+
+                    self._client.logger.info(f"Job {job_id} status: {status}")
+
+                    if status == 'Completed':
+                        self._client.logger.info('Job completed successfully!')
+                        break
+                    elif status in ['Failed', 'Cancelled', 'Error']:
+                        raise RuntimeError(f"Job {job_id} failed with status: {status}")
+
+                    await asyncio.sleep(poll_interval)
+
+                except KeyboardInterrupt:
+                    self._client.logger.info(f"\nInterrupted! Job {job_id} is still running.")
+                    raise
+
+            async with aiohttp.ClientSession() as session:
+                payload = {
+                    "job_name": job_info['name'],
+                    "file_type": "raw",
+                    "bucket": job_info['bucket']
+                }
+
+                result = await self._client._terrakio_request("POST", "mass_stats/download_files", json=payload)
+                download_urls = result['download_urls'][:max_files]
+
+                self._client.logger.info(f"Downloading {len(download_urls)} dataset files...")
+
+                datasets = []
+                for i, url in enumerate(download_urls):
+                    try:
+                        self._client.logger.info(f"Downloading dataset {i+1}/{len(download_urls)}...")
+                        async with session.get(url) as response:
+                            if response.status == 200:
+                                content = await response.read()
+                                dataset = xr.open_dataset(BytesIO(content))
+                                datasets.append(dataset)
+                                self._client.logger.info(f"Successfully processed dataset {i+1}")
+                            else:
+                                self._client.logger.warning(f"Failed to download dataset {i+1}: HTTP {response.status}")
+                    except Exception as e:
+                        self._client.logger.error(f"Error downloading dataset {i+1}: {e}")
+                        continue
+
+                if not datasets:
+                    self._client.logger.warning("No datasets were successfully downloaded")
+                    return gpd.GeoDataFrame({'geometry': [], 'dataset': []})
+
+                try:
+                    json_response = await self._client._terrakio_request(
+                        "POST", "mass_stats/download_json",
+                        params={"job_name": job_info['name']}
+                    )
+                    json_url = json_response["download_url"]
+
+                    async with session.get(json_url) as response:
+                        if response.status == 200:
+                            json_data = await response.json()
+                            self._client.logger.info("Successfully downloaded geometry data")
+
+                            geometries = []
+                            max_geometries = min(max_files, len(json_data), len(datasets))
+
+                            for i in range(max_geometries):
+                                try:
+                                    geom_dict = json_data[i]["request"]["feature"]["geometry"]
+                                    shapely_geom = shape(geom_dict)
+                                    geometries.append(shapely_geom)
+                                except (KeyError, ValueError) as e:
+                                    self._client.logger.warning(f"Error parsing geometry {i}: {e}")
+                                    continue
+
+                            min_length = min(len(datasets), len(geometries))
+                            if min_length == 0:
+                                self._client.logger.warning("No matching datasets and geometries found")
+                                return gpd.GeoDataFrame({'geometry': [], 'dataset': []})
+
+                            gdf = gpd.GeoDataFrame({
+                                'geometry': geometries[:min_length],
+                                'dataset': datasets[:min_length]
+                            })
+
+                            self._client.logger.info(f"Created GeoDataFrame with {len(gdf)} rows")
+
+                            try:
+                                expanded_gdf = expand_on_variables_and_time(gdf)
+                                return expanded_gdf
+                            except NameError:
+                                self._client.logger.warning("expand_on_variables_and_time function not found, returning raw GeoDataFrame")
+                                return gdf
+
+                        else:
+                            self._client.logger.warning(f"Failed to download geometry data: HTTP {response.status}")
+                            return gpd.GeoDataFrame({'geometry': [], 'dataset': []})
+
+                except Exception as e:
+                    self._client.logger.error(f"Error downloading geometry data: {e}")
+                    return gpd.GeoDataFrame({'geometry': [], 'dataset': []})
+
+        except Exception as e:
+            self._client.logger.error(f"Failed to load zonal stats for job {job_id}: {e}")
+            return None
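The new load_zonal_stats turns a finished mass-stats job into a GeoDataFrame pairing each request geometry with its xarray dataset. A minimal usage sketch (client construction and the job id are assumptions; the method signature is from the diff):

import asyncio
from terrakio_core.async_client import AsyncClient

async def inspect_results(job_id: str):
    client = AsyncClient()  # assumes configured credentials
    gdf = await client.mass_stats.load_zonal_stats(job_id, max_files=3, poll_interval=15)
    if gdf is None:
        raise RuntimeError("tracking or download failed; see client logs")
    print(gdf.head())  # 'geometry' and 'dataset' columns, possibly expanded by variable/time

asyncio.run(inspect_results("hypothetical-task-id"))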
terrakio_core/helper/tiles.py
CHANGED
@@ -16,20 +16,21 @@ def get_bounds(aoi, crs, to_crs = None):
     bounds = aoi.geometry[0].bounds
     return *bounds, aoi
 
-def tile_generator(x_min, y_min, x_max, y_max, aoi, crs, res, tile_size, expression, output,
-    i_max = int((x_max-x_min)/(tile_size*res))
-    j_max = int((y_max-y_min)/(tile_size*res))
-    if fully_cover:
-        i_max += 1
-        j_max += 1
+def tile_generator(x_min, y_min, x_max, y_max, aoi, crs, res, tile_size, expression, output, mask):
+    i_max = int((x_max-x_min)/(tile_size*res)) + 1
+    j_max = int((y_max-y_min)/(tile_size*res)) + 1
     for j in range(0, int(j_max)):
         for i in range(0, int(i_max)):
             x = x_min + i*(tile_size*res)
             y = y_max - j*(tile_size*res)
-
-            if not aoi.geometry[0].intersects(
+            geom = shapely.geometry.box(x, y-(tile_size*res), x + (tile_size*res), y)
+            if not aoi.geometry[0].intersects(geom):
                 continue
-
+            if mask:
+                geom = geom.intersection(aoi.geometry[0])
+                if geom.is_empty:
+                    continue
+            feat = {"type": "Feature", "geometry": geom.__geo_interface__}
             data = {
                 "feature": feat,
                 "in_crs": crs,
@@ -46,15 +47,15 @@ def tiles(
     aoi : str,
     expression: str = "red=S2v2#(year,median).red@(year =2024) \n red",
     output: str = "netcdf",
-    tile_size : float =
+    tile_size : float = 1024,
     crs : str = "epsg:3577",
     res: float = 10,
     region : str = "eu",
     to_crs: str = None,
-    fully_cover: bool = True,
     overwrite: bool = False,
     skip_existing: bool = False,
     non_interactive: bool = False,
+    mask: bool = True,
 ):
 
     reqs = []
@@ -62,7 +63,7 @@ def tiles(
 
     if to_crs is None:
         to_crs = crs
-    for tile_req, i, j in tile_generator(x_min, y_min, x_max, y_max, aoi, to_crs, res, tile_size, expression, output,
+    for tile_req, i, j in tile_generator(x_min, y_min, x_max, y_max, aoi, to_crs, res, tile_size, expression, output, mask):
         req_name = f"{name}_{i:02d}_{j:02d}"
         reqs.append({"group": "tiles", "file": req_name, "request": tile_req})
 
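tile_generator now always over-covers the bounding box by one tile (the old fully_cover switch is gone) and, with mask=True, clips each tile box to the AOI before building the request feature. A small numeric sketch of the grid arithmetic from the hunk (bounds and resolution are hypothetical):

from shapely.geometry import box

x_min, y_min, x_max, y_max = 149.0, -35.4, 149.1, -35.3   # hypothetical AOI bounds
res, tile_size = 0.0001, 1024                             # degrees per pixel, pixels per tile

step = tile_size * res                                    # 0.1024 degrees per tile edge
i_max = int((x_max - x_min) / step) + 1                   # -> 1 column
j_max = int((y_max - y_min) / step) + 1                   # -> 1 row

# Top-left tile, exactly as the generator builds it:
tile = box(x_min, y_max - step, x_min + step, y_max)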
terrakio_core/sync_client.py
CHANGED
@@ -643,6 +643,7 @@ class SyncClient:
 
     def create_dataset_file(
         self,
+        name: str,
         aoi: str,
         expression: str,
         output: str,
@@ -655,6 +656,9 @@ class SyncClient:
         non_interactive: bool = True,
         poll_interval: int = 30,
         download_path: str = "/home/user/Downloads",
+        mask = True,
+        max_file_size_mb: int = 5120,  # Default to 5GB
+        tile_size: int = 1024,
     ) -> dict:
         """Create a dataset file using mass stats operations (synchronous version)."""
         coro = self._async_client.create_dataset_file(
@@ -670,9 +674,14 @@ class SyncClient:
             non_interactive=non_interactive,
             poll_interval=poll_interval,
             download_path=download_path,
+            name=name,
+            mask=mask,
+            max_file_size_mb=max_file_size_mb,
+            tile_size=tile_size
         )
         return self._run_async(coro)
 
+
     def geo_queries(
         self,
         queries: list[dict],
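SyncClient simply forwards the new keywords to the async implementation, so scripts without an event loop get the same controls. A sketch (paths and names illustrative, as above):

from terrakio_core.sync_client import SyncClient

client = SyncClient()  # assumes configured credentials
result = client.create_dataset_file(
    name="demo-dataset",
    aoi="./aoi.geojson",
    expression="red=S2v2#(year,median).red@(year=2024)\nred",
    output="geotiff",
    tile_size=512,           # smaller tiles -> more, lighter requests
    max_file_size_mb=1024,   # split combined output at 1 GB
)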
{terrakio_core-0.4.6.dist-info → terrakio_core-0.4.7.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: terrakio-core
-Version: 0.4.6
+Version: 0.4.7
 Summary: Core components for Terrakio API clients
 Author-email: Yupeng Chao <yupeng@haizea.com.au>
 Project-URL: Homepage, https://github.com/HaizeaAnalytics/terrakio-python-api
@@ -28,6 +28,7 @@ Requires-Dist: onnxruntime>=1.10.0
 Requires-Dist: psutil>=5.0.0
 Requires-Dist: h5netcdf>=1.0.0
 Requires-Dist: netcdf4>=1.5.0
+Requires-Dist: aiofiles>=24.1.0
 Provides-Extra: ml
 Requires-Dist: torch>=2.7.1; extra == "ml"
 Requires-Dist: scikit-learn>=1.7.0; extra == "ml"
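The new aiofiles pin makes explicit a dependency the code already had: mass_stats.py imports aiofiles inside its chunked download loop.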
{terrakio_core-0.4.6.dist-info → terrakio_core-0.4.7.dist-info}/RECORD
CHANGED
@@ -1,22 +1,22 @@
-terrakio_core/__init__.py,sha256=
+terrakio_core/__init__.py,sha256=oBlKpY5_oKnNAR3VXehRw4a0mmg7W0SvMGh2QEwqO0A,273
 terrakio_core/accessors.py,sha256=qWLljU83YO7EUOefo_f6_P6ba6uiYMXwou0ihAHKBHQ,23706
-terrakio_core/async_client.py,sha256=
+terrakio_core/async_client.py,sha256=FNl1K3g6UoB1aKPAG6gFc-25ukNLCmyI-m-UpLawzFI,14656
 terrakio_core/client.py,sha256=VXP7BtJWIfpPPZR7_yNdSTcGwNgTwhb7KorusqkQrzk,5603
 terrakio_core/config.py,sha256=r8NARVYOca4AuM88VP_j-8wQxOk1s7VcRdyEdseBlLE,4193
 terrakio_core/exceptions.py,sha256=4qnpOM1gOxsNIXDXY4qwY1d3I4Myhp7HBh7b2D0SVrU,529
-terrakio_core/sync_client.py,sha256=
-terrakio_core/convenience_functions/convenience_functions.py,sha256=
+terrakio_core/sync_client.py,sha256=ATM-2aeLj4-V2x92Fc_Fxsr4UiI9iT8G0PqrDXh_8XU,27759
+terrakio_core/convenience_functions/convenience_functions.py,sha256=tbzG7j6o0HojAEJqAxg2KPqCpSASJlLnayFQ_Cxfips,21700
 terrakio_core/endpoints/auth.py,sha256=FdLsPScPIBo-Gxl6ZnE-46cp2molggAJtL72LssN3fg,6049
 terrakio_core/endpoints/dataset_management.py,sha256=BUm8IIlW_Q45vDiQp16CiJGeSLheI8uWRVRQtMdhaNk,13161
 terrakio_core/endpoints/group_management.py,sha256=VFl3jakjQa9OPi351D3DZvLU9M7fHdfjCzGhmyJsx3U,6309
-terrakio_core/endpoints/mass_stats.py,sha256=
+terrakio_core/endpoints/mass_stats.py,sha256=Gz70eyRrmQykrr7zFC5Pd5RTNvVpHgyTTF3rr2lNuFE,30038
 terrakio_core/endpoints/model_management.py,sha256=LH_gHPrqYA-_45KWpDBRcFbwHgm-Kg0zk1ealy7P_C0,52379
 terrakio_core/endpoints/space_management.py,sha256=YWb55nkJnFJGlALJ520DvurxDqVqwYtsvqQPWzxzhDs,2266
 terrakio_core/endpoints/user_management.py,sha256=WlFr3EfK8iI6DfkpMuYLHZUPk2n7_DHHO6z1hndmZB4,3816
 terrakio_core/helper/bounded_taskgroup.py,sha256=wiTH10jhKZgrsgrFUNG6gig8bFkUEPHkGRT2XY7Rgmo,677
 terrakio_core/helper/decorators.py,sha256=L6om7wmWNgCei3Wy5U0aZ-70OzsCwclkjIf7SfQuhCg,2289
-terrakio_core/helper/tiles.py,sha256=
-terrakio_core-0.4.6.dist-info/METADATA,sha256=
-terrakio_core-0.4.6.dist-info/WHEEL,sha256=
-terrakio_core-0.4.6.dist-info/top_level.txt,sha256=
+terrakio_core/helper/tiles.py,sha256=lcLCO6KiP05lCI9vngo3zCZJ6Z9C0pUxHSQS4H58EHc,2699
+terrakio_core-0.4.7.dist-info/METADATA,sha256=t59FmeOR6RkUZ9XufM4qHDwaGREQjl2__VOAXFDAV_Y,1913
+terrakio_core-0.4.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+terrakio_core-0.4.7.dist-info/top_level.txt,sha256=5cBj6O7rNWyn97ND4YuvvXm0Crv4RxttT4JZvNdOG6Q,14
+terrakio_core-0.4.7.dist-info/RECORD,,

{terrakio_core-0.4.6.dist-info → terrakio_core-0.4.7.dist-info}/WHEEL
File without changes

{terrakio_core-0.4.6.dist-info → terrakio_core-0.4.7.dist-info}/top_level.txt
File without changes