terrakio-core 0.4.8__py3-none-any.whl → 0.4.93__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of terrakio-core might be problematic.
- terrakio_core/__init__.py +1 -1
- terrakio_core/accessors.py +800 -328
- terrakio_core/async_client.py +6 -2
- terrakio_core/convenience_functions/create_dataset_file.py +132 -0
- terrakio_core/convenience_functions/geoquries.py +102 -0
- terrakio_core/convenience_functions/{convenience_functions.py → zonal_stats.py} +166 -263
- terrakio_core/endpoints/mass_stats.py +42 -147
- terrakio_core/sync_client.py +0 -340
- terrakio_core-0.4.93.dist-info/METADATA +31 -0
- {terrakio_core-0.4.8.dist-info → terrakio_core-0.4.93.dist-info}/RECORD +11 -10
- {terrakio_core-0.4.8.dist-info → terrakio_core-0.4.93.dist-info}/WHEEL +1 -2
- terrakio_core-0.4.8.dist-info/METADATA +0 -47
- terrakio_core-0.4.8.dist-info/top_level.txt +0 -1
terrakio_core/convenience_functions/{convenience_functions.py → zonal_stats.py}

@@ -1,30 +1,143 @@
-
-import os
+# Standard library imports
 import asyncio
-import
-import
-import pandas as pd
-import geopandas as gpd
-from geopandas import GeoDataFrame
-from shapely.geometry import mapping
-from pathlib import Path
-from ..exceptions import APIError, ConfigurationError
-from ..helper.bounded_taskgroup import BoundedTaskGroup
-from ..helper.tiles import tiles
+import psutil
+import random
 import uuid
+from io import BytesIO
+from typing import Optional
+
+# Third-party library imports
+import aiohttp
+import geopandas as gpd
+import nest_asyncio
+import pandas as pd
+import pyproj
 import xarray as xr
-import
-import
-import copy
-from shapely.geometry import shape
+from geopandas import GeoDataFrame
+from shapely.geometry import box, mapping, shape
 from shapely.ops import transform
-from shapely.geometry import box
-import pyproj
 
-
-
+# Local imports
+from .geoquries import request_geoquery_list
+
+nest_asyncio.apply()
+class cloud_object(gpd.GeoDataFrame):
+    """
+    This class is a class used for cloud
+    """
+    def __init__(self, job_id: str, job_name: str, client=None):
+
+        super().__init__({
+            'geometry': [],
+            'dataset': []
+        })
+
+        self.job_id = job_id
+        self.client = client
+        self.job_name = job_name
+
+    def head(self, n = 5):
+        """
+        Returns the first n files stored in the cloud bucket.
+        """
+        return asyncio.run(self._head_async(n))
+
+    async def _head_async(self, n = 5):
+        """
+        Returns the first n files stored in the cloud bucket.
+
+        Args:
+            n (int): Number of files to return. Default is 5.
+
+        Returns:
+            GeoDataFrame: A GeoDataFrame containing the first n files.
+        """
+
+        track_info = await self.client.mass_stats.track_job([self.job_id])
+        job_info = track_info[self.job_id]
+        status = job_info['status']
+        if status == "Completed":
+            payload = {
+                "job_name": job_info["name"],
+                "file_type": "raw",
+                "bucket": job_info["bucket"],
+            }
+            result = await self.client._terrakio_request("POST", "mass_stats/download_files", json=payload)
+            download_urls = result["download_urls"][:n]
+            datasets = []
+
+            async with aiohttp.ClientSession() as session:
+                for i, url in enumerate(download_urls):
+                    try:
+                        self.client.logger.info(f"Downloading dataset {i+1}/{len(download_urls)}...")
+                        async with session.get(url) as response:
+                            if response.status == 200:
+                                content = await response.read()
+                                dataset = xr.open_dataset(BytesIO(content))
+                                datasets.append(dataset)
+                                self.client.logger.info(f"Successfully processed dataset {i+1}")
+                            else:
+                                self.client.logger.warning(f"Failed to download dataset {i+1}: HTTP {response.status}")
+                    except Exception as e:
+                        self.client.logger.error(f"Error downloading dataset {i+1}: {e}")
+                        continue
+                if not datasets:
+                    self.client.logger.warning("No datasets were successfully downloaded")
+                    return gpd.GeoDataFrame({'geometry': [], 'dataset': []})
+                try:
+                    json_response = await self.client._terrakio_request(
+                        "POST", "mass_stats/download_json",
+                        params={"job_name": job_info['name']}
+                    )
+                    json_url = json_response["download_url"]
+
+                    async with session.get(json_url) as response:
+                        if response.status == 200:
+                            json_data = await response.json()
+                            self.client.logger.info("Successfully downloaded geometry data")
+
+                            geometries = []
+                            max_geometries = min(n, len(json_data), len(datasets))
+
+                            for i in range(max_geometries):
+                                try:
+                                    geom_dict = json_data[i]["request"]["feature"]["geometry"]
+                                    shapely_geom = shape(geom_dict)
+                                    geometries.append(shapely_geom)
+                                except (KeyError, ValueError) as e:
+                                    self.client.logger.warning(f"Error parsing geometry {i}: {e}")
+                                    continue
+
+                            min_length = min(len(datasets), len(geometries))
+                            if min_length == 0:
+                                self.client.logger.warning("No matching datasets and geometries found")
+                                return gpd.GeoDataFrame({'geometry': [], 'dataset': []})
+
+                            gdf = gpd.GeoDataFrame({
+                                'geometry': geometries[:min_length],
+                                'dataset': datasets[:min_length]
+                            })
+
+                            self.client.logger.info(f"Created GeoDataFrame with {len(gdf)} rows")
+                            try:
+                                expanded_gdf = expand_on_variables_and_time(gdf)
+                                return expanded_gdf
+                            except NameError:
+                                self.client.logger.warning("expand_on_variables_and_time function not found, returning raw GeoDataFrame")
+                                return gdf
+
+                        else:
+                            self.client.logger.warning(f"Failed to download geometry data: HTTP {response.status}")
+                            return gpd.GeoDataFrame({'geometry': [], 'dataset': []})
+
+                except Exception as e:
+                    self.client.logger.error(f"Error downloading geometry data: {e}")
+                    return gpd.GeoDataFrame({'geometry': [], 'dataset': []})
+        elif status in ["Failed", "Cancelled", "Error"]:
+            return "The zonal stats job (for preparing the data) has failed, please check the job status!"
+        else:
+            return "The zonal stats job (for preparing the data) is still running, please come back at a later time!"
 
-from typing import Optional
 
 def expand_on_time(gdf):
     """
@@ -90,6 +203,8 @@ def expand_on_time(gdf):
     result_gdf = gpd.GeoDataFrame(result_df, geometry='geometry')
     result_gdf = result_gdf.set_index(['geometry'])
 
+    result_gdf.attrs = gdf.attrs.copy()
+
     return result_gdf
 
 def expand_on_variables(gdf):
@@ -143,7 +258,7 @@ def expand_on_variables(gdf):
         raise ValueError("Expected 'dataset' column for variable expansion")
 
     result_df = pd.DataFrame(rows)
-
+
     if 'time' in result_df.columns:
         result_gdf = gpd.GeoDataFrame(result_df, geometry='geometry')
         result_gdf = result_gdf.set_index(['geometry', 'time'])
@@ -151,9 +266,10 @@ def expand_on_variables(gdf):
         result_gdf = gpd.GeoDataFrame(result_df, geometry='geometry')
         result_gdf = result_gdf.set_index(['geometry'])
 
+    result_gdf.attrs = gdf.attrs.copy()
+
     return result_gdf
 
-
 def expand_on_variables_and_time(gdf):
     """
     Convenience function to expand on both variables and time.
@@ -169,7 +285,7 @@ def expand_on_variables_and_time(gdf):
         return expanded_on_variables_and_time
     except Exception as e:
         return expanded_on_time
-
+
 def estimate_geometry_size_ratio(queries: list):
     """Calculate size ratios for all geometries relative to the first geometry using bounding box area."""
 
@@ -217,101 +333,6 @@ async def estimate_query_size(
         total_size_mb += first_query_dataset.nbytes * ratios[i] / (1024**2)
     return total_size_mb
 
-async def request_geoquery_list(
-    client,
-    quries: list[dict],
-    conc: int = 20,
-):
-    """
-    Execute multiple geo queries.
-
-    Args:
-        client: The Terrakio client instance
-        quries: List of dictionaries containing query parameters
-        conc: The concurrency level for the requests
-
-    Returns:
-        List of query results
-
-    Raises:
-        ValueError: If the queries list is empty
-    """
-    if not quries:
-        raise ValueError("Queries list cannot be empty")
-    if conc > 100:
-        raise ValueError("Concurrency (conc) is too high. Please set conc to 100 or less.")
-
-    for i, query in enumerate(quries):
-        if 'expr' not in query:
-            raise ValueError(f"Query at index {i} is missing the required 'expr' key")
-        if 'feature' not in query:
-            raise ValueError(f"Query at index {i} is missing the required 'feature' key")
-        if 'in_crs' not in query:
-            raise ValueError(f"Query at index {i} is missing the required 'in_crs' key")
-
-    completed_count = 0
-    lock = asyncio.Lock()
-    async def single_geo_query(query):
-        """
-        Execute multiple geo queries concurrently.
-
-        Args:
-            quries: List of dictionaries containing query parameters
-        """
-        total_number_of_requests = len(quries)
-        nonlocal completed_count
-        try:
-            result = await client.geoquery(**query)
-            if isinstance(result, dict) and result.get("error"):
-                error_msg = f"Request failed: {result.get('error_message', 'Unknown error')}"
-                if result.get('status_code'):
-                    error_msg = f"Request failed with status {result['status_code']}: {result.get('error_message', 'Unknown error')}"
-                raise APIError(error_msg)
-            if isinstance(result, list):
-                result = result[0]
-                timestamp_number = result['request_count']
-                return timestamp_number
-            if not isinstance(result, xr.Dataset):
-                raise ValueError(f"Expected xarray Dataset, got {type(result)}")
-
-            async with lock:
-                completed_count += 1
-                if completed_count % max(1, total_number_of_requests // 10) == 0:
-                    client.logger.info(f"Progress: {completed_count}/{total_number_of_requests} requests processed")
-            return result
-        except Exception as e:
-            async with lock:
-                completed_count += 1
-            raise
-
-    try:
-        async with BoundedTaskGroup(max_concurrency=conc) as tg:
-            tasks = [tg.create_task(single_geo_query(quries[idx])) for idx in range(len(quries))]
-        all_results = [task.result() for task in tasks]
-
-    except* Exception as eg:
-        for e in eg.exceptions:
-            if hasattr(e, 'response'):
-                raise APIError(f"API request failed: {e.response.text}")
-            raise
-    client.logger.info("All requests completed!")
-
-    if not all_results:
-        raise ValueError("No valid results were returned for any geometry")
-    if isinstance(all_results, list) and type(all_results[0]) == int:
-        return sum(all_results)/len(all_results)
-    else:
-        geometries = []
-        for query in quries:
-            feature = query['feature']
-            geometry = shape(feature['geometry'])
-            geometries.append(geometry)
-        result_gdf = gpd.GeoDataFrame({
-            'geometry': geometries,
-            'dataset': all_results
-        })
-        return result_gdf
-
 async def estimate_timestamp_number(
     client,
     quries: list[dict],
@@ -388,9 +409,7 @@ def gdf_to_json(
     """
     mass_stats_requests = []
 
-    # Loop through each row in the GeoDataFrame
     for idx, row in gdf.iterrows():
-        # Create the request feature
         request_feature = {
             "expr": expr,
             "feature": {
@@ -404,29 +423,24 @@ def gdf_to_json(
             "geom_fix": geom_fix,
         }
 
-        # Determine group name and file name based on id_column
         if id_column is not None and id_column in gdf.columns:
-            # Use the value from the specified column as group and file name
             identifier = str(row[id_column])
             group_name = f"group_{identifier}"
             file_name = f"file_{identifier}"
         else:
-            # Use the index as group and file name
             group_name = f"group_{idx}"
             file_name = f"file_{idx}"
 
-        # Create the complete request entry
         request_entry = {
             "group": group_name,
             "file": file_name,
             "request": request_feature,
         }
 
-        # Add the request to our list
         mass_stats_requests.append(request_entry)
 
     return mass_stats_requests
-
+
 async def handle_mass_stats(
     client,
     gdf: GeoDataFrame,
@@ -436,17 +450,24 @@ async def handle_mass_stats(
     resolution: int = -1,
     geom_fix: bool = False,
     id_column: Optional[str] = None,
-
 ):
-    request_json = gdf_to_json(gdf
-
-
-
-
-
-
+    request_json = gdf_to_json(gdf=gdf, expr=expr, in_crs=in_crs, out_crs=out_crs,
+                               resolution=resolution, geom_fix=geom_fix, id_column=id_column)
+
+    job_response = await client.mass_stats.execute_job(
+        name=f"zonal-stats-{str(uuid.uuid4())[:6]}",
+        output="netcdf",
+        config={},
+        request_json=request_json,
+        overwrite=True,
     )
-
+
+    # Extract the actual task ID from the response
+    if isinstance(job_response, dict) and 'task_id' in job_response:
+        return job_response['task_id']  # Return just the string ID
+    else:
+        return job_response  # In case it's already just the ID
+
 
 async def zonal_stats(
     client,
@@ -461,7 +482,6 @@ async def zonal_stats(
     id_column: Optional[str] = None,
 ):
     """Compute zonal statistics for all geometries in a GeoDataFrame."""
-
     if mass_stats:
         mass_stats_id = await handle_mass_stats(
             client = client,
@@ -471,9 +491,13 @@ async def zonal_stats(
             out_crs = out_crs,
             resolution = resolution,
             geom_fix = geom_fix,
-            id_column = id_column
+            id_column = id_column,
         )
-
+        job_name = await client.mass_stats.track_job([mass_stats_id])
+        job_name = job_name[mass_stats_id]["name"]
+        cloud_files_object = cloud_object(job_id = mass_stats_id, job_name = job_name, client = client)
+        return cloud_files_object
+
     quries = []
     for i in range(len(gdf)):
         quries.append({
@@ -494,130 +518,9 @@ async def zonal_stats(
         raise ValueError(local_or_remote_result["reason"])
     else:
         gdf_with_datasets = await request_geoquery_list(client = client, quries = quries, conc = conc)
+        gdf_with_datasets.attrs["cloud_metadata"] = {
+            "is_cloud_backed": False,
+        }
         gdf_with_datasets = expand_on_variables_and_time(gdf_with_datasets)
         return gdf_with_datasets
 
-async def create_dataset_file(
-    client,
-    aoi: str,
-    expression: str,
-    output: str,
-    download_path: str,
-    in_crs: str = "epsg:4326",
-    to_crs: str = "epsg:4326",
-    res: float = 0.0001,
-    region: str = None,
-    overwrite: bool = False,
-    skip_existing: bool = False,
-    non_interactive: bool = True,
-    name: str | None = None,
-    poll_interval: int = 30,
-    max_file_size_mb: int = 5120,
-    tile_size: int = 1024,
-    mask: bool = True
-) -> dict:
-
-    if not name:
-        name = f"file-gen-{uuid.uuid4().hex[:8]}"
-
-    body, reqs, groups = tiles(
-        name = name,
-        aoi = aoi,
-        expression = expression,
-        output = output,
-        tile_size = tile_size,
-        crs = in_crs,
-        res = res,
-        region = region,
-        to_crs = to_crs,
-        mask = mask,
-        overwrite = overwrite,
-        skip_existing = skip_existing,
-        non_interactive = non_interactive
-    )
-    with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as tempreq:
-        tempreq.write(reqs)
-        tempreqname = tempreq.name
-
-    task_id = await client.mass_stats.execute_job(
-        name=body["name"],
-        region=body["region"],
-        output=body["output"],
-        config = {},
-        overwrite=body["overwrite"],
-        skip_existing=body["skip_existing"],
-        request_json=tempreqname,
-    )
-
-    start_time = time.time()
-    status = None
-    client.logger.info(f"Tracking data generation job {task_id['task_id']}...")
-    while True:
-        try:
-            taskid = task_id['task_id']
-            trackinfo = await client.mass_stats.track_job([taskid])
-            status = trackinfo[taskid]['status']
-            if status == 'Completed':
-                client.logger.info('Data generated successfully!')
-                break
-            elif status in ['Failed', 'Cancelled', 'Error']:
-                raise RuntimeError(f"Job {taskid} failed with status: {status}")
-            else:
-                elapsed_time = time.time() - start_time
-                client.logger.info(f"Job status: {status} - Elapsed time: {elapsed_time:.1f}s")
-                await asyncio.sleep(poll_interval)
-
-
-        except KeyboardInterrupt:
-            client.logger.info(f"\nInterrupted! Job {taskid} is still running in the background.")
-            raise
-        except Exception as e:
-            client.logger.info(f"\nError tracking job: {e}")
-            raise
-
-    os.unlink(tempreqname)
-
-    combine_result = await client.mass_stats.combine_tiles(body["name"], body["overwrite"], body["output"], max_file_size_mb=max_file_size_mb)
-    combine_task_id = combine_result.get("task_id")
-
-    combine_start_time = time.time()
-    client.logger.info(f"Tracking file generation job {combine_task_id}...")
-    while True:
-        try:
-            trackinfo = await client.mass_stats.track_job([combine_task_id])
-            if body["output"] == "netcdf":
-                download_file_name = trackinfo[combine_task_id]['folder'] + '.nc'
-            elif body["output"] == "geotiff":
-                download_file_name = trackinfo[combine_task_id]['folder'] + '.tif'
-            bucket = trackinfo[combine_task_id]['bucket']
-            combine_status = trackinfo[combine_task_id]['status']
-            if combine_status == 'Completed':
-                client.logger.info('File/s generated successfully!')
-                break
-            elif combine_status in ['Failed', 'Cancelled', 'Error']:
-                raise RuntimeError(f"File generation job {combine_task_id} failed with status: {combine_status}")
-            else:
-                elapsed_time = time.time() - combine_start_time
-                client.logger.info(f"File generation job status: {combine_status} - Elapsed time: {elapsed_time:.1f}s")
-                time.sleep(poll_interval)
-        except KeyboardInterrupt:
-            client.logger.info(f"\nInterrupted! File generation job {combine_task_id} is still running in the background.")
-            raise
-        except Exception as e:
-            client.logger.info(f"\nError tracking file generation job: {e}")
-            raise
-
-    if download_path:
-        await client.mass_stats.download_file(
-            job_name=body["name"],
-            bucket=bucket,
-            file_type='processed',
-            folder='file-gen',
-            page_size=100,
-            output_path=download_path,
-        )
-    else:
-        path = f"{body['name']}/outputs/merged/{download_file_name}"
-        client.logger.info(f"Dataset file/s is available at {path}")
-
-    return {"generation_task_id": task_id, "combine_task_id": combine_task_id}