terrakio-core 0.4.8__py3-none-any.whl → 0.4.94__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of terrakio-core might be problematic.
- terrakio_core/__init__.py +1 -1
- terrakio_core/accessors.py +800 -328
- terrakio_core/async_client.py +10 -3
- terrakio_core/convenience_functions/create_dataset_file.py +132 -0
- terrakio_core/convenience_functions/geoquries.py +102 -0
- terrakio_core/convenience_functions/{convenience_functions.py → zonal_stats.py} +168 -263
- terrakio_core/endpoints/mass_stats.py +94 -162
- terrakio_core/sync_client.py +0 -340
- terrakio_core-0.4.94.dist-info/METADATA +31 -0
- {terrakio_core-0.4.8.dist-info → terrakio_core-0.4.94.dist-info}/RECORD +11 -10
- {terrakio_core-0.4.8.dist-info → terrakio_core-0.4.94.dist-info}/WHEEL +1 -2
- terrakio_core-0.4.8.dist-info/METADATA +0 -47
- terrakio_core-0.4.8.dist-info/top_level.txt +0 -1
@@ -1,30 +1,145 @@
-
-import os
+# Standard library imports
 import asyncio
-import
-import
-import pandas as pd
-import geopandas as gpd
-from geopandas import GeoDataFrame
-from shapely.geometry import mapping
-from pathlib import Path
-from ..exceptions import APIError, ConfigurationError
-from ..helper.bounded_taskgroup import BoundedTaskGroup
-from ..helper.tiles import tiles
+import psutil
+import random
 import uuid
+from io import BytesIO
+from typing import Optional
+
+# Third-party library imports
+import aiohttp
+import geopandas as gpd
+import nest_asyncio
+import pandas as pd
+import pyproj
 import xarray as xr
-import
-import
-import copy
-from shapely.geometry import shape
+from geopandas import GeoDataFrame
+from shapely.geometry import box, mapping, shape
 from shapely.ops import transform
-from shapely.geometry import box
-import pyproj
 
-
-
+# Local imports
+from .geoquries import request_geoquery_list
 
-
+nest_asyncio.apply()
+class cloud_object(gpd.GeoDataFrame):
+    """
+    This class is a class used for cloud
+    """
+    def __init__(self, job_id: str, job_name: str, client=None):
+
+        super().__init__({
+            'geometry': [],
+            'dataset': []
+        })
+
+        self.job_id = job_id
+        self.client = client
+        self.job_name = job_name
+
+    def head(self, n = 5):
+        """
+        Returns the first n files stored in the cloud bucket.
+        """
+        return asyncio.run(self._head_async(n))
+
+    async def _head_async(self, n = 5):
+        """
+        Returns the first n files stored in the cloud bucket.
+
+        Args:
+            n (int): Number of files to return. Default is 5.
+
+        Returns:
+            GeoDataFrame: A GeoDataFrame containing the first n files.
+        """
+
+        track_info = await self.client.mass_stats.track_job([self.job_id])
+        job_info = track_info[self.job_id]
+        status = job_info['status']
+
+        if status == "Completed":
+            payload = {
+                "job_name": job_info["name"],
+                "file_type": "raw",
+                "bucket": job_info["bucket"],
+            }
+            result = await self.client._terrakio_request("POST", "mass_stats/download_files", json=payload)
+            download_urls = result["download_urls"][:n]
+            datasets = []
+
+            async with aiohttp.ClientSession() as session:
+                for i, url in enumerate(download_urls):
+                    try:
+                        self.client.logger.info(f"Downloading dataset {i+1}/{len(download_urls)}...")
+                        async with session.get(url) as response:
+                            if response.status == 200:
+                                content = await response.read()
+                                dataset = xr.open_dataset(BytesIO(content))
+                                datasets.append(dataset)
+                                self.client.logger.info(f"Successfully processed dataset {i+1}")
+                            else:
+                                self.client.logger.warning(f"Failed to download dataset {i+1}: HTTP {response.status}")
+                    except Exception as e:
+                        self.client.logger.error(f"Error downloading dataset {i+1}: {e}")
+                        continue
+                if not datasets:
+                    self.client.logger.warning("No datasets were successfully downloaded")
+                    return gpd.GeoDataFrame({'geometry': [], 'dataset': []})
+                try:
+                    json_response = await self.client._terrakio_request(
+                        "POST", "mass_stats/download_json",
+                        params={"job_name": job_info['name']}
+                    )
+                    json_url = json_response["download_url"]
+
+                    async with session.get(json_url) as response:
+                        if response.status == 200:
+                            json_data = await response.json()
+                            self.client.logger.info("Successfully downloaded geometry data")
+
+                            geometries = []
+                            max_geometries = min(n, len(json_data), len(datasets))
+
+                            for i in range(max_geometries):
+                                try:
+                                    geom_dict = json_data[i]["request"]["feature"]["geometry"]
+                                    shapely_geom = shape(geom_dict)
+                                    geometries.append(shapely_geom)
+                                except (KeyError, ValueError) as e:
+                                    self.client.logger.warning(f"Error parsing geometry {i}: {e}")
+                                    continue
+
+                            min_length = min(len(datasets), len(geometries))
+                            if min_length == 0:
+                                self.client.logger.warning("No matching datasets and geometries found")
+                                return gpd.GeoDataFrame({'geometry': [], 'dataset': []})
+
+                            gdf = gpd.GeoDataFrame({
+                                'geometry': geometries[:min_length],
+                                'dataset': datasets[:min_length]
+                            })
+
+                            self.client.logger.info(f"Created GeoDataFrame with {len(gdf)} rows")
+                            try:
+                                expanded_gdf = expand_on_variables_and_time(gdf)
+                                return expanded_gdf
+                            except NameError:
+                                self.client.logger.warning("expand_on_variables_and_time function not found, returning raw GeoDataFrame")
+                                return gdf
+
+                        else:
+                            self.client.logger.warning(f"Failed to download geometry data: HTTP {response.status}")
+                            return gpd.GeoDataFrame({'geometry': [], 'dataset': []})
+
+                except Exception as e:
+                    self.client.logger.error(f"Error downloading geometry data: {e}")
+                    return gpd.GeoDataFrame({'geometry': [], 'dataset': []})
+
+        elif status in ["Failed", "Cancelled", "Error"]:
+            raise RuntimeError(f"The zonal stats job (job_id: {self.job_id}) has failed, cancelled, or errored. Please check the job status!")
+
+        else:
+            raise RuntimeError(f"The zonal stats job (job_id: {self.job_id}) is still running. Please come back at a later time!")
 
 def expand_on_time(gdf):
     """
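The hunk above introduces `cloud_object`, a GeoDataFrame subclass that points at the output bucket of a completed mass-stats job and only downloads files when `head(n)` is called. A minimal usage sketch follows, assuming an already-authenticated async client; the `client` variable and the expression string are placeholders, and only `zonal_stats(..., mass_stats=True)` returning a `cloud_object` and `cloud_object.head(n)` are taken from this diff.

```python
# Hedged sketch, not package documentation: preview the first few results of a
# cloud-backed zonal stats job. `client` and the expression are placeholders.
async def preview_zonal_stats(client, gdf):
    # With mass_stats=True the new zonal_stats branch submits a mass-stats job
    # and returns a cloud_object immediately instead of local datasets.
    cloud_result = await zonal_stats(
        client=client,
        gdf=gdf,
        expr="<expression>",   # placeholder expression
        mass_stats=True,
    )
    # head(n) downloads the first n raw NetCDF files plus their request
    # geometries; it raises RuntimeError while the job is still running.
    return cloud_result.head(n=3)
```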
@@ -90,6 +205,8 @@ def expand_on_time(gdf):
     result_gdf = gpd.GeoDataFrame(result_df, geometry='geometry')
     result_gdf = result_gdf.set_index(['geometry'])
 
+    result_gdf.attrs = gdf.attrs.copy()
+
     return result_gdf
 
 def expand_on_variables(gdf):
@@ -143,7 +260,7 @@ def expand_on_variables(gdf):
         raise ValueError("Expected 'dataset' column for variable expansion")
 
     result_df = pd.DataFrame(rows)
-
+
     if 'time' in result_df.columns:
         result_gdf = gpd.GeoDataFrame(result_df, geometry='geometry')
         result_gdf = result_gdf.set_index(['geometry', 'time'])
@@ -151,9 +268,10 @@ def expand_on_variables(gdf):
         result_gdf = gpd.GeoDataFrame(result_df, geometry='geometry')
         result_gdf = result_gdf.set_index(['geometry'])
 
+    result_gdf.attrs = gdf.attrs.copy()
+
     return result_gdf
 
-
 def expand_on_variables_and_time(gdf):
     """
     Convenience function to expand on both variables and time.
@@ -169,7 +287,7 @@ def expand_on_variables_and_time(gdf):
         return expanded_on_variables_and_time
     except Exception as e:
         return expanded_on_time
-
+
 def estimate_geometry_size_ratio(queries: list):
     """Calculate size ratios for all geometries relative to the first geometry using bounding box area."""
 
@@ -217,101 +335,6 @@ async def estimate_query_size(
         total_size_mb += first_query_dataset.nbytes * ratios[i] / (1024**2)
     return total_size_mb
 
-async def request_geoquery_list(
-    client,
-    quries: list[dict],
-    conc: int = 20,
-):
-    """
-    Execute multiple geo queries.
-
-    Args:
-        client: The Terrakio client instance
-        quries: List of dictionaries containing query parameters
-        conc: The concurrency level for the requests
-
-    Returns:
-        List of query results
-
-    Raises:
-        ValueError: If the queries list is empty
-    """
-    if not quries:
-        raise ValueError("Queries list cannot be empty")
-    if conc > 100:
-        raise ValueError("Concurrency (conc) is too high. Please set conc to 100 or less.")
-
-    for i, query in enumerate(quries):
-        if 'expr' not in query:
-            raise ValueError(f"Query at index {i} is missing the required 'expr' key")
-        if 'feature' not in query:
-            raise ValueError(f"Query at index {i} is missing the required 'feature' key")
-        if 'in_crs' not in query:
-            raise ValueError(f"Query at index {i} is missing the required 'in_crs' key")
-
-    completed_count = 0
-    lock = asyncio.Lock()
-    async def single_geo_query(query):
-        """
-        Execute multiple geo queries concurrently.
-
-        Args:
-            quries: List of dictionaries containing query parameters
-        """
-        total_number_of_requests = len(quries)
-        nonlocal completed_count
-        try:
-            result = await client.geoquery(**query)
-            if isinstance(result, dict) and result.get("error"):
-                error_msg = f"Request failed: {result.get('error_message', 'Unknown error')}"
-                if result.get('status_code'):
-                    error_msg = f"Request failed with status {result['status_code']}: {result.get('error_message', 'Unknown error')}"
-                raise APIError(error_msg)
-            if isinstance(result, list):
-                result = result[0]
-                timestamp_number = result['request_count']
-                return timestamp_number
-            if not isinstance(result, xr.Dataset):
-                raise ValueError(f"Expected xarray Dataset, got {type(result)}")
-
-            async with lock:
-                completed_count += 1
-                if completed_count % max(1, total_number_of_requests // 10) == 0:
-                    client.logger.info(f"Progress: {completed_count}/{total_number_of_requests} requests processed")
-            return result
-        except Exception as e:
-            async with lock:
-                completed_count += 1
-            raise
-
-    try:
-        async with BoundedTaskGroup(max_concurrency=conc) as tg:
-            tasks = [tg.create_task(single_geo_query(quries[idx])) for idx in range(len(quries))]
-            all_results = [task.result() for task in tasks]
-
-    except* Exception as eg:
-        for e in eg.exceptions:
-            if hasattr(e, 'response'):
-                raise APIError(f"API request failed: {e.response.text}")
-        raise
-    client.logger.info("All requests completed!")
-
-    if not all_results:
-        raise ValueError("No valid results were returned for any geometry")
-    if isinstance(all_results, list) and type(all_results[0]) == int:
-        return sum(all_results)/len(all_results)
-    else:
-        geometries = []
-        for query in quries:
-            feature = query['feature']
-            geometry = shape(feature['geometry'])
-            geometries.append(geometry)
-        result_gdf = gpd.GeoDataFrame({
-            'geometry': geometries,
-            'dataset': all_results
-        })
-        return result_gdf
-
 async def estimate_timestamp_number(
     client,
     quries: list[dict],
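The `request_geoquery_list` helper removed here is not gone: per the file list at the top it now lives in `terrakio_core/convenience_functions/geoquries.py` (note the filename spelling), and this module re-imports it via `from .geoquries import request_geoquery_list`. A hedged sketch of calling it from the new location; the absolute import path is inferred from the file list rather than stated in the diff body.

```python
# Hedged sketch: the relocated helper keeps its old call shape -- a list of dicts
# that each carry 'expr', 'feature' and 'in_crs', plus a concurrency cap.
from terrakio_core.convenience_functions.geoquries import request_geoquery_list

async def run_queries(client, quries: list[dict]):
    # conc must stay at or below 100, as enforced by the validation shown above.
    return await request_geoquery_list(client=client, quries=quries, conc=20)
```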
@@ -388,9 +411,7 @@ def gdf_to_json(
     """
     mass_stats_requests = []
 
-    # Loop through each row in the GeoDataFrame
     for idx, row in gdf.iterrows():
-        # Create the request feature
         request_feature = {
             "expr": expr,
             "feature": {
@@ -404,29 +425,24 @@ def gdf_to_json(
             "geom_fix": geom_fix,
         }
 
-        # Determine group name and file name based on id_column
         if id_column is not None and id_column in gdf.columns:
-            # Use the value from the specified column as group and file name
             identifier = str(row[id_column])
             group_name = f"group_{identifier}"
             file_name = f"file_{identifier}"
         else:
-            # Use the index as group and file name
             group_name = f"group_{idx}"
             file_name = f"file_{idx}"
 
-        # Create the complete request entry
         request_entry = {
             "group": group_name,
             "file": file_name,
             "request": request_feature,
         }
 
-        # Add the request to our list
         mass_stats_requests.append(request_entry)
 
     return mass_stats_requests
-
+
 async def handle_mass_stats(
     client,
     gdf: GeoDataFrame,
@@ -436,17 +452,24 @@ async def handle_mass_stats(
     resolution: int = -1,
     geom_fix: bool = False,
     id_column: Optional[str] = None,
-
 ):
-    request_json = gdf_to_json(gdf
-
-
-
-
-
-
+    request_json = gdf_to_json(gdf=gdf, expr=expr, in_crs=in_crs, out_crs=out_crs,
+                               resolution=resolution, geom_fix=geom_fix, id_column=id_column)
+
+    job_response = await client.mass_stats.execute_job(
+        name=f"zonal-stats-{str(uuid.uuid4())[:6]}",
+        output="netcdf",
+        config={},
+        request_json=request_json,
+        overwrite=True,
     )
-
+
+    # Extract the actual task ID from the response
+    if isinstance(job_response, dict) and 'task_id' in job_response:
+        return job_response['task_id']  # Return just the string ID
+    else:
+        return job_response  # In case it's already just the ID
+
 
 async def zonal_stats(
     client,
@@ -461,7 +484,6 @@ async def zonal_stats(
     id_column: Optional[str] = None,
 ):
     """Compute zonal statistics for all geometries in a GeoDataFrame."""
-
     if mass_stats:
         mass_stats_id = await handle_mass_stats(
             client = client,
@@ -471,9 +493,13 @@ async def zonal_stats(
             out_crs = out_crs,
             resolution = resolution,
             geom_fix = geom_fix,
-            id_column = id_column
+            id_column = id_column,
         )
-
+        job_name = await client.mass_stats.track_job([mass_stats_id])
+        job_name = job_name[mass_stats_id]["name"]
+        cloud_files_object = cloud_object(job_id = mass_stats_id, job_name = job_name, client = client)
+        return cloud_files_object
+
     quries = []
     for i in range(len(gdf)):
         quries.append({
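Because the mass-stats branch now returns the `cloud_object` as soon as the job is submitted, callers may need to wait for completion before `head()` will succeed. A hedged polling sketch follows; the loop itself is illustrative, while `client.mass_stats.track_job([...])`, the status strings, and `head(n)` come from this diff.

```python
import asyncio

# Hedged sketch: wait for the submitted mass-stats job to finish, then preview it.
async def wait_and_preview(client, cloud_result, poll_interval: int = 30, n: int = 5):
    while True:
        info = await client.mass_stats.track_job([cloud_result.job_id])
        status = info[cloud_result.job_id]["status"]
        if status == "Completed":
            break
        if status in ("Failed", "Cancelled", "Error"):
            raise RuntimeError(f"Job {cloud_result.job_id} ended with status {status}")
        await asyncio.sleep(poll_interval)
    # head() runs its own event loop via asyncio.run; the nest_asyncio.apply()
    # call added at module import makes that safe to invoke from async code.
    return cloud_result.head(n=n)
```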
@@ -494,130 +520,9 @@ async def zonal_stats(
         raise ValueError(local_or_remote_result["reason"])
     else:
         gdf_with_datasets = await request_geoquery_list(client = client, quries = quries, conc = conc)
+        gdf_with_datasets.attrs["cloud_metadata"] = {
+            "is_cloud_backed": False,
+        }
         gdf_with_datasets = expand_on_variables_and_time(gdf_with_datasets)
         return gdf_with_datasets
 
-async def create_dataset_file(
-    client,
-    aoi: str,
-    expression: str,
-    output: str,
-    download_path: str,
-    in_crs: str = "epsg:4326",
-    to_crs: str = "epsg:4326",
-    res: float = 0.0001,
-    region: str = None,
-    overwrite: bool = False,
-    skip_existing: bool = False,
-    non_interactive: bool = True,
-    name: str | None = None,
-    poll_interval: int = 30,
-    max_file_size_mb: int = 5120,
-    tile_size: int = 1024,
-    mask: bool = True
-) -> dict:
-
-    if not name:
-        name = f"file-gen-{uuid.uuid4().hex[:8]}"
-
-    body, reqs, groups = tiles(
-        name = name,
-        aoi = aoi,
-        expression = expression,
-        output = output,
-        tile_size = tile_size,
-        crs = in_crs,
-        res = res,
-        region = region,
-        to_crs = to_crs,
-        mask = mask,
-        overwrite = overwrite,
-        skip_existing = skip_existing,
-        non_interactive = non_interactive
-    )
-    with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as tempreq:
-        tempreq.write(reqs)
-        tempreqname = tempreq.name
-
-    task_id = await client.mass_stats.execute_job(
-        name=body["name"],
-        region=body["region"],
-        output=body["output"],
-        config = {},
-        overwrite=body["overwrite"],
-        skip_existing=body["skip_existing"],
-        request_json=tempreqname,
-    )
-
-    start_time = time.time()
-    status = None
-    client.logger.info(f"Tracking data generation job {task_id['task_id']}...")
-    while True:
-        try:
-            taskid = task_id['task_id']
-            trackinfo = await client.mass_stats.track_job([taskid])
-            status = trackinfo[taskid]['status']
-            if status == 'Completed':
-                client.logger.info('Data generated successfully!')
-                break
-            elif status in ['Failed', 'Cancelled', 'Error']:
-                raise RuntimeError(f"Job {taskid} failed with status: {status}")
-            else:
-                elapsed_time = time.time() - start_time
-                client.logger.info(f"Job status: {status} - Elapsed time: {elapsed_time:.1f}s")
-                await asyncio.sleep(poll_interval)
-
-
-        except KeyboardInterrupt:
-            client.logger.info(f"\nInterrupted! Job {taskid} is still running in the background.")
-            raise
-        except Exception as e:
-            client.logger.info(f"\nError tracking job: {e}")
-            raise
-
-    os.unlink(tempreqname)
-
-    combine_result = await client.mass_stats.combine_tiles(body["name"], body["overwrite"], body["output"], max_file_size_mb=max_file_size_mb)
-    combine_task_id = combine_result.get("task_id")
-
-    combine_start_time = time.time()
-    client.logger.info(f"Tracking file generation job {combine_task_id}...")
-    while True:
-        try:
-            trackinfo = await client.mass_stats.track_job([combine_task_id])
-            if body["output"] == "netcdf":
-                download_file_name = trackinfo[combine_task_id]['folder'] + '.nc'
-            elif body["output"] == "geotiff":
-                download_file_name = trackinfo[combine_task_id]['folder'] + '.tif'
-            bucket = trackinfo[combine_task_id]['bucket']
-            combine_status = trackinfo[combine_task_id]['status']
-            if combine_status == 'Completed':
-                client.logger.info('File/s generated successfully!')
-                break
-            elif combine_status in ['Failed', 'Cancelled', 'Error']:
-                raise RuntimeError(f"File generation job {combine_task_id} failed with status: {combine_status}")
-            else:
-                elapsed_time = time.time() - combine_start_time
-                client.logger.info(f"File generation job status: {combine_status} - Elapsed time: {elapsed_time:.1f}s")
-                time.sleep(poll_interval)
-        except KeyboardInterrupt:
-            client.logger.info(f"\nInterrupted! File generation job {combine_task_id} is still running in the background.")
-            raise
-        except Exception as e:
-            client.logger.info(f"\nError tracking file generation job: {e}")
-            raise
-
-    if download_path:
-        await client.mass_stats.download_file(
-            job_name=body["name"],
-            bucket=bucket,
-            file_type='processed',
-            folder='file-gen',
-            page_size=100,
-            output_path=download_path,
-        )
-    else:
-        path = f"{body['name']}/outputs/merged/{download_file_name}"
-        client.logger.info(f"Dataset file/s is available at {path}")
-
-    return {"generation_task_id": task_id, "combine_task_id": combine_task_id}