terrakio-core 0.4.8__py3-none-any.whl → 0.4.93__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of terrakio-core might be problematic.
- terrakio_core/__init__.py +1 -1
- terrakio_core/accessors.py +800 -328
- terrakio_core/async_client.py +6 -2
- terrakio_core/convenience_functions/create_dataset_file.py +132 -0
- terrakio_core/convenience_functions/geoquries.py +102 -0
- terrakio_core/convenience_functions/{convenience_functions.py → zonal_stats.py} +166 -263
- terrakio_core/endpoints/mass_stats.py +42 -147
- terrakio_core/sync_client.py +0 -340
- terrakio_core-0.4.93.dist-info/METADATA +31 -0
- {terrakio_core-0.4.8.dist-info → terrakio_core-0.4.93.dist-info}/RECORD +11 -10
- {terrakio_core-0.4.8.dist-info → terrakio_core-0.4.93.dist-info}/WHEEL +1 -2
- terrakio_core-0.4.8.dist-info/METADATA +0 -47
- terrakio_core-0.4.8.dist-info/top_level.txt +0 -1
terrakio_core/async_client.py
CHANGED
@@ -34,11 +34,15 @@ class AsyncClient(BaseClient):
 
     async def _terrakio_request(self, method: str, endpoint: str, **kwargs):
         if self.session is None:
+            # To this:
             headers = {
-                '
-                'x-api-key': self.key,
+                'x-api-key': self.key,
                 'Authorization': self.token
             }
+
+            # Only add Content-Type if it's a JSON request
+            if 'json' in kwargs:
+                headers['Content-Type'] = 'application/json'
             clean_headers = {k: v for k, v in headers.items() if v is not None}
             async with aiohttp.ClientSession(headers=clean_headers, timeout=aiohttp.ClientTimeout(total=self.timeout)) as session:
                 return await self._make_request_with_retry(session, method, endpoint, **kwargs)
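The effect of this hunk is that 'Content-Type: application/json' is now pinned only when the request actually carries a JSON body; for other payloads (e.g. multipart uploads) the header is left unset so aiohttp can supply its own. A minimal standalone sketch of that header-selection logic — build_headers and its arguments are hypothetical stand-ins for the method's self.key, self.token, and **kwargs:

def build_headers(key, token, kwargs):
    # Mirrors the hunk above: base headers, conditional Content-Type,
    # then drop any header whose value is None.
    headers = {'x-api-key': key, 'Authorization': token}
    if 'json' in kwargs:
        headers['Content-Type'] = 'application/json'
    return {k: v for k, v in headers.items() if v is not None}

# JSON request: Content-Type is pinned.
assert build_headers('k', None, {'json': {}}) == {
    'x-api-key': 'k', 'Content-Type': 'application/json'
}
# Non-JSON request (e.g. multipart 'data'): no Content-Type, so aiohttp
# can set its own boundary-bearing value.
assert build_headers('k', 't', {'data': b'...'}) == {
    'x-api-key': 'k', 'Authorization': 't'
}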
terrakio_core/convenience_functions/create_dataset_file.py
ADDED
@@ -0,0 +1,132 @@
+import asyncio
+import os
+import tempfile
+import time
+import uuid
+
+from ..helper.tiles import tiles
+
+async def create_dataset_file(
+    client,
+    aoi: str,
+    expression: str,
+    output: str,
+    download_path: str,
+    in_crs: str = "epsg:4326",
+    to_crs: str = "epsg:4326",
+    res: float = 0.0001,
+    region: str = None,
+    overwrite: bool = False,
+    skip_existing: bool = False,
+    non_interactive: bool = True,
+    name: str | None = None,
+    poll_interval: int = 30,
+    max_file_size_mb: int = 5120,
+    tile_size: int = 1024,
+    mask: bool = True
+) -> dict:
+
+    if not name:
+        name = f"file-gen-{uuid.uuid4().hex[:8]}"
+
+    body, reqs, groups = tiles(
+        name = name,
+        aoi = aoi,
+        expression = expression,
+        output = output,
+        tile_size = tile_size,
+        crs = in_crs,
+        res = res,
+        region = region,
+        to_crs = to_crs,
+        mask = mask,
+        overwrite = overwrite,
+        skip_existing = skip_existing,
+        non_interactive = non_interactive
+    )
+    with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as tempreq:
+        tempreq.write(reqs)
+        tempreqname = tempreq.name
+
+    task_id = await client.mass_stats.execute_job(
+        name=body["name"],
+        region=body["region"],
+        output=body["output"],
+        config = {},
+        overwrite=body["overwrite"],
+        skip_existing=body["skip_existing"],
+        request_json=tempreqname,
+    )
+
+    start_time = time.time()
+    status = None
+    client.logger.info(f"Tracking data generation job {task_id['task_id']}...")
+    while True:
+        try:
+            taskid = task_id['task_id']
+            trackinfo = await client.mass_stats.track_job([taskid])
+            status = trackinfo[taskid]['status']
+            if status == 'Completed':
+                client.logger.info('Data generated successfully!')
+                break
+            elif status in ['Failed', 'Cancelled', 'Error']:
+                raise RuntimeError(f"Job {taskid} failed with status: {status}")
+            else:
+                elapsed_time = time.time() - start_time
+                client.logger.info(f"Job status: {status} - Elapsed time: {elapsed_time:.1f}s")
+                await asyncio.sleep(poll_interval)
+
+
+        except KeyboardInterrupt:
+            client.logger.info(f"\nInterrupted! Job {taskid} is still running in the background.")
+            raise
+        except Exception as e:
+            client.logger.info(f"\nError tracking job: {e}")
+            raise
+
+    os.unlink(tempreqname)
+
+    combine_result = await client.mass_stats.combine_tiles(body["name"], body["overwrite"], body["output"], max_file_size_mb=max_file_size_mb)
+    combine_task_id = combine_result.get("task_id")
+
+    combine_start_time = time.time()
+    client.logger.info(f"Tracking file generation job {combine_task_id}...")
+    while True:
+        try:
+            trackinfo = await client.mass_stats.track_job([combine_task_id])
+            if body["output"] == "netcdf":
+                download_file_name = trackinfo[combine_task_id]['folder'] + '.nc'
+            elif body["output"] == "geotiff":
+                download_file_name = trackinfo[combine_task_id]['folder'] + '.tif'
+            bucket = trackinfo[combine_task_id]['bucket']
+            combine_status = trackinfo[combine_task_id]['status']
+            if combine_status == 'Completed':
+                client.logger.info('File/s generated successfully!')
+                break
+            elif combine_status in ['Failed', 'Cancelled', 'Error']:
+                raise RuntimeError(f"File generation job {combine_task_id} failed with status: {combine_status}")
+            else:
+                elapsed_time = time.time() - combine_start_time
+                client.logger.info(f"File generation job status: {combine_status} - Elapsed time: {elapsed_time:.1f}s")
+                time.sleep(poll_interval)
+        except KeyboardInterrupt:
+            client.logger.info(f"\nInterrupted! File generation job {combine_task_id} is still running in the background.")
+            raise
+        except Exception as e:
+            client.logger.info(f"\nError tracking file generation job: {e}")
+            raise
+
+    if download_path:
+        await client.mass_stats.download_file(
+            job_name=body["name"],
+            bucket=bucket,
+            file_type='processed',
+            folder='file-gen',
+            page_size=100,
+            output_path=download_path,
+        )
+    else:
+        path = f"{body['name']}/outputs/merged/{download_file_name}"
+        client.logger.info(f"Dataset file/s is available at {path}")
+
+    return {"generation_task_id": task_id, "combine_task_id": combine_task_id}
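The new helper chains a tiled mass-stats generation job with a combine step, polling each to completion before optionally downloading the merged output. A hypothetical call sketch, assuming an already-configured async Terrakio client with the mass_stats endpoints wired up; the aoi, expression, and path values below are illustrative, not taken from the diff:

import asyncio
from terrakio_core.convenience_functions.create_dataset_file import create_dataset_file

async def main(client):
    # Placeholder arguments; output must be "netcdf" or "geotiff" to hit
    # the file-extension branches above.
    task_ids = await create_dataset_file(
        client,
        aoi="suburbs.geojson",
        expression="red@(year=2023)",
        output="netcdf",
        download_path="./downloads",
        poll_interval=30,
    )
    # Returns the IDs of both jobs it tracked.
    print(task_ids["generation_task_id"], task_ids["combine_task_id"])

# asyncio.run(main(client))  # given a configured AsyncClient instance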
terrakio_core/convenience_functions/geoquries.py
ADDED
@@ -0,0 +1,102 @@
+import asyncio
+
+import geopandas as gpd
+from shapely.geometry import shape
+
+from ..exceptions import APIError
+from ..helper.bounded_taskgroup import BoundedTaskGroup
+
+async def request_geoquery_list(
+    client,
+    quries: list[dict],
+    conc: int = 20,
+):
+    """
+    Execute multiple geo queries.
+
+    Args:
+        client: The Terrakio client instance
+        quries: List of dictionaries containing query parameters
+        conc: The concurrency level for the requests
+
+    Returns:
+        List of query results
+
+    Raises:
+        ValueError: If the queries list is empty
+    """
+    if not quries:
+        raise ValueError("Queries list cannot be empty")
+    if conc > 100:
+        raise ValueError("Concurrency (conc) is too high. Please set conc to 100 or less.")
+
+    for i, query in enumerate(quries):
+        if 'expr' not in query:
+            raise ValueError(f"Query at index {i} is missing the required 'expr' key")
+        if 'feature' not in query:
+            raise ValueError(f"Query at index {i} is missing the required 'feature' key")
+        if 'in_crs' not in query:
+            raise ValueError(f"Query at index {i} is missing the required 'in_crs' key")
+
+    completed_count = 0
+    lock = asyncio.Lock()
+    async def single_geo_query(query):
+        """
+        Execute multiple geo queries concurrently.
+
+        Args:
+            quries: List of dictionaries containing query parameters
+        """
+        total_number_of_requests = len(quries)
+        nonlocal completed_count
+        try:
+            result = await client.geoquery(**query)
+            if isinstance(result, dict) and result.get("error"):
+                error_msg = f"Request failed: {result.get('error_message', 'Unknown error')}"
+                if result.get('status_code'):
+                    error_msg = f"Request failed with status {result['status_code']}: {result.get('error_message', 'Unknown error')}"
+                raise APIError(error_msg)
+            if isinstance(result, list):
+                result = result[0]
+                timestamp_number = result['request_count']
+                return timestamp_number
+            if not isinstance(result, xr.Dataset):
+                raise ValueError(f"Expected xarray Dataset, got {type(result)}")
+
+            async with lock:
+                completed_count += 1
+                if completed_count % max(1, total_number_of_requests // 10) == 0:
+                    client.logger.info(f"Progress: {completed_count}/{total_number_of_requests} requests processed")
+            return result
+        except Exception as e:
+            async with lock:
+                completed_count += 1
+            raise
+
+    try:
+        async with BoundedTaskGroup(max_concurrency=conc) as tg:
+            tasks = [tg.create_task(single_geo_query(quries[idx])) for idx in range(len(quries))]
+        all_results = [task.result() for task in tasks]
+
+    except* Exception as eg:
+        for e in eg.exceptions:
+            if hasattr(e, 'response'):
+                raise APIError(f"API request failed: {e.response.text}")
+        raise
+    client.logger.info("All requests completed!")
+
+    if not all_results:
+        raise ValueError("No valid results were returned for any geometry")
+    if isinstance(all_results, list) and type(all_results[0]) == int:
+        return sum(all_results)/len(all_results)
+    else:
+        geometries = []
+        for query in quries:
+            feature = query['feature']
+            geometry = shape(feature['geometry'])
+            geometries.append(geometry)
+        result_gdf = gpd.GeoDataFrame({
+            'geometry': geometries,
+            'dataset': all_results
+        })
+        return result_gdf
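This helper fans the queries out through a BoundedTaskGroup and either averages integer 'request_count' replies or assembles the per-geometry results into a GeoDataFrame. (Note the hunk checks isinstance(result, xr.Dataset) without an xarray import appearing anywhere in this diff.) A hypothetical call sketch, assuming a configured client whose geoquery() accepts these keyword arguments; the expression and geometry are placeholders:

import asyncio
from terrakio_core.convenience_functions.geoquries import request_geoquery_list

# Each query dict must carry 'expr', 'feature', and 'in_crs', per the
# validation loop above; these values are illustrative only.
queries = [
    {
        "expr": "mean(red@(year=2023))",
        "feature": {
            "type": "Feature",
            "geometry": {"type": "Point", "coordinates": [149.13, -35.28]},
            "properties": {},
        },
        "in_crs": "epsg:4326",
    },
]

# result = asyncio.run(request_geoquery_list(client, queries, conc=20))
# Dataset results come back as a GeoDataFrame keyed by each query's geometry;
# integer request-count replies are collapsed to a single average.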