terrakio-core 0.4.97__tar.gz → 0.4.98.1b1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of terrakio-core might be problematic.
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/PKG-INFO +2 -1
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/pyproject.toml +2 -1
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/__init__.py +1 -1
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/async_client.py +26 -169
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/config.py +3 -44
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/convenience_functions/zonal_stats.py +86 -33
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/endpoints/auth.py +96 -47
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/endpoints/dataset_management.py +120 -54
- terrakio_core-0.4.98.1b1/terrakio_core/endpoints/group_management.py +421 -0
- terrakio_core-0.4.98.1b1/terrakio_core/endpoints/mass_stats.py +835 -0
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/endpoints/model_management.py +213 -109
- terrakio_core-0.4.98.1b1/terrakio_core/endpoints/user_management.py +216 -0
- terrakio_core-0.4.98.1b1/terrakio_core/exceptions.py +390 -0
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/sync_client.py +9 -124
- terrakio_core-0.4.97/terrakio_core/endpoints/group_management.py +0 -228
- terrakio_core-0.4.97/terrakio_core/endpoints/mass_stats.py +0 -712
- terrakio_core-0.4.97/terrakio_core/endpoints/user_management.py +0 -131
- terrakio_core-0.4.97/terrakio_core/exceptions.py +0 -20
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/.gitignore +0 -0
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/README.md +0 -0
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/accessors.py +0 -0
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/client.py +0 -0
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/convenience_functions/create_dataset_file.py +0 -0
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/convenience_functions/geoquries.py +0 -0
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/endpoints/space_management.py +0 -0
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/helper/bounded_taskgroup.py +0 -0
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/helper/decorators.py +0 -0
- {terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/helper/tiles.py +0 -0
{terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: terrakio-core
-Version: 0.4.97
+Version: 0.4.98.1b1
 Summary: Core package for the terrakio-python-api
 Requires-Python: >=3.11
 Requires-Dist: aiofiles>=24.1.0
@@ -14,6 +14,7 @@ Requires-Dist: onnxruntime>=1.22.1
 Requires-Dist: psutil>=7.0.0
 Requires-Dist: scipy>=1.16.1
 Requires-Dist: shapely>=2.1.1
+Requires-Dist: typer>=0.19.2
 Requires-Dist: xarray>=2025.7.1
 Provides-Extra: ml
 Requires-Dist: scikit-learn>=1.7.1; extra == 'ml'
{terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "terrakio-core"
-version = "0.4.97"
+version = "0.4.98.1b1"
 description = "Core package for the terrakio-python-api"
 readme = "README.md"
 requires-python = ">=3.11"
@@ -20,6 +20,7 @@ dependencies = [
     "shapely>=2.1.1",
     "xarray>=2025.7.1",
     "nest-asyncio>=1.6.0",
+    "typer>=0.19.2",
 ]
 
 [project.optional-dependencies]
{terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/async_client.py

@@ -9,7 +9,7 @@ from geopandas import GeoDataFrame
 from shapely.geometry.base import BaseGeometry as ShapelyGeometry
 from shapely.geometry import mapping
 from .client import BaseClient
-from .exceptions import APIError
+from .exceptions import APIError, NetworkError
 from .endpoints.dataset_management import DatasetManagement
 from .endpoints.user_management import UserManagement
 from .endpoints.mass_stats import MassStats
@@ -36,13 +36,10 @@ class AsyncClient(BaseClient):
 
     async def _terrakio_request(self, method: str, endpoint: str, **kwargs):
         if self.session is None:
-            # To this:
             headers = {
                 'x-api-key': self.key,
                 'Authorization': self.token
             }
-
-            # Only add Content-Type if it's a JSON request
            if 'json' in kwargs:
                headers['Content-Type'] = 'application/json'
            clean_headers = {k: v for k, v in headers.items() if v is not None}
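The rebuilt header logic above sets Content-Type only for JSON requests and drops None values before sending. A minimal standalone sketch of that filtering step (the key and token values are placeholders, not real credentials):

    # Placeholder credentials; only the filtering logic mirrors the diff above.
    key = "tkio-xxxxxxxx"
    token = None  # not logged in, so Authorization should be dropped

    kwargs = {'json': {'name': 'example'}}  # stand-in for the request kwargs
    headers = {'x-api-key': key, 'Authorization': token}
    if 'json' in kwargs:
        headers['Content-Type'] = 'application/json'

    clean_headers = {k: v for k, v in headers.items() if v is not None}
    assert 'Authorization' not in clean_headers  # None values are filtered out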
@@ -57,20 +54,17 @@ class AsyncClient(BaseClient):
         for attempt in range(self.retry + 1):
             try:
                 async with session.request(method, url, **kwargs) as response:
+                    content = await response.text()
+
                     if not response.ok and self._should_retry(response.status, attempt):
                         self.logger.info(f"Request failed (attempt {attempt+1}/{self.retry+1}): {response.status}. Retrying...")
                         continue
-                    if
-                    [… five deleted lines of the old error-handling body were unrecoverable from the extraction …]
-                            except:
-                                pass
-                        raise APIError(error_msg, status_code=response.status)
-                    return await self._parse_response(response)
-
+                    if response.ok:
+                        data = await self._parse_response(response)
+                        return data, response.status
+                    else:
+                        error_data = await response.json()
+                        return error_data, response.status
             except aiohttp.ClientError as e:
                 last_exception = e
                 if attempt < self.retry:
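Note the behavioural change: _terrakio_request now returns a (payload, status_code) tuple for both success and error responses instead of raising APIError inline, so callers decide how to handle failures. A hedged sketch of a caller under that contract (the endpoint path is illustrative, not from the diff):

    async def whoami(client):
        # 'users/me' is a hypothetical endpoint used only for illustration.
        data, status = await client._terrakio_request('GET', 'users/me')
        if status >= 400:
            # Error payloads arrive as parsed JSON under the new contract.
            raise RuntimeError(f"Request failed ({status}): {data}")
        return data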
@@ -78,8 +72,8 @@ class AsyncClient(BaseClient):
                     continue
                 else:
                     break
-
-        raise
+
+        raise NetworkError(f"Network failure after {self.retry+1} attempts: {last_exception}")
 
     def _should_retry(self, status_code: int, attempt: int) -> bool:
         """Determine if the request should be retried based on status code."""
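Exhausted retries now surface as a single NetworkError wrapping the last aiohttp.ClientError, rather than re-raising the bare exception. A sketch of catching it (the import path follows the package layout shown in this diff; the helper name is mine):

    from terrakio_core.exceptions import NetworkError

    async def fetch_or_none(client, method, endpoint):
        """Return the (payload, status) tuple, or None if the network gave up."""
        try:
            return await client._terrakio_request(method, endpoint)
        except NetworkError as exc:
            client.logger.warning(f"Giving up after retries: {exc}")
            return None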
@@ -193,158 +187,21 @@ class AsyncClient(BaseClient):
 
         return result
 
-    async def zonal_stats(
-        [… fourteen lines (the parameter list and the docstring opening) were unrecoverable from the extraction …]
-        Args:
-            gdf (GeoDataFrame): GeoDataFrame containing geometries
-            expr (str): Terrakio expression to evaluate, can include spatial aggregations
-            conc (int): Number of concurrent requests to make
-            in_crs (str): Input coordinate reference system
-            out_crs (str): Output coordinate reference system
-            resolution (int): Resolution parameter
-            geom_fix (bool): Whether to fix the geometry (default False)
-            mass_stats (bool): Whether to use mass stats for processing (default False)
-            id_column (Optional[str]): Name of the ID column to use (default None)
-
-        Returns:
-            geopandas.GeoDataFrame: GeoDataFrame with added columns for results
-
-        Raises:
-            ValueError: If concurrency is too high or if data exceeds memory limit without streaming
-            APIError: If the API request fails
-        """
-        # the sync client didn't pass the self here, so the client is now async
-        return await _zonal_stats(
-            client=self,
-            gdf=gdf,
-            expr=expr,
-            conc=conc,
-            in_crs=in_crs,
-            out_crs=out_crs,
-            resolution=resolution,
-            geom_fix=geom_fix,
-            mass_stats=mass_stats,
-            id_column=id_column,
-        )
-
-    async def create_dataset_file(
-        self,
-        name: str,
-        aoi: str,
-        expression: str,
-        output: str,
-        in_crs: str = "epsg:4326",
-        res: float = 0.0001,
-        region: str = "aus",
-        to_crs: str = "epsg:4326",
-        overwrite: bool = True,
-        skip_existing: bool = False,
-        non_interactive: bool = True,
-        poll_interval: int = 30,
-        download_path: str = "/home/user/Downloads",
-        mask = True,
-        max_file_size_mb: int = 5120,  # Default to 5GB
-        tile_size: int = 1024,
-    ) -> dict:
-        """
-        Create a dataset file using mass stats operations.
-
-        Args:
-            aoi (str): Area of interest
-            expression (str): Terrakio expression to evaluate
-            output (str): Output format
-            in_crs (str): Input coordinate reference system (default "epsg:4326")
-            res (float): Resolution (default 0.0001)
-            region (str): Region (default "aus")
-            to_crs (str): Target coordinate reference system (default "epsg:4326")
-            overwrite (bool): Whether to overwrite existing files (default True)
-            skip_existing (bool): Whether to skip existing files (default False)
-            non_interactive (bool): Whether to run non-interactively (default True)
-            poll_interval (int): Polling interval in seconds (default 30)
-            download_path (str): Download path (default "/home/user/Downloads")
-
-        Returns:
-            dict: Dictionary containing generation_task_id and combine_task_id
-
-        Raises:
-            ConfigurationError: If mass stats client is not properly configured
-            RuntimeError: If job fails
-        """
-        return await _create_dataset_file(
-            client=self,
-            aoi=aoi,
-            expression=expression,
-            output=output,
-            in_crs=in_crs,
-            res=res,
-            region=region,
-            to_crs=to_crs,
-            overwrite=overwrite,
-            skip_existing=skip_existing,
-            non_interactive=non_interactive,
-            poll_interval=poll_interval,
-            download_path=download_path,
-            name=name,
-            mask=mask,
-            max_file_size_mb=max_file_size_mb,
-            tile_size=tile_size
-        )
-
-    async def geo_queries(
-        self,
-        queries: list[dict],
-        conc: int = 20,
-    ):
-        """
-        Execute multiple geo queries concurrently.
-
-        Args:
-            queries (list[dict]): List of dictionaries containing query parameters.
-                Each query must have 'expr', 'feature', and 'in_crs' keys.
-            conc (int): Number of concurrent requests to make (default 20, max 100)
-
-        Returns:
-            Union[float, geopandas.GeoDataFrame]:
-                - float: Average of all results if results are integers
-                - GeoDataFrame: GeoDataFrame with geometry and dataset columns if results are xarray datasets
-
-        Raises:
-            ValueError: If queries list is empty, concurrency is too high, or queries are malformed
-            APIError: If the API request fails
-
-        Example:
-            queries = [
-                {
-                    'expr': 'WCF.wcf',
-                    'feature': {'type': 'Feature', 'geometry': {...}, 'properties': {}},
-                    'in_crs': 'epsg:4326'
-                },
-                {
-                    'expr': 'NDVI.ndvi',
-                    'feature': {'type': 'Feature', 'geometry': {...}, 'properties': {}},
-                    'in_crs': 'epsg:4326'
-                }
-            ]
-            result = await client.geo_queries(queries)
-        """
-        return await _request_geoquery_list(
-            client=self,
-            quries=queries,  # Note: keeping original parameter name for compatibility
-            conc=conc,
-        )
+    async def zonal_stats(self, *args, **kwargs):
+        """Proxy to convenience zonal_stats with full argument passthrough."""
+        return await _zonal_stats(self, *args, **kwargs)
+
+    async def create_dataset_file(self, *args, **kwargs) -> dict:
+        """Proxy to convenience create_dataset_file with full argument passthrough."""
+        kwargs.setdefault('download_path', "/home/user/Downloads")
+        kwargs.setdefault('region', "aus")
+        return await _create_dataset_file(self, *args, **kwargs)
+
+    async def geo_queries(self, *args, **kwargs):
+        """Proxy to convenience request_geoquery_list with full argument passthrough."""
+        if 'queries' in kwargs:
+            kwargs['quries'] = kwargs.pop('queries')
+        return await _request_geoquery_list(self, *args, **kwargs)
 
     async def __aenter__(self):
         if self._session is None:
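The verbose wrappers are collapsed into thin passthrough proxies, and geo_queries maps the public 'queries' keyword onto the misspelled internal 'quries' parameter so callers never see the typo. From the caller's side nothing changes; a hedged usage sketch (client construction, the input file, and the expression are illustrative, and the convenience function's defaults are assumed to cover the remaining parameters):

    import asyncio
    import geopandas as gpd
    from terrakio_core.async_client import AsyncClient

    async def main():
        async with AsyncClient() as client:        # assumes a configured client
            gdf = gpd.read_file("fields.geojson")  # hypothetical input file
            stats = await client.zonal_stats(gdf=gdf, expr="NDVI.ndvi", conc=20)
            print(stats.head())

    asyncio.run(main())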
{terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/config.py

@@ -7,7 +7,7 @@ from .exceptions import ConfigurationError
 
 # Default configuration file locations
 DEFAULT_CONFIG_FILE = os.path.join(os.environ.get("HOME", ""), ".tkio_config.json")
-DEFAULT_API_URL = "https://
+DEFAULT_API_URL = "https://dev-au.terrak.io"
 
 def read_config_file(config_file: str = DEFAULT_CONFIG_FILE, logger: logging.Logger = None) -> Dict[str, Any]:
     """
@@ -27,14 +27,8 @@ def read_config_file(config_file: str = DEFAULT_CONFIG_FILE, logger: logging.Log
     file if one doesn't exist and returns appropriate status flags.
     """
     config_path = Path(os.path.expanduser(config_file))
-
-    # that we need to login before using any of the functions
-    # Check if config file exists
+
    if not config_path.exists():
-        # Create an empty config file
-        config_path.parent.mkdir(parents=True, exist_ok=True)
-        with open(config_path, 'w') as f:
-            json.dump({}, f)
        logger.info("No API key found. Please provide an API key to use this client.")
        return {
            'url': DEFAULT_API_URL,
@@ -45,12 +39,9 @@ def read_config_file(config_file: str = DEFAULT_CONFIG_FILE, logger: logging.Log
        }
 
    try:
-        # Read the config file
        with open(config_path, 'r') as f:
            config_data = json.load(f)
 
-        # Read the config file data
-        # Check if config has an API key
        if not config_data or 'TERRAKIO_API_KEY' not in config_data or not config_data.get('TERRAKIO_API_KEY'):
            logger.info("No API key found. Please provide an API key to use this client.")
            return {
@@ -61,11 +52,8 @@ def read_config_file(config_file: str = DEFAULT_CONFIG_FILE, logger: logging.Log
                'token': config_data.get('PERSONAL_TOKEN')
            }
        logger.info(f"Currently logged in as: {config_data.get('EMAIL')}")
-        # this meanb that we have already logged in to the tkio account
 
-        # Convert the JSON config to our expected format
        config = {
-            # Always use the default URL, not from config file
            'url': DEFAULT_API_URL,
            'key': config_data.get('TERRAKIO_API_KEY'),
            'is_logged_in': True,
@@ -84,33 +72,4 @@ def read_config_file(config_file: str = DEFAULT_CONFIG_FILE, logger: logging.Log
        'is_logged_in': False,
        'user_email': None,
        'token': None
-    }
-
-def create_default_config(email: str, api_key: str, config_file: str = DEFAULT_CONFIG_FILE) -> None:
-    """
-    Create a default configuration file in JSON format.
-
-    Args:
-        email: User email
-        api_key: Terrakio API key
-        config_file: Path to configuration file
-
-    Raises:
-        ConfigurationError: If the configuration file can't be created
-    """
-    config_path = Path(os.path.expanduser(config_file))
-
-    # Ensure directory exists
-    config_path.parent.mkdir(parents=True, exist_ok=True)
-
-    try:
-        config_data = {
-            "EMAIL": email,
-            "TERRAKIO_API_KEY": api_key
-        }
-
-        with open(config_path, 'w') as f:
-            json.dump(config_data, f, indent=2)
-
-    except Exception as e:
-        raise ConfigurationError(f"Failed to create configuration file: {e}")
+    }
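With create_default_config removed, nothing in this module writes the config file any more; read_config_file only reads ~/.tkio_config.json if it already exists, presumably leaving file creation to the login flow. For reference, a file matching the keys read above can be produced by hand; all values below are placeholders:

    import json
    from pathlib import Path

    # Key names come from read_config_file above; the values are placeholders.
    config = {
        "EMAIL": "user@example.com",
        "TERRAKIO_API_KEY": "tkio-xxxxxxxx",
        "PERSONAL_TOKEN": "eyJhbGciOi...",
    }
    Path.home().joinpath(".tkio_config.json").write_text(json.dumps(config, indent=2))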
{terrakio_core-0.4.97 → terrakio_core-0.4.98.1b1}/terrakio_core/convenience_functions/zonal_stats.py
RENAMED
@@ -9,18 +9,18 @@ from typing import Optional
 # Third-party library imports
 import aiohttp
 import geopandas as gpd
-import nest_asyncio
 import pandas as pd
 import pyproj
 import xarray as xr
 from geopandas import GeoDataFrame
 from shapely.geometry import box, mapping, shape
 from shapely.ops import transform
+import threading
+from concurrent.futures import ThreadPoolExecutor
 
 # Local imports
 from .geoquries import request_geoquery_list
 
-nest_asyncio.apply()
 class cloud_object(gpd.GeoDataFrame):
     """
     This class is a class used for cloud
@@ -36,12 +36,51 @@ class cloud_object(gpd.GeoDataFrame):
         self.client = client
         self.job_name = job_name
 
+    def __repr__(self):
+        return (
+            f"<CloudZonalStats job_id='{self.job_id}', job_name='{self.job_name}'>\n"
+            f"Call .head(n) to fetch a preview GeoDataFrame when the job completes."
+        )
+
+    def _repr_html_(self):
+        # Jupyter HTML-friendly representation to avoid auto-rendering an empty DataFrame
+        return (
+            f"<div style='font-family:system-ui,Segoe UI,Helvetica,Arial,sans-serif'>"
+            f"<strong>Cloud Zonal Stats</strong><br/>"
+            f"job_id: <code>{self.job_id}</code><br/>"
+            f"job_name: <code>{self.job_name}</code><br/>"
+            f"<em>Use <code>.head(n)</code> to retrieve a preview once the job is completed.</em>"
+            f"</div>"
+        )
+
     def head(self, n = 5):
         """
         Returns the first n files stored in the cloud bucket.
         """
-
+        # Detect if we're inside an existing event loop (e.g., Jupyter)
+        in_running_loop = False
+        try:
+            asyncio.get_running_loop()
+            in_running_loop = True
+        except RuntimeError:
+            in_running_loop = False
 
+        if in_running_loop:
+            # Run the async function in a separate thread with its own loop
+            def run_async_in_thread():
+                new_loop = asyncio.new_event_loop()
+                try:
+                    return new_loop.run_until_complete(self._head_async(n))
+                finally:
+                    new_loop.close()
+
+            with ThreadPoolExecutor(max_workers=1) as executor:
+                future = executor.submit(run_async_in_thread)
+                return future.result()
+        else:
+            # No running loop - safe to use asyncio.run
+            return asyncio.run(self._head_async(n))
+
     async def _head_async(self, n = 5):
         """
         Returns the first n files stored in the cloud bucket.
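The new head replaces the module-wide nest_asyncio.apply() with explicit loop detection: in a notebook, where a loop is already running, the coroutine runs on a worker thread with its own loop; in a plain script, asyncio.run suffices. The same pattern as a self-contained helper (the helper name is mine, not the package's):

    import asyncio
    from concurrent.futures import ThreadPoolExecutor

    def run_coro_blocking(coro):
        """Run a coroutine to completion whether or not a loop is already running."""
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            return asyncio.run(coro)  # no running loop: the simple case
        # A loop is running (e.g. Jupyter): use a fresh loop on a worker thread.
        with ThreadPoolExecutor(max_workers=1) as pool:
            return pool.submit(asyncio.run, coro).result()

    async def _demo():
        await asyncio.sleep(0)
        return 42

    print(run_coro_blocking(_demo()))  # 42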
@@ -120,12 +159,54 @@ class cloud_object(gpd.GeoDataFrame):
             })
 
             self.client.logger.info(f"Created GeoDataFrame with {len(gdf)} rows")
+
+            # Derive id values from json metadata (prefer 'file', fallback to 'group')
+            id_values = []
+            for i in range(min_length):
+                entry = json_data[i] if i < len(json_data) else {}
+                id_candidate = entry.get('file') or entry.get('group') or ''
+                if isinstance(id_candidate, str) and id_candidate.startswith('file_'):
+                    id_val = id_candidate[len('file_'):]
+                elif isinstance(id_candidate, str) and id_candidate.startswith('group_'):
+                    id_val = id_candidate[len('group_'):]
+                else:
+                    id_val = str(id_candidate) if id_candidate else str(i)
+                id_values.append(id_val)
+
+            # Geometry to id mapping using WKB to avoid precision issues
+            geom_to_id = {geometries[i].wkb: id_values[i] for i in range(min_length)}
+
             try:
                 expanded_gdf = expand_on_variables_and_time(gdf)
+
+                # Attach id as first index level, geometry second, time third if present
+                if hasattr(expanded_gdf.index, 'names') and 'geometry' in expanded_gdf.index.names:
+                    if isinstance(expanded_gdf.index, pd.MultiIndex):
+                        geometry_index = expanded_gdf.index.get_level_values('geometry')
+                    else:
+                        geometry_index = expanded_gdf.index
+                    id_col = [geom_to_id.get(geom.wkb) for geom in geometry_index]
+                    expanded_gdf['id'] = id_col
+                    expanded_gdf = expanded_gdf.reset_index()
+                    if 'time' in expanded_gdf.columns:
+                        expanded_gdf = expanded_gdf.set_index(['id', 'geometry', 'time'])
+                    else:
+                        expanded_gdf = expanded_gdf.set_index(['id', 'geometry'])
+                else:
+                    # geometry exists as a column
+                    id_col = [geom_to_id.get(geom.wkb) for geom in expanded_gdf['geometry']]
+                    expanded_gdf['id'] = id_col
+                    if 'time' in expanded_gdf.columns:
+                        expanded_gdf = expanded_gdf.set_index(['id', 'geometry', 'time'])
+                    else:
+                        expanded_gdf = expanded_gdf.set_index(['id', 'geometry'])
+
                 return expanded_gdf
             except NameError:
                 self.client.logger.warning("expand_on_variables_and_time function not found, returning raw GeoDataFrame")
-
+                # Set id on raw gdf and index appropriately
+                gdf['id'] = id_values
+                return gdf.set_index(['id', 'geometry'])
 
         else:
             self.client.logger.warning(f"Failed to download geometry data: HTTP {response.status}")
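Keying the geometry-to-id map on geom.wkb rather than on coordinate strings sidesteps float-formatting and object-identity pitfalls: two geometries with identical coordinates serialize to identical WKB bytes. A small demonstration:

    from shapely.geometry import Point

    a = Point(149.1300, -35.2809)
    b = Point(149.1300, -35.2809)   # equal coordinates, distinct object

    geom_to_id = {a.wkb: "site-1"}
    print(geom_to_id[b.wkb])        # "site-1": the WKB bytes match even though a is not b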
@@ -513,35 +594,7 @@ async def zonal_stats(
     job_name = await client.mass_stats.track_job([mass_stats_id])
     job_name = job_name[mass_stats_id]["name"]
     cloud_files_object = cloud_object(job_id = mass_stats_id, job_name = job_name, client = client)
-
-    # Attach id column behavior to cloud object via a wrapper method
-    async def _head_with_id(n = 5):
-        result_gdf = await cloud_files_object._head_async(n)
-        if id_column is not None and id_column in gdf.columns:
-            geometry_to_id = {geom.wkb: id_val for geom, id_val in zip(gdf.geometry, gdf[id_column])}
-            if hasattr(result_gdf.index, 'names') and 'geometry' in result_gdf.index.names:
-                if isinstance(result_gdf.index, pd.MultiIndex):
-                    geometry_index = result_gdf.index.get_level_values('geometry')
-                else:
-                    geometry_index = result_gdf.index
-                id_values = [geometry_to_id.get(geom.wkb) for geom in geometry_index]
-                result_gdf[id_column] = id_values
-                result_gdf = result_gdf.reset_index()
-                if 'time' in result_gdf.columns:
-                    result_gdf = result_gdf.set_index([id_column, 'geometry', 'time'])
-                else:
-                    result_gdf = result_gdf.set_index([id_column, 'geometry'])
-            else:
-                id_values = [geometry_to_id.get(geom.wkb) for geom in result_gdf['geometry']]
-                result_gdf[id_column] = id_values
-                if 'time' in result_gdf.columns:
-                    result_gdf = result_gdf.set_index([id_column, 'geometry', 'time'])
-                else:
-                    result_gdf = result_gdf.set_index([id_column, 'geometry'])
-        return result_gdf
-
-    # Monkey-patch a convenience method without modifying original class contract
-    cloud_files_object.head_with_id = lambda n=5: asyncio.run(_head_with_id(n))
+
     return cloud_files_object
 
     quries = []