terrakio-core 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of terrakio-core might be problematic. Click here for more details.

terrakio_core/client.py DELETED
@@ -1,829 +0,0 @@
1
- import requests
2
- import xarray as xr
3
- from io import BytesIO
4
- from typing import Dict, Any, Optional, Union
5
- import json
6
- import aiohttp
7
- import asyncio
8
- from shapely.geometry import shape
9
- from shapely.geometry.base import BaseGeometry as ShapelyGeometry
10
- from .exceptions import APIError, ConfigurationError
11
-
12
- class BaseClient:
13
def __init__(self, url: Optional[str] = None, key: Optional[str] = None,
             auth_url: Optional[str] = "https://dev-au.terrak.io",
             quiet: bool = False, config_file: Optional[str] = None,
             verify: bool = True, timeout: int = 60):
    """Set up the auth helper, resolve the API URL/key and open the HTTP session.

    Args:
        url: Base URL of the API. Taken from the config file when ``None``.
        key: API key. Taken from the config file when ``None``.
        auth_url: Base URL handed to ``AuthClient``. A falsy value leaves
            ``self.auth_client`` as ``None``.
        quiet: When true, informational ``print`` output is suppressed.
        config_file: Path given to ``read_config_file``; falls back to
            ``DEFAULT_CONFIG_FILE`` when ``None``. Only consulted if ``url``
            or ``key`` is missing.
        verify: Stored on the instance and forwarded to ``AuthClient``.
        timeout: Stored on the instance and forwarded to ``AuthClient``.

    Raises:
        ConfigurationError: If the config file cannot be read, or if the URL
            or key is still empty after consulting it.
    """
    self.quiet = quiet
    self.verify = verify
    self.timeout = timeout
    self.auth_client = None
    if auth_url:
        from terrakio_core.auth import AuthClient
        self.auth_client = AuthClient(
            base_url=auth_url,
            verify=verify,
            timeout=timeout
        )
    self.url = url
    self.key = key
    if self.url is None or self.key is None:
        from terrakio_core.config import read_config_file, DEFAULT_CONFIG_FILE
        if config_file is None:
            config_file = DEFAULT_CONFIG_FILE
        try:
            config = read_config_file(config_file)
            # Explicit arguments win; the file only fills the gaps.
            if self.url is None:
                self.url = config.get('url')
            if self.key is None:
                self.key = config.get('key')
        except Exception as e:
            # Chain the original error so the root cause stays in the traceback.
            raise ConfigurationError(
                f"Failed to read configuration: {e}\n\n"
                "To fix this issue:\n"
                "1. Create a file at ~/.terrakioapirc with:\n"
                "url: https://api.terrak.io\n"
                "key: your-api-key\n\n"
                "OR\n\n"
                "2. Initialize the client with explicit parameters:\n"
                "client = terrakio_api.Client(\n"
                "    url='https://api.terrak.io',\n"
                "    key='your-api-key'\n"
                ")"
            ) from e
    if not self.url:
        raise ConfigurationError("Missing API URL in configuration")
    if not self.key:
        raise ConfigurationError("Missing API key in configuration")
    # Normalise so that f"{self.url}/path" never yields a double slash.
    self.url = self.url.rstrip('/')
    if not self.quiet:
        print(f"Using Terrakio API at: {self.url}")
    self.session = requests.Session()
    self.session.headers.update({
        'Content-Type': 'application/json',
        'x-api-key': self.key
    })
    # Helper clients and the aiohttp session are created lazily on first use.
    self.user_management = None
    self.dataset_management = None
    self.mass_stats = None
    self._aiohttp_session = None
70
-
71
@property
async def aiohttp_session(self):
    """Awaitable property that yields the shared aiohttp session.

    Because the getter is a coroutine function, callers must write
    ``await self.aiohttp_session``. A new ``aiohttp.ClientSession`` is built
    (with the JSON content type, the API key header and ``self.timeout`` as
    total timeout) whenever no session is cached or the cached one is closed.
    """
    cached = self._aiohttp_session
    if cached is not None and not cached.closed:
        return cached
    default_headers = {
        'Content-Type': 'application/json',
        'x-api-key': self.key
    }
    self._aiohttp_session = aiohttp.ClientSession(
        headers=default_headers,
        timeout=aiohttp.ClientTimeout(total=self.timeout)
    )
    return self._aiohttp_session
82
-
83
async def wcs_async(self, expr: str, feature: Union[Dict[str, Any], ShapelyGeometry],
                    in_crs: str = "epsg:4326", out_crs: str = "epsg:4326",
                    output: str = "csv", resolution: int = -1, **kwargs):
    """
    Asynchronous version of the wcs() method using aiohttp.

    Args:
        expr (str): The WCS expression to evaluate
        feature (Union[Dict[str, Any], ShapelyGeometry]): The geographic feature
        in_crs (str): Input coordinate reference system
        out_crs (str): Output coordinate reference system
        output (str): Output format ('csv' or 'netcdf')
        resolution (int): Resolution parameter
        **kwargs: Additional parameters to pass to the WCS request

    Returns:
        Union[pd.DataFrame, xr.Dataset, bytes]: The response data in the requested format

    Raises:
        ValueError: If ``validate_feature`` rejects the feature.
        APIError: If the server answers with a non-OK status or aiohttp
            reports a client error.
    """
    # Anything exposing ``is_valid`` is treated as a Shapely geometry and
    # wrapped into a GeoJSON Feature.
    if hasattr(feature, 'is_valid'):
        from shapely.geometry import mapping
        feature = {
            "type": "Feature",
            "geometry": mapping(feature),
            "properties": {}
        }
    self.validate_feature(feature)

    payload = {
        "feature": feature,
        "in_crs": in_crs,
        "out_crs": out_crs,
        "output": output,
        "resolution": resolution,
        "expr": expr,
        **kwargs
    }
    request_url = f"{self.url}/wcs"

    # All diagnostics honour ``quiet``; previously the payload, URL and raw
    # response body were printed unconditionally.
    verbose = not self.quiet
    if verbose:
        print(f"Requesting data with expression: {expr}")
        print("the payload is ", payload)
        print("the request url is ", request_url)

    try:
        # Get the shared aiohttp session
        session = await self.aiohttp_session
        async with session.post(request_url, json=payload, ssl=self.verify) as response:
            if not response.ok:
                error_msg = f"API request failed: {response.status} {response.reason}"
                try:
                    error_data = await response.json()
                    if "detail" in error_data:
                        error_msg += f" - {error_data['detail']}"
                except Exception:
                    # Error body is not usable JSON; keep the status-line message.
                    pass
                raise APIError(error_msg)

            content = await response.read()
            if verbose:
                print("the content is ", content)

            fmt = output.lower()
            if fmt == "csv":
                import pandas as pd
                df = pd.read_csv(BytesIO(content))
                if verbose:
                    print("the content is ", df)
                return df
            if fmt == "netcdf":
                return xr.open_dataset(BytesIO(content))
            # Unknown format: try netCDF, then CSV, then hand back raw bytes.
            try:
                return xr.open_dataset(BytesIO(content))
            except ValueError:
                import pandas as pd
                try:
                    return pd.read_csv(BytesIO(content))
                except Exception:
                    return content

    except aiohttp.ClientError as e:
        if verbose:
            print(f"Client error in wcs_async: {str(e)}")
        raise APIError(f"Request failed: {str(e)}") from e
    except Exception as e:
        if verbose:
            print(f"Unexpected error in wcs_async: {str(e)}")
        raise
167
-
168
async def close_async(self):
    """Close the cached aiohttp session, if one is open, and forget it."""
    session = self._aiohttp_session
    if not session or session.closed:
        return
    await session.close()
    self._aiohttp_session = None

async def __aenter__(self):
    """Enter ``async with``: the client itself is the context value."""
    return self

async def __aexit__(self, exc_type, exc_val, exc_tb):
    """Leave ``async with``: release the aiohttp session."""
    await self.close_async()
179
-
180
def validate_feature(self, feature: Dict[str, Any]) -> None:
    """Check that ``feature`` is a usable GeoJSON Feature.

    An object exposing ``is_valid`` is treated as a Shapely geometry and
    wrapped into a Feature before the checks run.

    Args:
        feature: GeoJSON Feature dictionary (or a Shapely geometry).

    Raises:
        ValueError: If the value is not a dict, is not of type ``Feature``,
            lacks ``geometry`` or ``properties``, cannot be parsed by
            Shapely, is geometrically invalid, is a Point without exactly
            two coordinates, or is a Polygon that is not simple, has zero
            area or is not closed.
    """
    if hasattr(feature, 'is_valid'):
        from shapely.geometry import mapping
        feature = {
            "type": "Feature",
            "geometry": mapping(feature),
            "properties": {}
        }
    if not isinstance(feature, dict):
        raise ValueError("Feature must be a dictionary or a Shapely geometry")
    if feature.get("type") != "Feature":
        raise ValueError("GeoJSON object must be of type 'Feature'")
    if "geometry" not in feature:
        raise ValueError("Feature must contain a 'geometry' field")
    if "properties" not in feature:
        raise ValueError("Feature must contain a 'properties' field")
    try:
        geometry = shape(feature["geometry"])
    except Exception as e:
        raise ValueError(f"Invalid geometry format: {str(e)}") from e
    if not geometry.is_valid:
        # Geometries have no ``is_valid_reason`` attribute; the supported way
        # to obtain the explanation is ``explain_validity``.
        from shapely.validation import explain_validity
        raise ValueError(f"Invalid geometry: {explain_validity(geometry)}")
    geom_type = feature["geometry"]["type"]
    if geom_type == "Point":
        if len(feature["geometry"]["coordinates"]) != 2:
            raise ValueError("Point must have exactly 2 coordinates")
    elif geom_type == "Polygon":
        if not geometry.is_simple:
            raise ValueError("Polygon must be simple (not self-intersecting)")
        if geometry.area == 0:
            raise ValueError("Polygon must have non-zero area")
        # Only the exterior ring (first ring) is checked for closure.
        coords = feature["geometry"]["coordinates"][0]
        if coords[0] != coords[-1]:
            raise ValueError("Polygon must be closed (first and last points must match)")
214
-
215
def signup(self, email: str, password: str) -> Dict[str, Any]:
    """Register a new account via the auth client and return its response.

    Raises:
        ConfigurationError: If no auth client was created at construction.
    """
    auth = self.auth_client
    if auth:
        return auth.signup(email, password)
    raise ConfigurationError("Authentication client not initialized. Please provide auth_url during client initialization.")
219
-
220
def login(self, email: str, password: str) -> str:
    """Authenticate through the auth client and return the token it yields.

    A confirmation line is printed unless the client is quiet.

    Raises:
        ConfigurationError: If no auth client was created at construction.
    """
    auth = self.auth_client
    if not auth:
        raise ConfigurationError("Authentication client not initialized. Please provide auth_url during client initialization.")
    token = auth.login(email, password)
    if self.quiet:
        return token
    print(f"Successfully authenticated as: {email}")
    return token
227
-
228
def refresh_api_key(self) -> str:
    """Request a fresh API key, adopt it, and persist it to the user config.

    The new key replaces ``self.key`` and the session's ``x-api-key``
    header. It is then written under ``TERRAKIO_API_KEY`` to
    ``.tkio_config.json`` in the user's home directory. Persisting is
    best-effort: a failure only prints a warning (unless quiet).

    Returns:
        The new API key.

    Raises:
        ConfigurationError: If there is no auth client or it holds no token.
    """
    if not self.auth_client:
        raise ConfigurationError("Authentication client not initialized. Please provide auth_url during client initialization.")
    if not self.auth_client.token:
        raise ConfigurationError("Not authenticated. Call login() first.")
    self.key = self.auth_client.refresh_api_key()
    self.session.headers.update({'x-api-key': self.key})
    import os
    # expanduser honours $HOME where it is set and still resolves a home
    # directory where it is not (e.g. Windows), unlike os.environ.get("HOME", "").
    config_path = os.path.join(os.path.expanduser("~"), ".tkio_config.json")
    try:
        config = {"EMAIL": "", "TERRAKIO_API_KEY": ""}
        if os.path.exists(config_path):
            with open(config_path, 'r') as f:
                config = json.load(f)
        config["TERRAKIO_API_KEY"] = self.key
        parent = os.path.dirname(config_path)
        if parent:
            # makedirs("") raises, so only create a real directory component.
            os.makedirs(parent, exist_ok=True)
        with open(config_path, 'w') as f:
            json.dump(config, f, indent=4)
        if not self.quiet:
            print(f"API key generated successfully and updated in {config_path}")
    except Exception as e:
        if not self.quiet:
            print(f"Warning: Failed to update config file: {e}")
    return self.key
252
-
253
def view_api_key(self) -> str:
    """Fetch the current API key from the auth client and start using it.

    The fetched key is stored on the instance and installed as the session's
    ``x-api-key`` header before being returned.

    Raises:
        ConfigurationError: If there is no auth client or it holds no token.
    """
    auth = self.auth_client
    if not auth:
        raise ConfigurationError("Authentication client not initialized. Please provide auth_url during client initialization.")
    if not auth.token:
        raise ConfigurationError("Not authenticated. Call login() first.")
    self.key = auth.view_api_key()
    header = {'x-api-key': self.key}
    self.session.headers.update(header)
    return self.key
261
-
262
def get_user_info(self) -> Dict[str, Any]:
    """Return whatever the auth client reports about the logged-in user.

    Raises:
        ConfigurationError: If there is no auth client or it holds no token.
    """
    auth = self.auth_client
    if not auth:
        raise ConfigurationError("Authentication client not initialized. Please provide auth_url during client initialization.")
    if auth.token:
        return auth.get_user_info()
    raise ConfigurationError("Not authenticated. Call login() first.")
268
-
269
def wcs(self, expr: str, feature: Union[Dict[str, Any], ShapelyGeometry], in_crs: str = "epsg:4326",
        out_crs: str = "epsg:4326", output: str = "csv", resolution: int = -1,
        **kwargs):
    """Evaluate a WCS expression over a feature with a blocking POST to ``/wcs``.

    Args:
        expr: The WCS expression to evaluate.
        feature: GeoJSON Feature dict, or a Shapely geometry (anything
            exposing ``is_valid``), which is wrapped into a Feature.
        in_crs: Input coordinate reference system.
        out_crs: Output coordinate reference system.
        output: Requested format. ``"csv"`` is parsed with pandas and
            ``"netcdf"`` with xarray; any other value tries xarray, then
            pandas, then returns the raw bytes.
        resolution: Resolution parameter sent to the API.
        **kwargs: Extra entries merged into the request payload.

    Returns:
        A pandas DataFrame, an xarray Dataset, or raw ``bytes``.

    Raises:
        ValueError: If ``validate_feature`` rejects the feature.
        APIError: On a non-OK response or a ``requests`` failure.
    """
    if hasattr(feature, 'is_valid'):
        from shapely.geometry import mapping
        feature = {
            "type": "Feature",
            "geometry": mapping(feature),
            "properties": {}
        }
    self.validate_feature(feature)
    payload = {
        "feature": feature,
        "in_crs": in_crs,
        "out_crs": out_crs,
        "output": output,
        "resolution": resolution,
        "expr": expr,
        **kwargs
    }
    request_url = f"{self.url}/wcs"
    # Diagnostics honour ``quiet``; the payload and URL used to be printed
    # unconditionally.
    if not self.quiet:
        print(f"Requesting data with expression: {expr}")
        print("the payload is ", payload)
        print("the request url is ", request_url)
    try:
        response = self.session.post(request_url, json=payload, timeout=self.timeout, verify=self.verify)
        if not response.ok:
            error_msg = f"API request failed: {response.status_code} {response.reason}"
            try:
                error_data = response.json()
                if "detail" in error_data:
                    error_msg += f" - {error_data['detail']}"
            except Exception:
                # Error body is not usable JSON; keep the status-line message.
                pass
            raise APIError(error_msg)
        fmt = output.lower()
        if fmt == "csv":
            import pandas as pd
            return pd.read_csv(BytesIO(response.content))
        if fmt == "netcdf":
            return xr.open_dataset(BytesIO(response.content))
        # Unknown format: try netCDF, then CSV, then hand back raw bytes.
        try:
            return xr.open_dataset(BytesIO(response.content))
        except ValueError:
            import pandas as pd
            try:
                return pd.read_csv(BytesIO(response.content))
            except Exception:
                return response.content
    except requests.RequestException as e:
        raise APIError(f"Request failed: {str(e)}") from e
321
-
322
- # Admin/protected methods
323
def _ensure_user_management(self):
    """Return the cached ``UserManagement`` helper, building it on first use.

    Raises:
        ConfigurationError: If the API URL or key is empty.
    """
    if not self.user_management:
        from terrakio_core.user_management import UserManagement
        if not self.url or not self.key:
            raise ConfigurationError("User management client not initialized. Make sure API URL and key are set.")
        self.user_management = UserManagement(
            api_url=self.url,
            api_key=self.key,
            verify=self.verify,
            timeout=self.timeout
        )
    return self.user_management

def _get_user_by_id(self, user_id: str):
    """Delegate to ``UserManagement.get_user_by_id``."""
    return self._ensure_user_management().get_user_by_id(user_id)

def _get_user_by_email(self, email: str):
    """Delegate to ``UserManagement.get_user_by_email``."""
    return self._ensure_user_management().get_user_by_email(email)

def _list_users(self, substring: str = None, uid: bool = False):
    """Delegate to ``UserManagement.list_users``."""
    return self._ensure_user_management().list_users(substring=substring, uid=uid)

def _edit_user(self, user_id: str, uid: str = None, email: str = None, role: str = None, apiKey: str = None, groups: list = None, quota: int = None):
    """Delegate to ``UserManagement.edit_user``, forwarding every field as given."""
    return self._ensure_user_management().edit_user(
        user_id=user_id,
        uid=uid,
        email=email,
        role=role,
        apiKey=apiKey,
        groups=groups,
        quota=quota
    )

def _reset_quota(self, email: str, quota: int = None):
    """Delegate to ``UserManagement.reset_quota``."""
    return self._ensure_user_management().reset_quota(email=email, quota=quota)

def _delete_user(self, uid: str):
    """Delegate to ``UserManagement.delete_user``."""
    return self._ensure_user_management().delete_user(uid=uid)
408
-
409
- # Dataset management protected methods
410
def _ensure_dataset_management(self):
    """Return the cached ``DatasetManagement`` helper, building it on first use.

    Raises:
        ConfigurationError: If the API URL or key is empty.
    """
    if not self.dataset_management:
        from terrakio_core.dataset_management import DatasetManagement
        if not self.url or not self.key:
            raise ConfigurationError("Dataset management client not initialized. Make sure API URL and key are set.")
        self.dataset_management = DatasetManagement(
            api_url=self.url,
            api_key=self.key,
            verify=self.verify,
            timeout=self.timeout
        )
    return self.dataset_management

def _get_dataset(self, name: str, collection: str = "terrakio-datasets"):
    """Delegate to ``DatasetManagement.get_dataset``."""
    return self._ensure_dataset_management().get_dataset(name=name, collection=collection)

def _list_datasets(self, substring: str = None, collection: str = "terrakio-datasets"):
    """Delegate to ``DatasetManagement.list_datasets``."""
    return self._ensure_dataset_management().list_datasets(substring=substring, collection=collection)

def _create_dataset(self, name: str, collection: str = "terrakio-datasets", **kwargs):
    """Delegate to ``DatasetManagement.create_dataset``; extra kwargs pass through."""
    return self._ensure_dataset_management().create_dataset(name=name, collection=collection, **kwargs)

def _update_dataset(self, name: str, append: bool = True, collection: str = "terrakio-datasets", **kwargs):
    """Delegate to ``DatasetManagement.update_dataset``; extra kwargs pass through."""
    return self._ensure_dataset_management().update_dataset(name=name, append=append, collection=collection, **kwargs)

def _overwrite_dataset(self, name: str, collection: str = "terrakio-datasets", **kwargs):
    """Delegate to ``DatasetManagement.overwrite_dataset``; extra kwargs pass through."""
    return self._ensure_dataset_management().overwrite_dataset(name=name, collection=collection, **kwargs)

def _delete_dataset(self, name: str, collection: str = "terrakio-datasets"):
    """Delegate to ``DatasetManagement.delete_dataset``."""
    return self._ensure_dataset_management().delete_dataset(name=name, collection=collection)
487
-
488
def close(self):
    """Close the requests session and, when present, the auth client's session."""
    self.session.close()
    auth = self.auth_client
    if auth:
        auth.session.close()

def __enter__(self):
    """Enter ``with``: the client itself is the context value."""
    return self

def __exit__(self, exc_type, exc_val, exc_tb):
    """Leave ``with``: release the HTTP sessions via ``close``."""
    self.close()
496
-
497
- # Mass Stats methods
498
def _ensure_mass_stats(self):
    """Return the cached ``MassStats`` helper, building it on first use.

    Raises:
        ConfigurationError: If the API URL or key is empty.
    """
    if not self.mass_stats:
        from terrakio_core.mass_stats import MassStats
        if not self.url or not self.key:
            raise ConfigurationError("Mass Stats client not initialized. Make sure API URL and key are set.")
        self.mass_stats = MassStats(
            base_url=self.url,
            api_key=self.key,
            verify=self.verify,
            timeout=self.timeout
        )
    return self.mass_stats

def upload_mass_stats(self, name, size, bucket, output, location=None, **kwargs):
    """Delegate to ``MassStats.upload_request``; extra kwargs pass through."""
    return self._ensure_mass_stats().upload_request(name, size, bucket, output, location, **kwargs)

def start_mass_stats_job(self, task_id):
    """Delegate to ``MassStats.start_job``."""
    return self._ensure_mass_stats().start_job(task_id)

def get_mass_stats_task_id(self, name, stage, uid=None):
    """Delegate to ``MassStats.get_task_id``."""
    return self._ensure_mass_stats().get_task_id(name, stage, uid)

def track_mass_stats_job(self, ids=None):
    """Delegate to ``MassStats.track_job``."""
    return self._ensure_mass_stats().track_job(ids)

def get_mass_stats_history(self, limit=100):
    """Delegate to ``MassStats.get_history``."""
    return self._ensure_mass_stats().get_history(limit)

def start_mass_stats_post_processing(self, process_name, data_name, output, consumer_path, overwrite=False):
    """Delegate to ``MassStats.start_post_processing``."""
    return self._ensure_mass_stats().start_post_processing(process_name, data_name, output, consumer_path, overwrite)

def download_mass_stats_results(self, id=None, force_loc=False, **kwargs):
    """Delegate to ``MassStats.download_results``; extra kwargs pass through."""
    return self._ensure_mass_stats().download_results(id, force_loc, **kwargs)

def cancel_mass_stats_job(self, id):
    """Delegate to ``MassStats.cancel_job``."""
    return self._ensure_mass_stats().cancel_job(id)

def cancel_all_mass_stats_jobs(self):
    """Delegate to ``MassStats.cancel_all_jobs``."""
    return self._ensure_mass_stats().cancel_all_jobs()

def _create_pyramids(self, name, levels, config):
    """Delegate to ``MassStats.create_pyramids``."""
    return self._ensure_mass_stats().create_pyramids(name, levels, config)

def random_sample(self, name, **kwargs):
    """Delegate to ``MassStats.random_sample``; extra kwargs pass through."""
    return self._ensure_mass_stats().random_sample(name, **kwargs)
640
-
641
async def zonal_stats_async(self, gdb, expr, conc=20, inplace=False, output="csv"):
    """
    Compute zonal statistics for all geometries in a GeoDataFrame using asyncio for concurrency.

    Each geometry is sent through ``wcs_async``; requests are issued in
    batches of ``conc`` using ``asyncio.TaskGroup``. Geometries are tracked
    by their *position* in ``gdb``, so frames with a non-default index are
    handled correctly.

    Args:
        gdb: GeoDataFrame whose geometries are evaluated.
        expr: Expression forwarded to ``wcs_async``.
        conc: Number of geometries requested concurrently per batch (>= 1).
        inplace: For non-temporal results, write the new columns into
            ``gdb`` itself and return ``None``. Ignored (with a warning)
            when results contain a ``time`` column.
        output: Output format forwarded to ``wcs_async``.

    Returns:
        A GeoDataFrame. With a ``time`` column in the results it has one row
        per (geometry, time) and a ``(geometry_index, time)`` MultiIndex;
        otherwise it is ``gdb`` (or a copy) with the result columns added.
        ``None`` when ``inplace`` is true and results are non-temporal.

    Raises:
        ValueError: If ``conc`` is below 1, no results came back, or
            temporal results lack the geometry index column.
    """
    import asyncio
    import pandas as pd
    import geopandas as gpd
    from shapely.geometry import mapping

    if conc < 1:
        raise ValueError("conc must be at least 1")

    quiet = getattr(self, "quiet", False)

    def log(message):
        # Progress output follows the client's ``quiet`` flag.
        if not quiet:
            print(message)

    total = len(gdb)
    log(f"Starting zonal_stats_async with {total} geometries")

    async def process_geometry(geom, position):
        """Request one geometry and tag its rows with the geometry's position."""
        feature = {
            "type": "Feature",
            "geometry": mapping(geom),
            "properties": {"index": position}
        }
        result = await self.wcs_async(expr=expr, feature=feature, output=output)
        if isinstance(result, pd.DataFrame):
            result['_geometry_index'] = position
        return result

    async def process_batch(positions):
        """Run one batch concurrently; any failure propagates as an exception group."""
        async with asyncio.TaskGroup() as tg:
            tasks = [
                tg.create_task(process_geometry(gdb.geometry.iloc[pos], pos))
                for pos in positions
            ]
        return [task.result() for task in tasks]

    all_results = []
    # Process in batches to control concurrency
    for start in range(0, total, conc):
        positions = range(start, min(start + conc, total))
        batch_no = start // conc + 1
        try:
            log(f"Starting batch {batch_no}")
            all_results.extend(await process_batch(positions))
            log(f"Batch {batch_no} completed successfully")
        except Exception as e:
            log(f"Error processing batch starting at index {start}: {str(e)}")
            if hasattr(e, 'response'):
                log(f"API Response: {e.response.text}")
            raise

    if not all_results:
        raise ValueError("No valid results were returned for any geometry")

    combined_df = pd.concat(all_results, ignore_index=True)

    if 'time' in combined_df.columns:
        if '_geometry_index' not in combined_df.columns:
            raise ValueError("Missing geometry index in results")

        combined_df.set_index(['_geometry_index', 'time'], inplace=True)
        result_cols = combined_df.columns
        attribute_cols = [col for col in gdb.columns if col != 'geometry']

        result_rows = []
        geometries = []
        for (geom_pos, _time_val), row in combined_df.iterrows():
            # Original attributes first, then result columns (results win on a name clash).
            # Positional lookup: geom_pos is a position, not an index label.
            new_row = {col: gdb[col].iloc[geom_pos] for col in attribute_cols}
            for col in result_cols:
                new_row[col] = row[col]
            result_rows.append(new_row)
            geometries.append(gdb.geometry.iloc[geom_pos])

        multi_index = pd.MultiIndex.from_tuples(
            combined_df.index.tolist(),
            names=['geometry_index', 'time']
        )
        result_gdf = gpd.GeoDataFrame(
            result_rows,
            geometry=geometries,
            index=multi_index
        )
        if inplace:
            # The row structure changes for temporal data, so in-place is impossible.
            print("Warning: inplace=True ignored for temporal results, returning new GeoDataFrame")
        return result_gdf

    # Non-temporal data - just add new columns to the existing GeoDataFrame
    result_gdf = gdb if inplace else gdb.copy()
    result_cols = [col for col in combined_df.columns if col != '_geometry_index']

    # If a geometry produced several rows, the last one wins.
    rows_by_position = {
        int(row['_geometry_index']): row for _, row in combined_df.iterrows()
    }

    for col in result_cols:
        if col not in result_gdf.columns:
            result_gdf[col] = None
        col_pos = result_gdf.columns.get_loc(col)
        for geom_pos, row in rows_by_position.items():
            # Positional write keeps results aligned even when the frame's
            # index is not 0..n-1.
            result_gdf.iloc[geom_pos, col_pos] = row[col]

    return None if inplace else result_gdf
812
-
813
def zonal_stats(self, gdb, expr, conc=20, inplace=False, output="csv"):
    """
    Compute zonal statistics for all geometries in a GeoDataFrame.

    Blocking wrapper that runs ``zonal_stats_async`` in a fresh event loop
    via ``asyncio.run``. The cached aiohttp session is closed before that
    loop ends, so a later call does not reuse a session tied to a closed loop.

    Args:
        gdb (geopandas.GeoDataFrame): GeoDataFrame containing geometries
        expr (str): Terrakio expression to evaluate, can include spatial aggregations
        conc (int): Number of concurrent requests to make
        inplace (bool): Whether to modify the input GeoDataFrame in place
        output (str): Output format (csv or netcdf)

    Returns:
        geopandas.GeoDataFrame: GeoDataFrame with added columns for results, or None if inplace=True
    """
    import asyncio

    async def _run():
        try:
            return await self.zonal_stats_async(gdb, expr, conc, inplace, output)
        finally:
            # Release the session while its owning loop is still alive.
            await self.close_async()

    return asyncio.run(_run())
829
-