terrakio-core 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of terrakio-core might be problematic. Click here for more details.

@@ -1,504 +0,0 @@
1
- import requests
2
- from typing import Optional, Dict, Any, List
3
- import json
4
- import json as json_lib
5
- import gzip
6
-
7
class MassStats:
    """Client for the mass_stats job endpoints: upload, execution, tracking,
    post-processing, and result download."""

    def __init__(self, base_url: str, api_key: str, verify: bool = True, timeout: int = 60):
        """
        Args:
            base_url: Root URL of the API (trailing slashes are stripped).
            api_key: API key sent in the ``x-api-key`` header on every request.
            verify: Whether to verify TLS certificates.
            timeout: Per-request timeout in seconds.
        """
        self.base_url = base_url.rstrip('/')
        self.api_key = api_key
        self.verify = verify
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers.update({
            'x-api-key': self.api_key
        })

    @staticmethod
    def _json_dumps_kwargs() -> Dict[str, Any]:
        """Extra kwargs for ``dumps``: pass ``ignore_nan=True`` when a
        simplejson-style ``dumps`` is in use (stdlib json lacks it).

        Uses ``getattr`` because a C-implemented ``dumps`` has no
        ``__code__`` attribute — the original unguarded access could raise
        AttributeError.
        """
        code = getattr(json_lib.dumps, '__code__', None)
        if code is not None and 'ignore_nan' in code.co_varnames:
            return {'ignore_nan': True}
        return {}

    def _upload_file(self, file_path: str, url: str, use_gzip: bool = False):
        """
        Upload the contents of a local JSON file to a signed URL via HTTP PUT.

        Args:
            file_path: Path to the JSON file.
            url: Signed URL to upload to.
            use_gzip: Whether to compress the payload with gzip (and set the
                matching ``Content-Encoding`` header).

        Returns:
            The ``requests.Response`` from the PUT request.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
            ValueError: If the file is not valid JSON.
        """
        try:
            with open(file_path, 'r') as file:
                json_data = json_lib.load(file)
        except FileNotFoundError:
            raise FileNotFoundError(f"JSON file not found: {file_path}")
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON in file {file_path}: {e}")

        body = json_lib.dumps(json_data, **self._json_dumps_kwargs()).encode('utf-8')
        headers = {'Content-Type': 'application/json'}
        if use_gzip:
            body = gzip.compress(body)
            headers['Content-Encoding'] = 'gzip'

        # Signed URLs carry their own authentication, so deliberately bypass
        # the session (and its x-api-key header) for this request.
        return requests.put(url, data=body, headers=headers)

    def download_file(self, job_name: str, bucket: str, file_name: str, output_path: str) -> str:
        """
        Download a file from mass_stats using job name and file name.

        Args:
            job_name: Name of the job.
            bucket: Bucket the job wrote its output to.
            file_name: Name of the file to download.
            output_path: Path where the file should be saved.

        Returns:
            str: Path to the downloaded file.

        Raises:
            Exception: If the signed URL cannot be obtained, the download
                fails, or the file cannot be written. A partially written
                file is removed before the error propagates.
        """
        import os
        from pathlib import Path

        endpoint_url = f"{self.base_url}/mass_stats/download_files"
        request_body = {
            "job_name": job_name,
            "bucket": bucket,
            "file_name": file_name
        }

        try:
            # Ask the API for a signed download URL.
            response = self.session.post(
                endpoint_url,
                json=request_body,
                verify=self.verify,
                timeout=self.timeout
            )
            # Fail fast on an HTTP error instead of trying to parse an error
            # page as JSON below.
            response.raise_for_status()
            signed_url = response.json().get('download_url')
            if not signed_url:
                raise Exception("No download URL received from server")
            print(f"Generated signed URL for download")

            # Create the output directory if it doesn't exist.
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)

            # Stream the body so large files are never held fully in memory.
            download_response = self.session.get(
                signed_url,
                verify=self.verify,
                timeout=self.timeout,
                stream=True
            )
            download_response.raise_for_status()

            # An explicit zero content-length means there is nothing to save.
            content_length = download_response.headers.get('content-length')
            if content_length and int(content_length) == 0:
                raise Exception("File appears to be empty")

            with open(output_path, 'wb') as file:
                for chunk in download_response.iter_content(chunk_size=8192):
                    if chunk:
                        file.write(chunk)

            # Verify the file actually landed on disk.
            if not os.path.exists(output_path):
                raise Exception(f"File was not written to {output_path}")

            file_size = os.path.getsize(output_path)
            print(f"File downloaded successfully to {output_path} (size: {file_size / (1024 * 1024):.4f} mb)")

            return output_path

        except requests.exceptions.RequestException as e:
            # BUG FIX: was `self.session.exceptions.RequestException` — a
            # Session object has no `exceptions` attribute, so the handler
            # itself raised AttributeError instead of reporting the failure.
            if hasattr(e, 'response') and e.response is not None:
                error_detail = e.response.text
                raise Exception(f"Error getting signed URL: {e}. Details: {error_detail}")
            raise Exception(f"Error in download process: {e}")
        except IOError as e:
            raise Exception(f"Error writing file to {output_path}: {e}")
        except Exception:
            # Clean up a partial file before propagating the error.
            if os.path.exists(output_path):
                try:
                    os.remove(output_path)
                except OSError:
                    pass
            raise

    def upload_request(
        self,
        name: str,
        size: int,
        region: List[str],
        output: str,
        config: Dict[str, Any],
        location: Optional[str] = None,
        force_loc: Optional[bool] = None,
        overwrite: bool = False,
        server: Optional[str] = None,
        skip_existing: bool = False,
    ) -> Dict[str, Any]:
        """
        Initiate a mass stats upload job and obtain signed upload URLs.

        Args:
            name: Name of the job.
            size: Size of the job (number of requests).
            region: Region to run job [aus, eu, us].
            output: Output type.
            config: Configuration dictionary.
            location: (Optional) Location for the upload.
            force_loc: Force location usage.
            overwrite: Overwrite existing data.
            server: Optional server.
            skip_existing: Skip existing files.

        Returns:
            The decoded JSON response (includes the job ``id`` and the
            ``requests_url`` / ``manifest_url`` signed URLs).
        """
        url = f"{self.base_url}/mass_stats/upload"

        data = {
            "name": name,
            "size": size,
            "region": region,
            "output": output,
            "config": config,
            "overwrite": overwrite,
            "skip_existing": skip_existing
        }
        # Optional fields are omitted entirely rather than sent as null.
        if location is not None:
            data["location"] = location
        if force_loc is not None:
            data["force_loc"] = force_loc
        if server is not None:
            data["server"] = server

        response = self.session.post(
            url,
            json=data,
            verify=self.verify,
            timeout=self.timeout
        )
        return response.json()

    def execute_job(
        self,
        name: str,
        region: str,
        output: str,
        config: Dict[str, Any],
        overwrite: bool = False,
        skip_existing: bool = False,
        request_json: Optional[str] = None,
        manifest_json: Optional[str] = None,
        location: Optional[str] = None,
        force_loc: Optional[bool] = None,
        server: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Create an upload job, upload the request/manifest JSON files, and
        start the job.

        Args:
            name: Name of the job.
            region: Region to run the job in.
            output: Output type.
            config: Configuration dictionary.
            overwrite: Overwrite existing data.
            skip_existing: Skip existing files.
            request_json: Path to a JSON file containing a list of requests;
                its length determines the job size.
            manifest_json: Path to a manifest JSON file.
            location: (Optional) Location for the upload.
            force_loc: Force location usage.
            server: Optional server.

        Returns:
            The response from starting the job.

        Raises:
            FileNotFoundError: If ``request_json`` does not exist.
            ValueError: If a JSON file is invalid or not a list, or the
                server did not return the expected signed URL.
        """
        # Step 1: derive the job size from the request JSON file, if given.
        size = 0
        if request_json is not None:
            try:
                with open(request_json, 'r') as file:
                    request_data = json_lib.load(file)

                if isinstance(request_data, list):
                    size = len(request_data)
                else:
                    raise ValueError(f"Request JSON file {request_json} should contain a list of dictionaries")

            except FileNotFoundError:
                raise FileNotFoundError(f"Request JSON file not found: {request_json}")
            except json.JSONDecodeError as e:
                raise ValueError(f"Invalid JSON in request file {request_json}: {e}")

        # Step 2: create the upload job and get signed URLs.
        upload_result = self.upload_request(name, size, region, output, config, location, force_loc, overwrite, server, skip_existing)

        # Step 3: upload the JSON files if provided.
        if request_json is not None or manifest_json is not None:
            requests_url = upload_result.get('requests_url')
            manifest_url = upload_result.get('manifest_url')

            if request_json is not None:
                if not requests_url:
                    raise ValueError("No requests_url returned from server for request JSON upload")

                try:
                    # The request payload can be large, so it is gzip-compressed.
                    requests_response = self._upload_file(request_json, requests_url, use_gzip=True)
                    if requests_response.status_code not in [200, 201, 204]:
                        print(f"Requests upload error: {requests_response.text}")
                        raise Exception(f"Failed to upload request JSON: {requests_response.text}")
                except Exception as e:
                    raise Exception(f"Error uploading request JSON file {request_json}: {e}")

            if manifest_json is not None:
                if not manifest_url:
                    raise ValueError("No manifest_url returned from server for manifest JSON upload")

                try:
                    manifest_response = self._upload_file(manifest_json, manifest_url, use_gzip=False)
                    if manifest_response.status_code not in [200, 201, 204]:
                        print(f"Manifest upload error: {manifest_response.text}")
                        raise Exception(f"Failed to upload manifest JSON: {manifest_response.text}")
                except Exception as e:
                    raise Exception(f"Error uploading manifest JSON file {manifest_json}: {e}")

        # Step 4: start the job using the id returned by the upload step.
        return self.start_job(upload_result.get("id"))

    def start_job(self, task_id: str) -> Dict[str, Any]:
        """
        Start a mass stats job by task ID.

        Raises:
            requests.HTTPError: If the server returns an error status.
        """
        url = f"{self.base_url}/mass_stats/start/{task_id}"
        response = self.session.post(url, verify=self.verify, timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def get_task_id(self, name: str, stage: str, uid: Optional[str] = None) -> Dict[str, Any]:
        """
        Get the task ID for a mass stats job by name and stage (and
        optionally user ID).
        """
        url = f"{self.base_url}/mass_stats/job_id?name={name}&stage={stage}"
        if uid is not None:
            url += f"&uid={uid}"
        response = self.session.get(url, verify=self.verify, timeout=self.timeout)
        return response.json()

    def track_job(self, ids: Optional[list] = None) -> Dict[str, Any]:
        """
        Track the status of one or more mass stats jobs.
        If ids is None, gets progress for all of the user's jobs.

        Raises:
            requests.HTTPError: If the server returns an error status.
        """
        url = f"{self.base_url}/mass_stats/track"
        data = {"ids": ids} if ids is not None else {}
        response = self.session.post(url, json=data, verify=self.verify, timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def get_history(self, limit: int = 100) -> Dict[str, Any]:
        """
        Get the history of mass stats jobs.

        Args:
            limit: Maximum number of history entries to return.
        """
        url = f"{self.base_url}/mass_stats/history"
        params = {"limit": limit}
        response = self.session.get(url, params=params, verify=self.verify, timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def start_post_processing(
        self,
        process_name: str,
        data_name: str,
        output: str,
        consumer_path: str,
        overwrite: bool = False
    ) -> Dict[str, Any]:
        """
        Start post processing for a mass stats job.

        Args:
            process_name: Folder to store output.
            data_name: Name of job used to create data.
            output: Output type.
            consumer_path: Path to the post processing script (Python file).
            overwrite: Overwrite existing post processing output in same location.

        Returns:
            Dict with task_id.
        """
        url = f"{self.base_url}/mass_stats/post_process"
        # BUG FIX: the consumer file handle was previously opened and never
        # closed; the `with` block guarantees it is released.
        with open(consumer_path, 'rb') as consumer_file:
            files = {
                'consumer': (consumer_path, consumer_file, 'text/x-python')
            }
            data = {
                'process_name': process_name,
                'data_name': data_name,
                'output': output,
                # Multipart form fields are strings; the server expects
                # lowercase "true"/"false".
                'overwrite': str(overwrite).lower()
            }
            response = self.session.post(url, data=data, files=files, verify=self.verify, timeout=self.timeout)
        return response.json()

    def download_results(
        self,
        id: Optional[str] = None,
        force_loc: bool = False,
        bucket: Optional[str] = None,
        location: Optional[str] = None,
        output: Optional[str] = None,
        file_name: Optional[str] = None
    ) -> bytes:
        """
        Download results from a mass stats job or arbitrary results if
        force_loc is True.
        Returns the content of the .zip file.
        """
        url = f"{self.base_url}/mass_stats/download"
        data = {}
        # Only include the fields the caller actually supplied.
        if id is not None:
            data["id"] = id
        if force_loc:
            data["force_loc"] = True
        if bucket is not None:
            data["bucket"] = bucket
        if location is not None:
            data["location"] = location
        if output is not None:
            data["output"] = output
        if file_name is not None:
            data["file_name"] = file_name
        response = self.session.post(url, json=data, verify=self.verify, timeout=self.timeout)
        return response.content

    def cancel_job(self, id: str) -> Dict[str, Any]:
        """
        Cancel a mass stats job by ID.

        Raises:
            requests.HTTPError: If the server returns an error status.
        """
        url = f"{self.base_url}/mass_stats/cancel/{id}"
        response = self.session.post(url, verify=self.verify, timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def cancel_all_jobs(self) -> Dict[str, Any]:
        """
        Cancel all mass stats jobs for the user.

        Raises:
            requests.HTTPError: If the server returns an error status.
        """
        url = f"{self.base_url}/mass_stats/cancel"
        response = self.session.post(url, verify=self.verify, timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def create_pyramids(self, name: str, levels: int, config: Dict[str, Any]) -> Dict[str, Any]:
        """
        Create pyramids for a dataset.

        Args:
            name: Name for the pyramid job.
            levels: Number of zoom levels to compute.
            config: Dataset config (mapping).

        Returns:
            Dict with task_id.
        """
        url = f"{self.base_url}/pyramids/create"
        data = {
            "name": name,
            "levels": levels,
            "config": config
        }
        response = self.session.post(url, json=data, verify=self.verify, timeout=self.timeout)
        return response.json()

    def random_sample(
        self,
        name: str,
        config: dict,
        aoi: dict,
        samples: int,
        year_range: list,
        crs: str,
        tile_size: int,
        res: float,
        output: str,
        server: str,
        region: str,
        bucket: str,
        overwrite: bool = False
    ) -> Dict[str, Any]:
        """
        Submit a random sample job.

        Args:
            year_range: Two-element list ``[start_year, end_year]``; both
                entries must be non-None.

        Raises:
            ValueError: If ``year_range`` is missing, not length 2, or
                contains a None entry.
        """
        if year_range is None or len(year_range) != 2:
            raise ValueError("year_range must be a list of two integers")
        start_year, end_year = year_range
        if start_year is None or end_year is None:
            raise ValueError("Both start_year and end_year must be provided for year_range.")

        url = f"{self.base_url}/random_sample"
        data = {
            "name": name,
            "overwrite": overwrite,
            "config": config,
            "aoi": aoi,
            "samples": samples,
            "year_range": [start_year, end_year],
            "crs": crs,
            "tile_size": tile_size,
            "res": res,
            "output": output,
            "server": server,
            "region": region,
            "bucket": bucket
        }
        response = self.session.post(url, json=data, verify=self.verify, timeout=self.timeout)
        return response.json()

    def combine_tiles(
        self,
        data_name: str,
        usezarr: bool = False,
        overwrite: bool = True,
        output: str = "netcdf"
    ) -> Dict[str, Any]:
        """
        Combine the tiles produced by a job via /mass_stats/combine_tiles.

        Args:
            data_name: Name of the job whose tiles should be combined.
            usezarr: Combine into zarr instead of the default output.
            overwrite: Overwrite an existing combined output.
            output: Output format (default "netcdf").
        """
        url = f"{self.base_url}/mass_stats/combine_tiles"
        request_body = {
            'data_name': data_name,
            # NOTE(review): booleans are sent as the strings "true"/"false";
            # presumably the endpoint parses them — confirm against the API.
            'usezarr': str(usezarr).lower(),
            'output': output,
            'overwrite': str(overwrite).lower()
        }
        response = self.session.post(url, json=request_body, verify=self.verify, timeout=self.timeout)
        return response.json()
-
@@ -1,101 +0,0 @@
1
- import requests
2
- from typing import Dict, Any, Optional
3
- from .exceptions import APIError
4
-
5
class SpaceManagement:
    """Client for user space/storage endpoints (usage queries and deletions)."""

    def __init__(self, api_url: str, api_key: str, verify: bool = True, timeout: int = 60):
        """
        Args:
            api_url: Root URL of the API (trailing slashes are stripped).
            api_key: API key sent in the ``x-api-key`` header on every request.
            verify: Whether to verify TLS certificates.
            timeout: Per-request timeout in seconds.
        """
        self.api_url = api_url.rstrip('/')
        self.api_key = api_key
        self.verify = verify
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers.update({
            'x-api-key': self.api_key,
            'Content-Type': 'application/json'
        })

    def _request_json(self, method: str, endpoint: str, params: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Issue a request and return the decoded JSON body.

        Consolidates the error handling that was previously copy-pasted in
        every public method.

        Args:
            method: HTTP method name ("GET" or "DELETE").
            endpoint: Absolute URL to call.
            params: Optional query parameters.

        Raises:
            APIError: On a non-2xx response (including the server's
                ``detail`` message when present) or on a transport failure.
        """
        try:
            response = self.session.request(
                method, endpoint, params=params, timeout=self.timeout, verify=self.verify
            )
            if not response.ok:
                error_msg = f"API request failed: {response.status_code} {response.reason}"
                try:
                    error_data = response.json()
                    if "detail" in error_data:
                        error_msg += f" - {error_data['detail']}"
                except ValueError:
                    # Body was not JSON; report the status line only.
                    pass
                raise APIError(error_msg)
            return response.json()
        except requests.RequestException as e:
            raise APIError(f"Request failed: {str(e)}")

    def get_total_space_used(self) -> Dict[str, Any]:
        """
        Get total space used by the user.
        Returns a dict with user, total, and jobs breakdown.
        """
        return self._request_json("GET", f"{self.api_url}/users/jobs")

    def get_space_used_by_job(self, name: str, region: Optional[str] = None) -> Dict[str, Any]:
        """
        Get space used by a specific job.
        """
        params = {"region": region} if region else {}
        return self._request_json("GET", f"{self.api_url}/users/jobs/{name}", params=params)

    def delete_user_job(self, name: str, region: Optional[str] = None) -> Dict[str, Any]:
        """
        Delete a user job by name and region.
        """
        params = {"region": region} if region else {}
        return self._request_json("DELETE", f"{self.api_url}/users/job/{name}", params=params)

    def delete_data_in_path(self, path: str, region: Optional[str] = None) -> Dict[str, Any]:
        """
        Delete data in a GCS path for a given region.
        """
        params = {"path": path}
        if region:
            params["region"] = region
        return self._request_json("DELETE", f"{self.api_url}/users/jobs", params=params)