terrakio-core 0.3.0__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of terrakio-core might be problematic. Click here for more details.

Files changed (22) hide show
  1. {terrakio_core-0.3.0 → terrakio_core-0.3.1}/PKG-INFO +1 -1
  2. {terrakio_core-0.3.0 → terrakio_core-0.3.1}/pyproject.toml +1 -1
  3. {terrakio_core-0.3.0 → terrakio_core-0.3.1}/terrakio_core/__init__.py +1 -1
  4. {terrakio_core-0.3.0 → terrakio_core-0.3.1}/terrakio_core/client.py +203 -35
  5. terrakio_core-0.3.1/terrakio_core/decorators.py +18 -0
  6. terrakio_core-0.3.1/terrakio_core/generation/tiles.py +95 -0
  7. terrakio_core-0.3.1/terrakio_core/mass_stats.py +496 -0
  8. {terrakio_core-0.3.0 → terrakio_core-0.3.1}/terrakio_core.egg-info/PKG-INFO +1 -1
  9. {terrakio_core-0.3.0 → terrakio_core-0.3.1}/terrakio_core.egg-info/SOURCES.txt +3 -1
  10. terrakio_core-0.3.0/terrakio_core/mass_stats.py +0 -262
  11. {terrakio_core-0.3.0 → terrakio_core-0.3.1}/README.md +0 -0
  12. {terrakio_core-0.3.0 → terrakio_core-0.3.1}/setup.cfg +0 -0
  13. {terrakio_core-0.3.0 → terrakio_core-0.3.1}/terrakio_core/auth.py +0 -0
  14. {terrakio_core-0.3.0 → terrakio_core-0.3.1}/terrakio_core/config.py +0 -0
  15. {terrakio_core-0.3.0 → terrakio_core-0.3.1}/terrakio_core/dataset_management.py +0 -0
  16. {terrakio_core-0.3.0 → terrakio_core-0.3.1}/terrakio_core/exceptions.py +0 -0
  17. {terrakio_core-0.3.0 → terrakio_core-0.3.1}/terrakio_core/group_access_management.py +0 -0
  18. {terrakio_core-0.3.0 → terrakio_core-0.3.1}/terrakio_core/space_management.py +0 -0
  19. {terrakio_core-0.3.0 → terrakio_core-0.3.1}/terrakio_core/user_management.py +0 -0
  20. {terrakio_core-0.3.0 → terrakio_core-0.3.1}/terrakio_core.egg-info/dependency_links.txt +0 -0
  21. {terrakio_core-0.3.0 → terrakio_core-0.3.1}/terrakio_core.egg-info/requires.txt +0 -0
  22. {terrakio_core-0.3.0 → terrakio_core-0.3.1}/terrakio_core.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: terrakio-core
3
- Version: 0.3.0
3
+ Version: 0.3.1
4
4
  Summary: Core components for Terrakio API clients
5
5
  Author-email: Yupeng Chao <yupeng@haizea.com.au>
6
6
  Project-URL: Homepage, https://github.com/HaizeaAnalytics/terrakio-python-api
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "terrakio-core"
7
- version = "0.3.0"
7
+ version = "0.3.1"
8
8
  authors = [
9
9
  {name = "Yupeng Chao", email = "yupeng@haizea.com.au"},
10
10
  ]
@@ -4,4 +4,4 @@ Terrakio Core
4
4
  Core components for Terrakio API clients.
5
5
  """
6
6
 
7
- __version__ = "0.3.0"
7
+ __version__ = "0.3.1"
@@ -13,6 +13,7 @@ from shapely.geometry import shape, mapping
13
13
  from shapely.geometry.base import BaseGeometry as ShapelyGeometry
14
14
  from google.cloud import storage
15
15
  from .exceptions import APIError, ConfigurationError
16
+ from .decorators import admin_only_params
16
17
  import logging
17
18
  import textwrap
18
19
 
@@ -536,8 +537,8 @@ class BaseClient:
536
537
  def __exit__(self, exc_type, exc_val, exc_tb):
537
538
  self.close()
538
539
 
539
- # Mass Stats methods
540
- def upload_mass_stats(self, name, size, bucket, output, location=None, **kwargs):
540
+ @admin_only_params('location', 'force_loc', 'server')
541
+ def execute_job(self, name, region, output, config, overwrite=False, skip_existing=False, request_json=None, manifest_json=None, location=None, force_loc=None, server="dev-au.terrak.io"):
541
542
  if not self.mass_stats:
542
543
  from terrakio_core.mass_stats import MassStats
543
544
  if not self.url or not self.key:
@@ -548,20 +549,8 @@ class BaseClient:
548
549
  verify=self.verify,
549
550
  timeout=self.timeout
550
551
  )
551
- return self.mass_stats.upload_request(name, size, bucket, output, location, **kwargs)
552
+ return self.mass_stats.execute_job(name, region, output, config, overwrite, skip_existing, request_json, manifest_json, location, force_loc, server)
552
553
 
553
- def start_mass_stats_job(self, task_id):
554
- if not self.mass_stats:
555
- from terrakio_core.mass_stats import MassStats
556
- if not self.url or not self.key:
557
- raise ConfigurationError("Mass Stats client not initialized. Make sure API URL and key are set.")
558
- self.mass_stats = MassStats(
559
- base_url=self.url,
560
- api_key=self.key,
561
- verify=self.verify,
562
- timeout=self.timeout
563
- )
564
- return self.mass_stats.start_job(task_id)
565
554
 
566
555
  def get_mass_stats_task_id(self, name, stage, uid=None):
567
556
  if not self.mass_stats:
@@ -1123,22 +1112,66 @@ class BaseClient:
1123
1112
  return task_id
1124
1113
 
1125
1114
 
1126
- def train_model(self, model_name: str, training_data: dict) -> dict:
1115
+ # def train_model(self, model_name: str, training_data: dict) -> dict:
1116
+ # """
1117
+ # Train a model using the external model training API.
1118
+
1119
+ # Args:
1120
+ # model_name (str): The name of the model to train.
1121
+ # training_data (dict): Dictionary containing training data parameters.
1122
+
1123
+ # Returns:
1124
+ # dict: The response from the model training API.
1125
+ # """
1126
+ # endpoint = "https://modeltraining-573248941006.australia-southeast1.run.app/train_model"
1127
+ # payload = {
1128
+ # "model_name": model_name,
1129
+ # "training_data": training_data
1130
+ # }
1131
+ # try:
1132
+ # response = self.session.post(endpoint, json=payload, timeout=self.timeout, verify=self.verify)
1133
+ # if not response.ok:
1134
+ # error_msg = f"Model training request failed: {response.status_code} {response.reason}"
1135
+ # try:
1136
+ # error_data = response.json()
1137
+ # if "detail" in error_data:
1138
+ # error_msg += f" - {error_data['detail']}"
1139
+ # except Exception:
1140
+ # if response.text:
1141
+ # error_msg += f" - {response.text}"
1142
+ # raise APIError(error_msg)
1143
+ # return response.json()
1144
+ # except requests.RequestException as e:
1145
+ # raise APIError(f"Model training request failed: {str(e)}")
1146
+
1147
+
1148
+ def train_model(self, model_name: str, training_dataset: str, task_type: str, model_category: str, architecture: str, region: str, hyperparameters: dict = None) -> dict:
1127
1149
  """
1128
1150
  Train a model using the external model training API.
1129
-
1151
+
1130
1152
  Args:
1131
1153
  model_name (str): The name of the model to train.
1132
- training_data (dict): Dictionary containing training data parameters.
1133
-
1154
+ training_dataset (str): The training dataset identifier.
1155
+ task_type (str): The type of ML task (e.g., regression, classification).
1156
+ model_category (str): The category of model (e.g., random_forest).
1157
+ architecture (str): The model architecture.
1158
+ region (str): The region identifier.
1159
+ hyperparameters (dict, optional): Additional hyperparameters for training.
1160
+
1134
1161
  Returns:
1135
1162
  dict: The response from the model training API.
1136
1163
  """
1137
- endpoint = "https://modeltraining-573248941006.australia-southeast1.run.app/train_model"
1138
1164
  payload = {
1139
1165
  "model_name": model_name,
1140
- "training_data": training_data
1166
+ "training_dataset": training_dataset,
1167
+ "task_type": task_type,
1168
+ "model_category": model_category,
1169
+ "architecture": architecture,
1170
+ "region": region,
1171
+ "hyperparameters": hyperparameters
1141
1172
  }
1173
+ endpoint = f"{self.url.rstrip('/')}/train_model"
1174
+ print("the payload is ", payload)
1142
1175
  try:
1143
1176
  response = self.session.post(endpoint, json=payload, timeout=self.timeout, verify=self.verify)
1144
1177
  if not response.ok:
@@ -1155,22 +1188,158 @@ class BaseClient:
1155
1188
  except requests.RequestException as e:
1156
1189
  raise APIError(f"Model training request failed: {str(e)}")
1157
1190
 
1158
- def deploy_model(self, dataset: str, product:str, model_name:str, input_expression: str, model_training_job_name: str, uid: str, dates_iso8601: list):
1159
- # we have the dataset and we have the product, and we have the model name, we need to create a new json file and add that to the dataset as our virtual dataset
1160
- # upload the script to the bucket, the script should be able to download the model and do the inferencing
1161
- # we need to upload the the json to the to the dataset as our virtual dataset
1162
- # then we do nothing and wait for the user to make the request call to the explorer
1163
- # we should have a uniform script for the random forest deployment
1164
- # create a script for each model
1165
- # upload script to google bucket,
1166
- #
1191
+ # Mass Stats methods
1192
+ def combine_tiles(self,
1193
+ data_name: str,
1194
+ usezarr: bool,
1195
+ overwrite: bool,
1196
+ output : str) -> dict:
1197
+
1198
+ if not self.mass_stats:
1199
+ from terrakio_core.mass_stats import MassStats
1200
+ if not self.url or not self.key:
1201
+ raise ConfigurationError("Mass Stats client not initialized. Make sure API URL and key are set.")
1202
+ self.mass_stats = MassStats(
1203
+ base_url=self.url,
1204
+ api_key=self.key,
1205
+ verify=self.verify,
1206
+ timeout=self.timeout
1207
+ )
1208
+ return self.mass_stats.combine_tiles(data_name, usezarr, overwrite, output)
1209
+
1210
+
1211
+
1212
+ def generate_combine_tiles(
1213
+ self,
1214
+ name: str,
1215
+ aoi: str,
1216
+ expression: str,
1217
+ output: str,
1218
+ tile_size: float = 128.0,
1219
+ crs: str = "epsg:4326",
1220
+ res: float = 0.0001,
1221
+ region: str = "aus",
1222
+ to_crs: str = "epsg:4326",
1223
+ overwrite: bool = True,
1224
+ skip_existing: bool = False,
1225
+ non_interactive: bool = True,
1226
+ usezarr: bool = False,
1227
+ poll_interval: int = 30 # seconds between job status checks
1228
+ ) -> dict:
1229
+
1230
+ from terrakio_core.generation.tiles import tiles
1231
+ import tempfile
1232
+ import time
1233
+
1234
+ body, reqs, groups = tiles(
1235
+ name = name,
1236
+ aoi = aoi,
1237
+ expression = expression,
1238
+ output = output,
1239
+ tile_size = tile_size,
1240
+ crs = crs,
1241
+ res = res,
1242
+ region = region,
1243
+ to_crs = to_crs,
1244
+ fully_cover = True,
1245
+ overwrite = overwrite,
1246
+ skip_existing = skip_existing,
1247
+ non_interactive = non_interactive
1248
+ )
1249
+
1250
+ # Create temp json files before upload
1251
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as tempreq:
1252
+ tempreq.write(reqs)
1253
+ tempreqname = tempreq.name
1254
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as tempmanifest:
1255
+ tempmanifest.write(groups)
1256
+ tempmanifestname = tempmanifest.name
1257
+
1258
+ if not self.mass_stats:
1259
+ from terrakio_core.mass_stats import MassStats
1260
+ if not self.url or not self.key:
1261
+ raise ConfigurationError("Mass Stats client not initialized. Make sure API URL and key are set.")
1262
+ self.mass_stats = MassStats(
1263
+ base_url=self.url,
1264
+ api_key=self.key,
1265
+ verify=self.verify,
1266
+ timeout=self.timeout
1267
+ )
1268
+
1269
+ task_id = self.mass_stats.execute_job(
1270
+ name=body["name"],
1271
+ region=body["region"],
1272
+ output=body["output"],
1273
+ config = {},
1274
+ overwrite=body["overwrite"],
1275
+ skip_existing=body["skip_existing"],
1276
+ request_json=tempreqname,
1277
+ manifest_json=tempmanifestname,
1278
+ )
1279
+
1280
+ ### Start combining tiles when generation-tiles job is done
1281
+ start_time = time.time()
1282
+ status = None
1283
+
1284
+ while True:
1285
+ try:
1286
+ taskid = task_id['task_id']
1287
+ trackinfo = self.mass_stats.track_job([taskid])
1288
+ status = trackinfo[taskid]['status']
1289
+
1290
+ # Check completion states
1291
+ if status == 'Completed':
1292
+ print('Tiles generated successfully!')
1293
+ break
1294
+ elif status in ['Failed', 'Cancelled', 'Error']:
1295
+ raise RuntimeError(f"Job {taskid} failed with status: {status}")
1296
+ else:
1297
+ # Job is still running
1298
+ elapsed_time = time.time() - start_time
1299
+ print(f"Job status: {status} - Elapsed time: {elapsed_time:.1f}s", end='\r')
1300
+
1301
+ # Sleep before next check
1302
+ time.sleep(poll_interval)
1303
+
1304
+
1305
+ except KeyboardInterrupt:
1306
+ print(f"\nInterrupted! Job {taskid} is still running in the background.")
1307
+ raise
1308
+ except Exception as e:
1309
+ print(f"\nError tracking job: {e}")
1310
+ raise
1311
+
1312
+ # Clean up temporary files
1313
+ import os
1314
+ os.unlink(tempreqname)
1315
+ os.unlink(tempmanifestname)
1316
+
1167
1317
 
1318
+ # Start combining tiles
1319
+ if not self.mass_stats:
1320
+ from terrakio_core.mass_stats import MassStats
1321
+ if not self.url or not self.key:
1322
+ raise ConfigurationError("Mass Stats client not initialized. Make sure API URL and key are set.")
1323
+ self.mass_stats = MassStats(
1324
+ base_url=self.url,
1325
+ api_key=self.key,
1326
+ verify=self.verify,
1327
+ timeout=self.timeout
1328
+ )
1329
+
1330
+ return self.mass_stats.combine_tiles(body["name"], usezarr, body["overwrite"], body["output"])
1331
+
1332
+
1333
+
1334
+
1335
+
1336
+
1337
+
1338
+
1339
+
1340
+ def deploy_model(self, dataset: str, product:str, model_name:str, input_expression: str, model_training_job_name: str, uid: str, dates_iso8601: list):
1168
1341
  script_content = self._generate_script(model_name, product, model_training_job_name, uid)
1169
- # self.create_dataset(collection = "terrakio-datasets", input = input, )
1170
- # we have the script, we need to upload it to the bucket
1171
1342
  script_name = f"{product}.py"
1172
- print("the script content is ", script_content)
1173
- print("the script name is ", script_name)
1174
1343
  self._upload_script_to_bucket(script_content, script_name, model_training_job_name, uid)
1175
1344
  # after uploading the script, we need to create a new virtual dataset
1176
1345
  self._create_dataset(name = dataset, collection = "terrakio-datasets", products = [product], path = f"gs://terrakio-mass-requests/{uid}/{model_training_job_name}/inference_scripts", input = input_expression, dates_iso8601 = dates_iso8601, padding = 0)
@@ -1238,7 +1407,6 @@ class BaseClient:
1238
1407
  client = storage.Client()
1239
1408
  bucket = client.get_bucket('terrakio-mass-requests')
1240
1409
  blob = bucket.blob(f'{uid}/{model_training_job_name}/inference_scripts/{script_name}')
1241
- # the first layer is the uid, the second layer is the model training job name
1242
1410
  blob.upload_from_string(script_content, content_type='text/plain')
1243
1411
  logging.info(f"Script uploaded successfully to {uid}/{model_training_job_name}/inference_scripts/{script_name}")
1244
1412
 
@@ -0,0 +1,18 @@
1
# terrakio_core/decorators.py
import functools


def admin_only_params(*restricted_params):
    """
    Decorator factory for restricting method parameters to admin users only.

    A client instance is treated as admin when its ``_is_admin`` attribute is
    truthy; admins get every argument passed through untouched.  Non-admin
    callers raise ``PermissionError`` if they supply any restricted parameter,
    and the restricted names are stripped from the kwargs otherwise.

    NOTE(review): only keyword arguments are inspected — a restricted
    parameter passed positionally bypasses the check.  Confirm decorated
    methods keep restricted params keyword-only.
    """
    def decorator(func):
        # functools.wraps preserves __name__/__doc__/signature metadata so
        # introspection and help() on decorated client methods keep working.
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            # Admins bypass all filtering.
            if getattr(self, '_is_admin', False):
                return func(self, *args, **kwargs)

            admin_params_used = set(kwargs.keys()) & set(restricted_params)
            if admin_params_used:
                raise PermissionError(f"Parameters {admin_params_used} are only available to admin users")

            # Drop restricted names so the wrapped function sees its defaults.
            filtered_kwargs = {k: v for k, v in kwargs.items() if k not in restricted_params}
            return func(self, *args, **filtered_kwargs)
        return wrapper
    return decorator
@@ -0,0 +1,95 @@
1
+ ### implementing generation-tiles in python api
2
+ ### function should just generate the json file for mass_stats to pick up.
3
+
4
+ import geopandas as gpd
5
+ import shapely.geometry
6
+ import json
7
+ from rich import print
8
+
9
def escape_newline(string):
    """Convert literal backslash-n sequences into real newline characters.

    Accepts a single string or a list of strings; a list is converted
    element-wise and returned as a new list.
    """
    if isinstance(string, list):
        return [item.replace('\\n', '\n') for item in string]
    return string.replace('\\n', '\n')
14
+
15
def get_bounds(aoi, crs, to_crs = None):
    # Load the AOI vector file and stamp it with the caller-supplied CRS,
    # overriding whatever CRS the file itself declares.
    # NOTE(review): assumes the file's coordinates really are in `crs` —
    # confirm with callers before relying on this override.
    aoi : gpd.GeoDataFrame = gpd.read_file(aoi)
    aoi = aoi.set_crs(crs, allow_override=True)
    if to_crs:
        aoi = aoi.to_crs(to_crs)
    # Only the FIRST feature's geometry is considered; any additional
    # features in the file are ignored for the bounding box.
    bounds = aoi.geometry[0].bounds
    # Returns (minx, miny, maxx, maxy, reprojected GeoDataFrame).
    return *bounds, aoi
22
+
23
def tile_generator(x_min, y_min, x_max, y_max, aoi, crs, res, tile_size, expression, output, fully_cover=True):
    """Yield one request dict per grid tile that intersects the AOI.

    The grid starts at the top-left corner (x_min, y_max) and steps by
    ``tile_size * res`` CRS units per tile. Tiles whose bounding box does
    not intersect the first AOI geometry are skipped. Yields
    ``(request_dict, column_index, row_index)`` tuples.
    """
    # Number of whole tiles that fit along each axis.
    i_max = int((x_max-x_min)/(tile_size*res))
    j_max = int((y_max-y_min)/(tile_size*res))
    if fully_cover:
        # Add one extra column/row so the grid covers the bounds instead of
        # truncating the ragged edge.
        i_max += 1
        j_max += 1
    for j in range(0, int(j_max)):
        for i in range(0, int(i_max)):
            #print(f"Processing tile {i} {j}")
            # Top-left corner of tile (i, j): x grows rightwards, y shrinks
            # downwards from y_max.
            x = x_min + i*(tile_size*res)
            y = y_max - j*(tile_size*res)
            bbox = shapely.geometry.box(x, y-(tile_size*res), x + (tile_size*res), y)
            # Only the first AOI geometry is tested, mirroring get_bounds.
            if not aoi.geometry[0].intersects(bbox):
                continue
            feat = {"type": "Feature", "geometry": bbox.__geo_interface__}
            data = {
                "feature": feat,
                "in_crs": crs,
                "out_crs": crs,
                "resolution": res,
                "expr" : expression,
                "output" : output,
            }
            yield data, i , j
47
+
48
+
49
def tiles(
    name: str,
    aoi : str,
    expression: str = "red=S2v2#(year,median).red@(year =2024) \n red",
    output: str = "netcdf",
    tile_size : float = 512,
    crs : str = "epsg:3577",
    res: float = 10,
    region : str = "eu",
    to_crs: str = None,
    fully_cover: bool = True,
    overwrite: bool = False,
    skip_existing: bool = False,
    non_interactive: bool = False,
    ):
    """Build the payloads for a tiled mass-stats generation job.

    Computes the AOI bounds, enumerates intersecting tiles, and returns
    ``(job_body, requests_json, manifest_json)`` where the last two are
    JSON-encoded strings ready for upload.
    """
    x_min, y_min, x_max, y_max, aoi = get_bounds(aoi, crs, to_crs)

    if to_crs is None:
        to_crs = crs

    # One request entry per tile intersecting the AOI, named with
    # zero-padded column/row indices.
    reqs = [
        {"group": "tiles", "file": f"{name}_{i:02d}_{j:02d}", "request": tile_req}
        for tile_req, i, j in tile_generator(
            x_min, y_min, x_max, y_max, aoi, to_crs, res, tile_size,
            expression, output, fully_cover
        )
    ]

    # Distinct group names form the manifest (currently always ["tiles"]).
    groups = list({entry["group"] for entry in reqs})

    body = {
        "name" : name,
        "output" : output,
        "region" : region,
        "size" : len(reqs),
        "overwrite" : overwrite,
        "non_interactive": non_interactive,
        "skip_existing" : skip_existing,
    }

    return body, json.dumps(reqs), json.dumps(groups)
95
+
@@ -0,0 +1,496 @@
1
"""Client helper for the Terrakio mass-stats HTTP API.

Wraps job submission, tracking, download, cancellation, pyramid creation,
random sampling and tile combination endpoints under ``/mass_stats`` and
related routes.
"""
import requests
from typing import Optional, Dict, Any, List
import json
import json as json_lib
import gzip

class MassStats:
    """Thin session-based wrapper around the mass-stats REST endpoints.

    All requests authenticate via the ``x-api-key`` header set on a shared
    ``requests.Session``.
    """

    def __init__(self, base_url: str, api_key: str, verify: bool = True, timeout: int = 60):
        # base_url is normalised without a trailing slash so endpoint paths
        # can be appended with a single "/".
        self.base_url = base_url.rstrip('/')
        self.api_key = api_key
        self.verify = verify
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers.update({
            'x-api-key': self.api_key
        })

    def _upload_file(self, file_path: str, url: str, use_gzip: bool = False):
        """
        Helper method to upload a JSON file to a signed URL.

        Args:
            file_path: Path to the JSON file
            url: Signed URL to upload to
            use_gzip: Whether to compress the file with gzip

        Returns:
            requests.Response: The raw PUT response (status NOT checked here;
            callers inspect ``status_code``).

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
            ValueError: If the file is not valid JSON.
        """
        try:
            with open(file_path, 'r') as file:
                json_data = json_lib.load(file)
        except FileNotFoundError:
            raise FileNotFoundError(f"JSON file not found: {file_path}")
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON in file {file_path}: {e}")

        # Check if using simplejson and support ignore_nan
        # NOTE(review): with the stdlib json module this check is always
        # False (``json.dumps`` has no ``ignore_nan`` in co_varnames); it
        # only activates if ``json_lib`` is swapped for simplejson.
        if hasattr(json_lib, 'dumps') and 'ignore_nan' in json_lib.dumps.__code__.co_varnames:
            dumps_kwargs = {'ignore_nan': True}
        else:
            dumps_kwargs = {}

        if use_gzip:
            # Serialize and compress the JSON data
            body = gzip.compress(json_lib.dumps(json_data, **dumps_kwargs).encode('utf-8'))
            headers = {
                'Content-Type': 'application/json',
                'Content-Encoding': 'gzip'
            }
        else:
            body = json_lib.dumps(json_data, **dumps_kwargs).encode('utf-8')
            headers = {
                'Content-Type': 'application/json'
            }

        # Make the PUT request to the signed URL.  Uses the module-level
        # ``requests`` (not self.session) because signed URLs must not carry
        # the x-api-key header.
        response = requests.put(
            url,
            data=body,
            headers=headers
        )

        return response

    def upload_request(
        self,
        name: str,
        size: int,
        region: List[str],
        output: str,
        config: Dict[str, Any],
        location: Optional[str] = None,
        force_loc: Optional[bool] = None,
        overwrite: bool = False,
        server: Optional[str] = None,
        skip_existing: bool = False,
    ) -> Dict[str, Any]:
        """
        Initiate a mass stats upload job.

        Args:
            name: Name of the job
            size: Size of the job
            region: Region to run job [aus, eu, us]
            output: Output type
            config: Configuration dictionary
            location: (Optional) Location for the upload
            force_loc: Force location usage
            overwrite: Overwrite existing data
            server: Optional server
            skip_existing: Skip existing files

        Returns:
            Dict parsed from the server's JSON response; expected to include
            signed ``requests_url``/``manifest_url`` and the job ``id``.
        """

        # Step 2: Create the upload job and get signed URLs
        url = f"{self.base_url}/mass_stats/upload"

        data = {
            "name": name,
            "size": size,
            "region": region,
            "output": output,
            "config": config,
            "overwrite": overwrite,
            "skip_existing": skip_existing
        }

        # Optional fields are only sent when explicitly provided.
        if location is not None:
            data["location"] = location
        if force_loc is not None:
            data["force_loc"] = force_loc
        if server is not None:
            data["server"] = server
        response = self.session.post(
            url,
            json=data,
            verify=self.verify,
            timeout=self.timeout
        )
        return response.json()

    def execute_job(
        self,
        name: str,
        region: str,
        output: str,
        config: Dict[str, Any],
        overwrite: bool = False,
        skip_existing: bool = False,
        request_json: Optional[str] = None,
        manifest_json: Optional[str] = None,
        location: Optional[str] = None,
        force_loc: Optional[bool] = None,
        server: Optional[str] = None
    ) -> Dict[str, Any]:
        """Create, upload and start a mass-stats job in one call.

        Reads the request JSON file (a list of request dicts) to determine
        the job size, registers the job via :meth:`upload_request`, uploads
        the request/manifest payloads to the returned signed URLs, and then
        starts the job.

        Args:
            name / region / output / config: Job parameters (see upload_request).
            overwrite, skip_existing: Job flags.
            request_json: Path to a JSON file containing a list of requests.
            manifest_json: Path to a JSON file containing the group manifest.
            location, force_loc, server: Optional placement overrides.

        Returns:
            Dict returned by the start endpoint (includes the task id).

        Raises:
            FileNotFoundError / ValueError: For missing or malformed JSON files.
            Exception: If either signed-URL upload fails.
        """
        # Step 1: Calculate size from request JSON file if provided
        size = 0
        if request_json is not None:
            try:
                with open(request_json, 'r') as file:
                    request_data = json_lib.load(file)

                if isinstance(request_data, list):
                    size = len(request_data)
                else:
                    raise ValueError(f"Request JSON file {request_json} should contain a list of dictionaries")

            except FileNotFoundError:
                raise FileNotFoundError(f"Request JSON file not found: {request_json}")
            except json.JSONDecodeError as e:
                raise ValueError(f"Invalid JSON in request file {request_json}: {e}")

        upload_result = self.upload_request(name, size, region, output, config, location, force_loc, overwrite, server, skip_existing)

        # Step 3: Upload JSON files if provided
        if request_json is not None or manifest_json is not None:
            requests_url = upload_result.get('requests_url')
            manifest_url = upload_result.get('manifest_url')

            if request_json is not None:
                if not requests_url:
                    raise ValueError("No requests_url returned from server for request JSON upload")

                try:
                    # Requests payload is gzip-compressed; the manifest below
                    # is uploaded uncompressed.
                    requests_response = self._upload_file(request_json, requests_url, use_gzip=True)
                    if requests_response.status_code not in [200, 201, 204]:
                        print(f"Requests upload error: {requests_response.text}")
                        raise Exception(f"Failed to upload request JSON: {requests_response.text}")
                except Exception as e:
                    raise Exception(f"Error uploading request JSON file {request_json}: {e}")

            if manifest_json is not None:
                if not manifest_url:
                    raise ValueError("No manifest_url returned from server for manifest JSON upload")

                try:
                    manifest_response = self._upload_file(manifest_json, manifest_url, use_gzip=False)
                    if manifest_response.status_code not in [200, 201, 204]:
                        print(f"Manifest upload error: {manifest_response.text}")
                        raise Exception(f"Failed to upload manifest JSON: {manifest_response.text}")
                except Exception as e:
                    raise Exception(f"Error uploading manifest JSON file {manifest_json}: {e}")


        start_job_task_id =self.start_job(upload_result.get("id"))
        return start_job_task_id


    def start_job(self, task_id: str) -> Dict[str, Any]:
        """
        Start a mass stats job by task ID.
        """
        url = f"{self.base_url}/mass_stats/start/{task_id}"
        response = self.session.post(url, verify=self.verify, timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def get_task_id(self, name: str, stage: str, uid: Optional[str] = None) -> Dict[str, Any]:
        """
        Get the task ID for a mass stats job by name and stage (and optionally user ID).
        """
        url = f"{self.base_url}/mass_stats/job_id?name={name}&stage={stage}"
        if uid is not None:
            url += f"&uid={uid}"
        response = self.session.get(url, verify=self.verify, timeout=self.timeout)
        print("response text is ", response.text)
        return response.json()

    def track_job(self, ids: Optional[list] = None) -> Dict[str, Any]:
        """
        Track the status of one or more mass stats jobs.
        If ids is None, gets progress for all of the user's jobs.
        """
        url = f"{self.base_url}/mass_stats/track"
        data = {"ids": ids} if ids is not None else {}
        response = self.session.post(url, json=data, verify=self.verify, timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def get_history(self, limit: int = 100) -> Dict[str, Any]:
        """
        Get the history of mass stats jobs.
        """
        url = f"{self.base_url}/mass_stats/history"
        params = {"limit": limit}
        response = self.session.get(url, params=params, verify=self.verify, timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def start_post_processing(
        self,
        process_name: str,
        data_name: str,
        output: str,
        consumer_path: str,
        overwrite: bool = False
    ) -> Dict[str, Any]:
        """
        Start post processing for a mass stats job.
        Args:
            process_name: Folder to store output
            data_name: Name of job used to create data
            output: Output type
            consumer_path: Path to the post processing script (Python file)
            overwrite: Overwrite existing post processing output in same location
        Returns:
            Dict with task_id
        """
        url = f"{self.base_url}/mass_stats/post_process"
        # NOTE(review): the file handle opened here is never explicitly
        # closed — relies on GC after the request completes.
        files = {
            'consumer': (consumer_path, open(consumer_path, 'rb'), 'text/x-python')
        }
        data = {
            'process_name': process_name,
            'data_name': data_name,
            'output': output,
            'overwrite': str(overwrite).lower()
        }
        response = self.session.post(url, data=data, files=files, verify=self.verify, timeout=self.timeout)
        print("the response is ", response.text)
        # response.raise_for_status()
        return response.json()

    def download_results(
        self,
        id: Optional[str] = None,
        force_loc: bool = False,
        bucket: Optional[str] = None,
        location: Optional[str] = None,
        output: Optional[str] = None,
        file_name: Optional[str] = None
    ) -> bytes:
        """
        Download results from a mass stats job or arbitrary results if force_loc is True.
        Returns the content of the .zip file.
        """
        url = f"{self.base_url}/mass_stats/download"
        data = {}
        # Only include fields the caller actually supplied.
        if id is not None:
            data["id"] = id
        if force_loc:
            data["force_loc"] = True
        if bucket is not None:
            data["bucket"] = bucket
        if location is not None:
            data["location"] = location
        if output is not None:
            data["output"] = output
        if file_name is not None:
            data["file_name"] = file_name
        response = self.session.post(url, json=data, verify=self.verify, timeout=self.timeout)
        print("the response is ", response.text)
        # response.raise_for_status()
        print("the response content is ", response.content)
        return response.content

    def cancel_job(self, id: str) -> Dict[str, Any]:
        """
        Cancel a mass stats job by ID.
        """
        url = f"{self.base_url}/mass_stats/cancel/{id}"
        response = self.session.post(url, verify=self.verify, timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def cancel_all_jobs(self) -> Dict[str, Any]:
        """
        Cancel all mass stats jobs for the user.
        """
        url = f"{self.base_url}/mass_stats/cancel"
        response = self.session.post(url, verify=self.verify, timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def create_pyramids(self, name: str, levels: int, config: Dict[str, Any]) -> Dict[str, Any]:
        """
        Create pyramids for a dataset.
        Args:
            name: Name for the pyramid job
            levels: Number of zoom levels to compute
            config: Dataset config (mapping)
        Returns:
            Dict with task_id
        """
        url = f"{self.base_url}/pyramids/create"
        data = {
            "name": name,
            "levels": levels,
            "config": config
        }
        response = self.session.post(url, json=data, verify=self.verify, timeout=self.timeout)
        print("the url is ", url)
        print("the response is ", response.text)
        print("the response status code is ", response.status_code)
        # response.raise_for_status()
        return response.json()

    def random_sample(
        self,
        name: str,
        config: dict,
        aoi: dict,
        samples: int,
        year_range: list,
        crs: str,
        tile_size: int,
        res: float,
        output: str,
        server: str,
        region: str,
        bucket: str,
        overwrite: bool = False
    ) -> Dict[str, Any]:
        """
        Submit a random sample job.

        Raises:
            ValueError: If ``year_range`` is not a two-element list of years.
        """
        if year_range is None or len(year_range) != 2:
            raise ValueError("year_range must be a list of two integers")
        start_year, end_year = year_range
        if start_year is None or end_year is None:
            raise ValueError("Both start_year and end_year must be provided for year_range.")

        url = f"{self.base_url}/random_sample"
        data = {
            "name": name,
            "overwrite": overwrite,
            "config": config,
            "aoi": aoi,
            "samples": samples,
            "year_range": [start_year, end_year],
            "crs": crs,
            "tile_size": tile_size,
            "res": res,
            "output": output,
            "server": server,
            "region": region,
            "bucket": bucket
        }
        print("the data is ", data)
        print("the url is ", url)
        response = self.session.post(url, json=data, verify=self.verify, timeout=self.timeout)
        print("Status code:", response.status_code)
        print("Response text:", response.text)
        # response.raise_for_status()
        return response.json()


    ### Adding the wrapper function to call endpoint /mass_stats/combine_tiles
    def combine_tiles(
        self,
        data_name: str,
        usezarr: bool = False,
        overwrite: bool = True,
        output : str = "netcdf"
    ) -> Dict[str, Any]:
        """Request that the tiles produced by job ``data_name`` be combined.

        Booleans are serialised as lowercase strings ("true"/"false") in the
        request body.
        """
        url = f"{self.base_url}/mass_stats/combine_tiles"
        request_body = {
            'data_name': data_name,
            'usezarr': str(usezarr).lower(),
            'output': output,
            'overwrite': str(overwrite).lower()
        }
        print(f"Request body: {json.dumps(request_body, indent=2)}")
        response = self.session.post(url, json=request_body, verify=self.verify, timeout=self.timeout)
        print(f"Response text: {response.text}")
        return response.json()
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: terrakio-core
3
- Version: 0.3.0
3
+ Version: 0.3.1
4
4
  Summary: Core components for Terrakio API clients
5
5
  Author-email: Yupeng Chao <yupeng@haizea.com.au>
6
6
  Project-URL: Homepage, https://github.com/HaizeaAnalytics/terrakio-python-api
@@ -5,6 +5,7 @@ terrakio_core/auth.py
5
5
  terrakio_core/client.py
6
6
  terrakio_core/config.py
7
7
  terrakio_core/dataset_management.py
8
+ terrakio_core/decorators.py
8
9
  terrakio_core/exceptions.py
9
10
  terrakio_core/group_access_management.py
10
11
  terrakio_core/mass_stats.py
@@ -14,4 +15,5 @@ terrakio_core.egg-info/PKG-INFO
14
15
  terrakio_core.egg-info/SOURCES.txt
15
16
  terrakio_core.egg-info/dependency_links.txt
16
17
  terrakio_core.egg-info/requires.txt
17
- terrakio_core.egg-info/top_level.txt
18
+ terrakio_core.egg-info/top_level.txt
19
+ terrakio_core/generation/tiles.py
@@ -1,262 +0,0 @@
1
- import requests
2
- from typing import Optional, Dict, Any
3
-
4
- class MassStats:
5
- def __init__(self, base_url: str, api_key: str, verify: bool = True, timeout: int = 60):
6
- self.base_url = base_url.rstrip('/')
7
- self.api_key = api_key
8
- self.verify = verify
9
- self.timeout = timeout
10
- self.session = requests.Session()
11
- self.session.headers.update({
12
- 'x-api-key': self.api_key
13
- })
14
-
15
- def upload_request(
16
- self,
17
- name: str,
18
- size: int,
19
- bucket: str,
20
- output: str,
21
- location: Optional[str] = None,
22
- force_loc: bool = False,
23
- config: Optional[Dict[str, Any]] = None,
24
- overwrite: bool = False,
25
- server: Optional[str] = None,
26
- skip_existing: bool = False
27
- ) -> Dict[str, Any]:
28
- """
29
- Initiate a mass stats upload job.
30
-
31
- Args:
32
- name: Name of the job
33
- size: Size of the data
34
- bucket: Storage bucket
35
- output: Output path or identifier
36
- location: (Optional) Location for the upload
37
- force_loc: Force location usage
38
- config: Optional configuration dictionary
39
- overwrite: Overwrite existing data
40
- server: Optional server
41
- skip_existing: Skip existing files
42
- """
43
- url = f"{self.base_url}/mass_stats/upload"
44
- data = {
45
- "name": name,
46
- "size": size,
47
- "bucket": bucket,
48
- "output": output,
49
- "force_loc": force_loc,
50
- "overwrite": overwrite,
51
- "skip_existing": skip_existing
52
- }
53
- if location is not None:
54
- data["location"] = location
55
- if config is not None:
56
- data["config"] = config
57
- if server is not None:
58
- data["server"] = server
59
- response = self.session.post(url, json=data, verify=self.verify, timeout=self.timeout)
60
- print("the response is ", response.text)
61
- # response.raise_for_status()
62
- return response.json()
63
-
64
- def start_job(self, task_id: str) -> Dict[str, Any]:
65
- """
66
- Start a mass stats job by task ID.
67
- """
68
- url = f"{self.base_url}/mass_stats/start/{task_id}"
69
- print("the self session header is ", self.session.headers)
70
- response = self.session.post(url, verify=self.verify, timeout=self.timeout)
71
- response.raise_for_status()
72
- return response.json()
73
-
74
- def get_task_id(self, name: str, stage: str, uid: Optional[str] = None) -> Dict[str, Any]:
75
- """
76
- Get the task ID for a mass stats job by name and stage (and optionally user ID).
77
- """
78
- url = f"{self.base_url}/mass_stats/job_id?name={name}&stage={stage}"
79
- if uid is not None:
80
- url += f"&uid={uid}"
81
- response = self.session.get(url, verify=self.verify, timeout=self.timeout)
82
- print("response text is ", response.text)
83
- return response.json()
84
-
85
- def track_job(self, ids: Optional[list] = None) -> Dict[str, Any]:
86
- """
87
- Track the status of one or more mass stats jobs.
88
- If ids is None, gets progress for all of the user's jobs.
89
- """
90
- url = f"{self.base_url}/mass_stats/track"
91
- data = {"ids": ids} if ids is not None else {}
92
- response = self.session.post(url, json=data, verify=self.verify, timeout=self.timeout)
93
- response.raise_for_status()
94
- return response.json()
95
-
96
- def get_history(self, limit: int = 100) -> Dict[str, Any]:
97
- """
98
- Get the history of mass stats jobs.
99
- """
100
- url = f"{self.base_url}/mass_stats/history"
101
- params = {"limit": limit}
102
- response = self.session.get(url, params=params, verify=self.verify, timeout=self.timeout)
103
- response.raise_for_status()
104
- return response.json()
105
-
106
- def start_post_processing(
107
- self,
108
- process_name: str,
109
- data_name: str,
110
- output: str,
111
- consumer_path: str,
112
- overwrite: bool = False
113
- ) -> Dict[str, Any]:
114
- """
115
- Start post processing for a mass stats job.
116
- Args:
117
- process_name: Folder to store output
118
- data_name: Name of job used to create data
119
- output: Output type
120
- consumer_path: Path to the post processing script (Python file)
121
- overwrite: Overwrite existing post processing output in same location
122
- Returns:
123
- Dict with task_id
124
- """
125
- url = f"{self.base_url}/mass_stats/post_process"
126
- files = {
127
- 'consumer': (consumer_path, open(consumer_path, 'rb'), 'text/x-python')
128
- }
129
- data = {
130
- 'process_name': process_name,
131
- 'data_name': data_name,
132
- 'output': output,
133
- 'overwrite': str(overwrite).lower()
134
- }
135
- response = self.session.post(url, data=data, files=files, verify=self.verify, timeout=self.timeout)
136
- print("the response is ", response.text)
137
- # response.raise_for_status()
138
- return response.json()
139
-
140
- def download_results(
141
- self,
142
- id: Optional[str] = None,
143
- force_loc: bool = False,
144
- bucket: Optional[str] = None,
145
- location: Optional[str] = None,
146
- output: Optional[str] = None,
147
- file_name: Optional[str] = None
148
- ) -> bytes:
149
- """
150
- Download results from a mass stats job or arbitrary results if force_loc is True.
151
- Returns the content of the .zip file.
152
- """
153
- url = f"{self.base_url}/mass_stats/download"
154
- data = {}
155
- if id is not None:
156
- data["id"] = id
157
- if force_loc:
158
- data["force_loc"] = True
159
- if bucket is not None:
160
- data["bucket"] = bucket
161
- if location is not None:
162
- data["location"] = location
163
- if output is not None:
164
- data["output"] = output
165
- if file_name is not None:
166
- data["file_name"] = file_name
167
- response = self.session.post(url, json=data, verify=self.verify, timeout=self.timeout)
168
- print("the response is ", response.text)
169
- # response.raise_for_status()
170
- print("the response content is ", response.content)
171
- return response.content
172
-
173
- def cancel_job(self, id: str) -> Dict[str, Any]:
174
- """
175
- Cancel a mass stats job by ID.
176
- """
177
- url = f"{self.base_url}/mass_stats/cancel/{id}"
178
- response = self.session.post(url, verify=self.verify, timeout=self.timeout)
179
- response.raise_for_status()
180
- return response.json()
181
-
182
- def cancel_all_jobs(self) -> Dict[str, Any]:
183
- """
184
- Cancel all mass stats jobs for the user.
185
- """
186
- url = f"{self.base_url}/mass_stats/cancel"
187
- response = self.session.post(url, verify=self.verify, timeout=self.timeout)
188
- response.raise_for_status()
189
- return response.json()
190
-
191
- def create_pyramids(self, name: str, levels: int, config: Dict[str, Any]) -> Dict[str, Any]:
192
- """
193
- Create pyramids for a dataset.
194
- Args:
195
- name: Name for the pyramid job
196
- levels: Number of zoom levels to compute
197
- config: Dataset config (mapping)
198
- Returns:
199
- Dict with task_id
200
- """
201
- url = f"{self.base_url}/pyramids/create"
202
- data = {
203
- "name": name,
204
- "levels": levels,
205
- "config": config
206
- }
207
- response = self.session.post(url, json=data, verify=self.verify, timeout=self.timeout)
208
- print("the url is ", url)
209
- print("the response is ", response.text)
210
- print("the response status code is ", response.status_code)
211
- # response.raise_for_status()
212
- return response.json()
213
-
214
- def random_sample(
215
- self,
216
- name: str,
217
- config: dict,
218
- aoi: dict,
219
- samples: int,
220
- year_range: list,
221
- crs: str,
222
- tile_size: int,
223
- res: float,
224
- output: str,
225
- server: str,
226
- region: str,
227
- bucket: str,
228
- overwrite: bool = False
229
- ) -> Dict[str, Any]:
230
- """
231
- Submit a random sample job.
232
- """
233
- if year_range is None or len(year_range) != 2:
234
- raise ValueError("year_range must be a list of two integers")
235
- start_year, end_year = year_range
236
- if start_year is None or end_year is None:
237
- raise ValueError("Both start_year and end_year must be provided for year_range.")
238
-
239
- url = f"{self.base_url}/random_sample"
240
- data = {
241
- "name": name,
242
- "overwrite": overwrite,
243
- "config": config,
244
- "aoi": aoi,
245
- "samples": samples,
246
- "year_range": [start_year, end_year],
247
- "crs": crs,
248
- "tile_size": tile_size,
249
- "res": res,
250
- "output": output,
251
- "server": server,
252
- "region": region,
253
- "bucket": bucket
254
- }
255
- print("the data is ", data)
256
- print("the url is ", url)
257
- response = self.session.post(url, json=data, verify=self.verify, timeout=self.timeout)
258
- print("Status code:", response.status_code)
259
- print("Response text:", response.text)
260
- # response.raise_for_status()
261
- return response.json()
262
-
File without changes
File without changes