terrakio-core 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of terrakio-core might be problematic. Click here for more details.
- terrakio_core/__init__.py +1 -1
- terrakio_core/client.py +203 -35
- terrakio_core/decorators.py +18 -0
- terrakio_core/generation/tiles.py +95 -0
- terrakio_core/mass_stats.py +251 -17
- {terrakio_core-0.3.0.dist-info → terrakio_core-0.3.1.dist-info}/METADATA +1 -1
- terrakio_core-0.3.1.dist-info/RECORD +16 -0
- terrakio_core-0.3.0.dist-info/RECORD +0 -14
- {terrakio_core-0.3.0.dist-info → terrakio_core-0.3.1.dist-info}/WHEEL +0 -0
- {terrakio_core-0.3.0.dist-info → terrakio_core-0.3.1.dist-info}/top_level.txt +0 -0
terrakio_core/__init__.py
CHANGED
terrakio_core/client.py
CHANGED
|
@@ -13,6 +13,7 @@ from shapely.geometry import shape, mapping
|
|
|
13
13
|
from shapely.geometry.base import BaseGeometry as ShapelyGeometry
|
|
14
14
|
from google.cloud import storage
|
|
15
15
|
from .exceptions import APIError, ConfigurationError
|
|
16
|
+
from .decorators import admin_only_params
|
|
16
17
|
import logging
|
|
17
18
|
import textwrap
|
|
18
19
|
|
|
@@ -536,8 +537,8 @@ class BaseClient:
|
|
|
536
537
|
def __exit__(self, exc_type, exc_val, exc_tb):
    """Close the client when a ``with`` block is left.

    The exception details are ignored and nothing is returned, so an
    exception raised inside the block is not suppressed.
    """
    self.close()
|
|
538
539
|
|
|
539
|
-
|
|
540
|
-
def
|
|
540
|
+
@admin_only_params('location', 'force_loc', 'server')
|
|
541
|
+
def execute_job(self, name, region, output, config, overwrite=False, skip_existing=False, request_json=None, manifest_json=None, location=None, force_loc=None, server="dev-au.terrak.io"):
|
|
541
542
|
if not self.mass_stats:
|
|
542
543
|
from terrakio_core.mass_stats import MassStats
|
|
543
544
|
if not self.url or not self.key:
|
|
@@ -548,20 +549,8 @@ class BaseClient:
|
|
|
548
549
|
verify=self.verify,
|
|
549
550
|
timeout=self.timeout
|
|
550
551
|
)
|
|
551
|
-
return self.mass_stats.
|
|
552
|
+
return self.mass_stats.execute_job(name, region, output, config, overwrite, skip_existing, request_json, manifest_json, location, force_loc, server)
|
|
552
553
|
|
|
553
|
-
def start_mass_stats_job(self, task_id):
|
|
554
|
-
if not self.mass_stats:
|
|
555
|
-
from terrakio_core.mass_stats import MassStats
|
|
556
|
-
if not self.url or not self.key:
|
|
557
|
-
raise ConfigurationError("Mass Stats client not initialized. Make sure API URL and key are set.")
|
|
558
|
-
self.mass_stats = MassStats(
|
|
559
|
-
base_url=self.url,
|
|
560
|
-
api_key=self.key,
|
|
561
|
-
verify=self.verify,
|
|
562
|
-
timeout=self.timeout
|
|
563
|
-
)
|
|
564
|
-
return self.mass_stats.start_job(task_id)
|
|
565
554
|
|
|
566
555
|
def get_mass_stats_task_id(self, name, stage, uid=None):
|
|
567
556
|
if not self.mass_stats:
|
|
@@ -1123,22 +1112,66 @@ class BaseClient:
|
|
|
1123
1112
|
return task_id
|
|
1124
1113
|
|
|
1125
1114
|
|
|
1126
|
-
def train_model(self, model_name: str, training_data: dict) -> dict:
|
|
1115
|
+
# def train_model(self, model_name: str, training_data: dict) -> dict:
|
|
1116
|
+
# """
|
|
1117
|
+
# Train a model using the external model training API.
|
|
1118
|
+
|
|
1119
|
+
# Args:
|
|
1120
|
+
# model_name (str): The name of the model to train.
|
|
1121
|
+
# training_data (dict): Dictionary containing training data parameters.
|
|
1122
|
+
|
|
1123
|
+
# Returns:
|
|
1124
|
+
# dict: The response from the model training API.
|
|
1125
|
+
# """
|
|
1126
|
+
# endpoint = "https://modeltraining-573248941006.australia-southeast1.run.app/train_model"
|
|
1127
|
+
# payload = {
|
|
1128
|
+
# "model_name": model_name,
|
|
1129
|
+
# "training_data": training_data
|
|
1130
|
+
# }
|
|
1131
|
+
# try:
|
|
1132
|
+
# response = self.session.post(endpoint, json=payload, timeout=self.timeout, verify=self.verify)
|
|
1133
|
+
# if not response.ok:
|
|
1134
|
+
# error_msg = f"Model training request failed: {response.status_code} {response.reason}"
|
|
1135
|
+
# try:
|
|
1136
|
+
# error_data = response.json()
|
|
1137
|
+
# if "detail" in error_data:
|
|
1138
|
+
# error_msg += f" - {error_data['detail']}"
|
|
1139
|
+
# except Exception:
|
|
1140
|
+
# if response.text:
|
|
1141
|
+
# error_msg += f" - {response.text}"
|
|
1142
|
+
# raise APIError(error_msg)
|
|
1143
|
+
# return response.json()
|
|
1144
|
+
# except requests.RequestException as e:
|
|
1145
|
+
# raise APIError(f"Model training request failed: {str(e)}")
|
|
1146
|
+
|
|
1147
|
+
|
|
1148
|
+
def train_model(self, model_name: str, training_dataset: str, task_type: str, model_category: str, architecture: str, region: str, hyperparameters: dict = None) -> dict:
|
|
1127
1149
|
"""
|
|
1128
1150
|
Train a model using the external model training API.
|
|
1129
|
-
|
|
1151
|
+
|
|
1130
1152
|
Args:
|
|
1131
1153
|
model_name (str): The name of the model to train.
|
|
1132
|
-
|
|
1133
|
-
|
|
1154
|
+
training_dataset (str): The training dataset identifier.
|
|
1155
|
+
task_type (str): The type of ML task (e.g., regression, classification).
|
|
1156
|
+
model_category (str): The category of model (e.g., random_forest).
|
|
1157
|
+
architecture (str): The model architecture.
|
|
1158
|
+
region (str): The region identifier.
|
|
1159
|
+
hyperparameters (dict, optional): Additional hyperparameters for training.
|
|
1160
|
+
|
|
1134
1161
|
Returns:
|
|
1135
1162
|
dict: The response from the model training API.
|
|
1136
1163
|
"""
|
|
1137
|
-
endpoint = "https://modeltraining-573248941006.australia-southeast1.run.app/train_model"
|
|
1138
1164
|
payload = {
|
|
1139
1165
|
"model_name": model_name,
|
|
1140
|
-
"
|
|
1166
|
+
"training_dataset": training_dataset,
|
|
1167
|
+
"task_type": task_type,
|
|
1168
|
+
"model_category": model_category,
|
|
1169
|
+
"architecture": architecture,
|
|
1170
|
+
"region": region,
|
|
1171
|
+
"hyperparameters": hyperparameters
|
|
1141
1172
|
}
|
|
1173
|
+
endpoint = f"{self.url.rstrip('/')}/train_model"
|
|
1174
|
+
print("the payload is ", payload)
|
|
1142
1175
|
try:
|
|
1143
1176
|
response = self.session.post(endpoint, json=payload, timeout=self.timeout, verify=self.verify)
|
|
1144
1177
|
if not response.ok:
|
|
@@ -1155,22 +1188,158 @@ class BaseClient:
|
|
|
1155
1188
|
except requests.RequestException as e:
|
|
1156
1189
|
raise APIError(f"Model training request failed: {str(e)}")
|
|
1157
1190
|
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1191
|
+
# Mass Stats methods
|
|
1192
|
+
def combine_tiles(self,
                  data_name: str,
                  usezarr: bool,
                  overwrite: bool,
                  output : str) -> dict:
    """
    Forward a combine-tiles request to the Mass Stats client.

    The Mass Stats client is created on first use from this client's URL,
    API key, ``verify`` and ``timeout`` settings and then cached on
    ``self.mass_stats``.

    Args:
        data_name (str): Passed through as the first argument of ``MassStats.combine_tiles``.
        usezarr (bool): Passed through unchanged.
        overwrite (bool): Passed through unchanged.
        output (str): Passed through unchanged.

    Returns:
        dict: Whatever ``MassStats.combine_tiles`` returns.

    Raises:
        ConfigurationError: If the Mass Stats client has to be created but the
            API URL or key is not set.
    """
    # Fast path: a Mass Stats client already exists.
    if self.mass_stats:
        return self.mass_stats.combine_tiles(data_name, usezarr, overwrite, output)

    from terrakio_core.mass_stats import MassStats

    if not (self.url and self.key):
        raise ConfigurationError("Mass Stats client not initialized. Make sure API URL and key are set.")

    connection_settings = {
        "base_url": self.url,
        "api_key": self.key,
        "verify": self.verify,
        "timeout": self.timeout,
    }
    self.mass_stats = MassStats(**connection_settings)
    return self.mass_stats.combine_tiles(data_name, usezarr, overwrite, output)
|
|
1209
|
+
|
|
1210
|
+
|
|
1211
|
+
|
|
1212
|
+
def generate_combine_tiles(
    self,
    name: str,
    aoi: str,
    expression: str,
    output: str,
    tile_size: float = 128.0,
    crs: str = "epsg:4326",
    res: float = 0.0001,
    region: str = "aus",
    to_crs: str = "epsg:4326",
    overwrite: bool = True,
    skip_existing: bool = False,
    non_interactive: bool = True,
    usezarr: bool = False,
    poll_interval: int = 30  # seconds between job status checks
) -> dict:
    """
    Generate tile requests for an area of interest, run them as a Mass Stats
    job, wait for the job to finish, then ask the server to combine the tiles.

    This call blocks: it polls the job status every ``poll_interval`` seconds
    until the job reports ``Completed`` or a failure status.

    Args:
        name (str): Job name; also used as the prefix of each tile request name.
        aoi (str): Area-of-interest file, read with geopandas by the tile generator.
        expression (str): Expression placed in every tile request.
        output (str): Output type placed in every tile request and in the job.
        tile_size (float): Tile edge length; multiplied by ``res`` to get the tile extent.
        crs (str): CRS assigned to the AOI file.
        res (float): Resolution placed in every tile request.
        region (str): Region the job is submitted to.
        to_crs (str): CRS the AOI is reprojected to before tiling.
        overwrite (bool): Forwarded to the job and to the combine step.
        skip_existing (bool): Forwarded to the job.
        non_interactive (bool): Forwarded to the tile generator.
        usezarr (bool): Forwarded to the combine step.
        poll_interval (int): Seconds to sleep between job status checks.

    Returns:
        dict: The response of ``MassStats.combine_tiles``.

    Raises:
        ConfigurationError: If the Mass Stats client must be created but the
            API URL or key is not set.
        RuntimeError: If the job ends with status ``Failed``, ``Cancelled`` or ``Error``.
        KeyboardInterrupt: Re-raised after a notice that the job keeps running remotely.
    """
    from terrakio_core.generation.tiles import tiles
    import os
    import tempfile
    import time

    body, reqs, groups = tiles(
        name = name,
        aoi = aoi,
        expression = expression,
        output = output,
        tile_size = tile_size,
        crs = crs,
        res = res,
        region = region,
        to_crs = to_crs,
        fully_cover = True,
        overwrite = overwrite,
        skip_existing = skip_existing,
        non_interactive = non_interactive
    )

    # Make sure a Mass Stats client exists before any temporary file is created,
    # so a configuration error cannot leave files behind.
    if not self.mass_stats:
        from terrakio_core.mass_stats import MassStats
        if not self.url or not self.key:
            raise ConfigurationError("Mass Stats client not initialized. Make sure API URL and key are set.")
        self.mass_stats = MassStats(
            base_url=self.url,
            api_key=self.key,
            verify=self.verify,
            timeout=self.timeout
        )

    # execute_job expects file paths, so the JSON strings are spooled to disk.
    # delete=False makes this function responsible for removing them; the
    # finally clause does so on success and on every failure path.
    temp_paths = []
    try:
        for payload in (reqs, groups):
            with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as handle:
                temp_paths.append(handle.name)
                handle.write(payload)
        tempreqname, tempmanifestname = temp_paths

        task_id = self.mass_stats.execute_job(
            name=body["name"],
            region=body["region"],
            output=body["output"],
            config = {},
            overwrite=body["overwrite"],
            skip_existing=body["skip_existing"],
            request_json=tempreqname,
            manifest_json=tempmanifestname,
        )
    finally:
        # The upload has finished (or failed) by now, so the files are no longer needed.
        for path in temp_paths:
            try:
                os.unlink(path)
            except OSError as cleanup_error:
                logging.warning("Could not remove temporary file %s: %s", path, cleanup_error)

    # Resolve the id once, before polling, so the interrupt message below can
    # always name the job.
    taskid = task_id['task_id']
    start_time = time.time()

    ### Start combining tiles when generation-tiles job is done
    try:
        while True:
            try:
                trackinfo = self.mass_stats.track_job([taskid])
                status = trackinfo[taskid]['status']
            except Exception as e:
                print(f"\nError tracking job: {e}")
                raise

            if status == 'Completed':
                print('Tiles generated successfully!')
                break
            if status in ['Failed', 'Cancelled', 'Error']:
                # Raised outside the tracking try-block: this is a job failure,
                # not a tracking error.
                raise RuntimeError(f"Job {taskid} failed with status: {status}")

            # Job is still running
            elapsed_time = time.time() - start_time
            print(f"Job status: {status} - Elapsed time: {elapsed_time:.1f}s", end='\r')

            # Sleep before next check
            time.sleep(poll_interval)
    except KeyboardInterrupt:
        print(f"\nInterrupted! Job {taskid} is still running in the background.")
        raise

    # Start combining tiles
    return self.mass_stats.combine_tiles(body["name"], usezarr, body["overwrite"], body["output"])
|
|
1331
|
+
|
|
1332
|
+
|
|
1333
|
+
|
|
1334
|
+
|
|
1335
|
+
|
|
1336
|
+
|
|
1337
|
+
|
|
1338
|
+
|
|
1339
|
+
|
|
1340
|
+
def deploy_model(self, dataset: str, product:str, model_name:str, input_expression: str, model_training_job_name: str, uid: str, dates_iso8601: list):
|
|
1168
1341
|
script_content = self._generate_script(model_name, product, model_training_job_name, uid)
|
|
1169
|
-
# self.create_dataset(collection = "terrakio-datasets", input = input, )
|
|
1170
|
-
# we have the script, we need to upload it to the bucket
|
|
1171
1342
|
script_name = f"{product}.py"
|
|
1172
|
-
print("the script content is ", script_content)
|
|
1173
|
-
print("the script name is ", script_name)
|
|
1174
1343
|
self._upload_script_to_bucket(script_content, script_name, model_training_job_name, uid)
|
|
1175
1344
|
# after uploading the script, we need to create a new virtual dataset
|
|
1176
1345
|
self._create_dataset(name = dataset, collection = "terrakio-datasets", products = [product], path = f"gs://terrakio-mass-requests/{uid}/{model_training_job_name}/inference_scripts", input = input_expression, dates_iso8601 = dates_iso8601, padding = 0)
|
|
@@ -1238,7 +1407,6 @@ class BaseClient:
|
|
|
1238
1407
|
client = storage.Client()
|
|
1239
1408
|
bucket = client.get_bucket('terrakio-mass-requests')
|
|
1240
1409
|
blob = bucket.blob(f'{uid}/{model_training_job_name}/inference_scripts/{script_name}')
|
|
1241
|
-
# the first layer is the uid, the second layer is the model training job name
|
|
1242
1410
|
blob.upload_from_string(script_content, content_type='text/plain')
|
|
1243
1411
|
logging.info(f"Script uploaded successfully to {uid}/{model_training_job_name}/inference_scripts/{script_name}")
|
|
1244
1412
|
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# terrakio_core/decorators.py
|
|
2
|
+
def admin_only_params(*restricted_params):
    """
    Decorator factory for restricting method parameters to admin users only.

    The decorated method must take ``self`` as its first parameter. A caller
    counts as admin when ``self._is_admin`` exists and is truthy; admins are
    passed straight through. For everyone else, supplying any restricted
    parameter raises ``PermissionError`` - whether it is given by keyword or
    positionally (positional use previously slipped past the check).

    Args:
        *restricted_params: Names of the parameters only admins may supply.

    Returns:
        A decorator that wraps a method with the admin check and keeps the
        wrapped method's name and docstring.

    Raises:
        PermissionError: From the wrapped call, when a non-admin supplies a
            restricted parameter.
    """
    import functools
    import inspect

    restricted = frozenset(restricted_params)

    def decorator(func):
        # Names that can be filled positionally, in order, without the leading
        # ``self``. Used to map positional arguments back to parameter names.
        try:
            positional_names = [
                param.name
                for param in inspect.signature(func).parameters.values()
                if param.kind in (
                    inspect.Parameter.POSITIONAL_ONLY,
                    inspect.Parameter.POSITIONAL_OR_KEYWORD,
                )
            ][1:]
        except (TypeError, ValueError):
            # No introspectable signature: fall back to keyword-only checking.
            positional_names = []

        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            if getattr(self, '_is_admin', False):
                return func(self, *args, **kwargs)

            supplied = set(kwargs) | set(positional_names[:len(args)])
            admin_params_used = supplied & restricted
            if admin_params_used:
                raise PermissionError(f"Parameters {admin_params_used} are only available to admin users")

            # Nothing restricted was supplied, so the call goes through
            # unchanged and restricted parameters keep their defaults.
            return func(self, *args, **kwargs)
        return wrapper
    return decorator
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
### implementing generation-tiles in python api
|
|
2
|
+
### function should just generate the json file for mass_stats to pick up.
|
|
3
|
+
|
|
4
|
+
import geopandas as gpd
|
|
5
|
+
import shapely.geometry
|
|
6
|
+
import json
|
|
7
|
+
from rich import print
|
|
8
|
+
|
|
9
|
+
def escape_newline(string):
    """Replace literal backslash-n sequences with real newline characters.

    Accepts either a single string or a list of strings. A list produces a
    new list with every element converted; anything else is converted directly.
    """
    def _unescape(text):
        return text.replace('\\n', '\n')

    if not isinstance(string, list):
        return _unescape(string)
    return list(map(_unescape, string))
|
|
14
|
+
|
|
15
|
+
def get_bounds(aoi, crs, to_crs = None):
    """Read an AOI file and return the bounds of its geometry at index 0.

    The file is read with geopandas and ``crs`` is assigned to it, overriding
    any CRS the file already declares. When ``to_crs`` is truthy the frame is
    then reprojected to it.

    Returns a flat tuple: the bounds of ``geometry[0]`` followed by the
    GeoDataFrame itself. Only the geometry at index 0 contributes to the bounds.
    """
    frame = gpd.read_file(aoi).set_crs(crs, allow_override=True)
    if to_crs:
        frame = frame.to_crs(to_crs)
    extent = frame.geometry[0].bounds
    return tuple(extent) + (frame,)
|
|
22
|
+
|
|
23
|
+
def tile_generator(x_min, y_min, x_max, y_max, aoi, crs, res, tile_size, expression, output, fully_cover=True):
    """Yield one request payload per grid tile that intersects the AOI.

    The grid starts at the top-left corner (``x_min``, ``y_max``) and uses
    square tiles of ``tile_size * res`` units. Rows are walked top to bottom
    and columns left to right. With ``fully_cover`` one extra row and column
    are added so the far edges are covered as well.

    Tiles that do not intersect ``aoi.geometry[0]`` are skipped.

    Yields:
        tuple: ``(data, i, j)`` where ``data`` is the request dict for the
        tile and ``i`` / ``j`` are its column / row indices.
    """
    step = tile_size * res
    extra = 1 if fully_cover else 0
    column_count = int((x_max - x_min) / step) + extra
    row_count = int((y_max - y_min) / step) + extra

    for j in range(row_count):
        top = y_max - j * step
        for i in range(column_count):
            left = x_min + i * step
            bbox = shapely.geometry.box(left, top - step, left + step, top)
            if aoi.geometry[0].intersects(bbox):
                tile_request = {
                    "feature": {"type": "Feature", "geometry": bbox.__geo_interface__},
                    "in_crs": crs,
                    "out_crs": crs,
                    "resolution": res,
                    "expr" : expression,
                    "output" : output,
                }
                yield tile_request, i, j
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def tiles(
    name: str,
    aoi : str,
    expression: str = "red=S2v2#(year,median).red@(year =2024) \n red",
    output: str = "netcdf",
    tile_size : float = 512,
    crs : str = "epsg:3577",
    res: float = 10,
    region : str = "eu",
    to_crs: str = None,
    fully_cover: bool = True,
    overwrite: bool = False,
    skip_existing: bool = False,
    non_interactive: bool = False,
):
    """Build the job body, request list and manifest for a tiled job.

    The AOI file is read via ``get_bounds`` and split into tiles by
    ``tile_generator``. Each tile becomes a request named
    ``{name}_{i:02d}_{j:02d}`` in the ``"tiles"`` group. Tiles are generated
    in ``to_crs`` when given, otherwise in ``crs``.

    Returns:
        tuple: ``(body, request_json, manifest_json)`` where ``body`` is a
        dict of job settings (including the tile count as ``size``),
        ``request_json`` is the JSON-encoded request list and
        ``manifest_json`` is the JSON-encoded list of distinct group names.
    """
    x_min, y_min, x_max, y_max, aoi = get_bounds(aoi, crs, to_crs)
    target_crs = crs if to_crs is None else to_crs

    # One request entry per tile that intersects the AOI.
    tile_requests = [
        {"group": "tiles", "file": f"{name}_{i:02d}_{j:02d}", "request": tile_req}
        for tile_req, i, j in tile_generator(
            x_min, y_min, x_max, y_max, aoi, target_crs, res, tile_size, expression, output, fully_cover
        )
    ]
    group_names = list({entry["group"] for entry in tile_requests})

    body = {
        "name" : name,
        "output" : output,
        "region" : region,
        "size" : len(tile_requests),
        "overwrite" : overwrite,
        "non_interactive": non_interactive,
        "skip_existing" : skip_existing,
    }
    return body, json.dumps(tile_requests), json.dumps(group_names)
|
|
95
|
+
|
terrakio_core/mass_stats.py
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
import requests
|
|
2
|
-
from typing import Optional, Dict, Any
|
|
2
|
+
from typing import Optional, Dict, Any, List
|
|
3
|
+
import json
|
|
4
|
+
import json as json_lib
|
|
5
|
+
import gzip
|
|
3
6
|
|
|
4
7
|
class MassStats:
|
|
5
8
|
def __init__(self, base_url: str, api_key: str, verify: bool = True, timeout: int = 60):
|
|
@@ -12,61 +15,265 @@ class MassStats:
|
|
|
12
15
|
'x-api-key': self.api_key
|
|
13
16
|
})
|
|
14
17
|
|
|
18
|
+
def _upload_file(self, file_path: str, url: str, use_gzip: bool = False):
|
|
19
|
+
"""
|
|
20
|
+
Helper method to upload a JSON file to a signed URL.
|
|
21
|
+
|
|
22
|
+
Args:
|
|
23
|
+
file_path: Path to the JSON file
|
|
24
|
+
url: Signed URL to upload to
|
|
25
|
+
use_gzip: Whether to compress the file with gzip
|
|
26
|
+
"""
|
|
27
|
+
try:
|
|
28
|
+
with open(file_path, 'r') as file:
|
|
29
|
+
json_data = json_lib.load(file)
|
|
30
|
+
except FileNotFoundError:
|
|
31
|
+
raise FileNotFoundError(f"JSON file not found: {file_path}")
|
|
32
|
+
except json.JSONDecodeError as e:
|
|
33
|
+
raise ValueError(f"Invalid JSON in file {file_path}: {e}")
|
|
34
|
+
|
|
35
|
+
# Check if using simplejson and support ignore_nan
|
|
36
|
+
if hasattr(json_lib, 'dumps') and 'ignore_nan' in json_lib.dumps.__code__.co_varnames:
|
|
37
|
+
dumps_kwargs = {'ignore_nan': True}
|
|
38
|
+
else:
|
|
39
|
+
dumps_kwargs = {}
|
|
40
|
+
|
|
41
|
+
if use_gzip:
|
|
42
|
+
# Serialize and compress the JSON data
|
|
43
|
+
body = gzip.compress(json_lib.dumps(json_data, **dumps_kwargs).encode('utf-8'))
|
|
44
|
+
headers = {
|
|
45
|
+
'Content-Type': 'application/json',
|
|
46
|
+
'Content-Encoding': 'gzip'
|
|
47
|
+
}
|
|
48
|
+
else:
|
|
49
|
+
body = json_lib.dumps(json_data, **dumps_kwargs).encode('utf-8')
|
|
50
|
+
headers = {
|
|
51
|
+
'Content-Type': 'application/json'
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
# Make the PUT request to the signed URL
|
|
55
|
+
response = requests.put(
|
|
56
|
+
url,
|
|
57
|
+
data=body,
|
|
58
|
+
headers=headers
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
return response
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# def _download_file(self, url: str, output_path: str) -> str:
|
|
65
|
+
# """
|
|
66
|
+
# Helper method to download a file from a signed URL.
|
|
67
|
+
|
|
68
|
+
# Args:
|
|
69
|
+
# url: Signed URL to download from
|
|
70
|
+
# output_path: Path where the file should be saved
|
|
71
|
+
|
|
72
|
+
# Returns:
|
|
73
|
+
# str: Path to the downloaded file
|
|
74
|
+
# """
|
|
75
|
+
|
|
76
|
+
# try:
|
|
77
|
+
# response = requests.get(
|
|
78
|
+
# url,
|
|
79
|
+
# verify=self.verify,
|
|
80
|
+
# timeout=self.timeout
|
|
81
|
+
# )
|
|
82
|
+
# response.raise_for_status()
|
|
83
|
+
|
|
84
|
+
# # Download and write the file
|
|
85
|
+
# with open(output_path, 'wb') as file:
|
|
86
|
+
# file.write(response.content)
|
|
87
|
+
# print(f"File downloaded successfully to {output_path}")
|
|
88
|
+
# return output_path
|
|
89
|
+
|
|
90
|
+
# except requests.exceptions.RequestException as e:
|
|
91
|
+
# raise Exception(f"Error downloading file from {url}: {e}")
|
|
92
|
+
# except IOError as e:
|
|
93
|
+
# raise Exception(f"Error writing file to {output_path}: {e}")
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
|
|
15
98
|
def upload_request(
    self,
    name: str,
    size: int,
    region: List[str],
    output: str,
    config: Dict[str, Any],
    location: Optional[str] = None,
    force_loc: Optional[bool] = None,
    overwrite: bool = False,
    server: Optional[str] = None,
    skip_existing: bool = False,
) -> Dict[str, Any]:
    """
    Create a mass stats upload job on the server.

    POSTs the job description to ``/mass_stats/upload`` and returns the
    decoded JSON response. The HTTP status code is not checked here.

    Args:
        name: Name of the job
        size: Size of the job
        region: Region to run job [aus, eu, us]
            NOTE(review): annotated as List[str], but execute_job passes its
            ``region: str`` straight through - confirm the intended type.
        output: Output type
        config: Configuration dictionary
        location: (Optional) Location for the upload; sent only when not None
        force_loc: Force location usage; sent only when not None
        overwrite: Overwrite existing data
        server: Optional server; sent only when not None
        skip_existing: Skip existing files

    Returns:
        The JSON body of the server response.
    """
    # Always-present fields first, in the order the server payload has always used.
    payload = {
        "name": name,
        "size": size,
        "region": region,
        "output": output,
        "config": config,
        "overwrite": overwrite,
        "skip_existing": skip_existing,
    }

    # Optional fields are left out entirely unless the caller supplied them.
    optional_fields = (
        ("location", location),
        ("force_loc", force_loc),
        ("server", server),
    )
    payload.update({key: value for key, value in optional_fields if value is not None})

    reply = self.session.post(
        f"{self.base_url}/mass_stats/upload",
        json=payload,
        verify=self.verify,
        timeout=self.timeout,
    )
    return reply.json()
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
# def construct_download_url(
|
|
158
|
+
# self,
|
|
159
|
+
# name: str,
|
|
160
|
+
# output: str,
|
|
161
|
+
# region: Optional[str] = None,
|
|
162
|
+
# ) -> Dict[str, Any]:
|
|
163
|
+
# """
|
|
164
|
+
# Request a signed download URL for a file.
|
|
165
|
+
|
|
166
|
+
# Args:
|
|
167
|
+
# name: job name
|
|
168
|
+
# file_type: Type of file to download (e.g., "output", "manifest", "log")
|
|
169
|
+
# region: Region where the file is stored
|
|
170
|
+
|
|
171
|
+
# Returns:
|
|
172
|
+
# Dict containing download_url and file metadata
|
|
173
|
+
# """
|
|
174
|
+
# url = f"{self.base_url}/mass_stats/download"
|
|
175
|
+
|
|
176
|
+
# data = {
|
|
177
|
+
# "name": name,
|
|
178
|
+
# "output": output
|
|
179
|
+
# }
|
|
180
|
+
|
|
181
|
+
# if region is not None:
|
|
182
|
+
# data["region"] = region
|
|
183
|
+
|
|
184
|
+
# response = self.session.post(
|
|
185
|
+
# url,
|
|
186
|
+
# json=data,
|
|
187
|
+
# verify=self.verify,
|
|
188
|
+
# timeout=self.timeout
|
|
189
|
+
# )
|
|
190
|
+
|
|
191
|
+
# return response.json()
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
# def testdownload(
|
|
195
|
+
# self,
|
|
196
|
+
# name: str,
|
|
197
|
+
# region: str,
|
|
198
|
+
# output: str,
|
|
199
|
+
# ):
|
|
200
|
+
# upload_result = self.construct_download_url(name, region, output)
|
|
201
|
+
# return upload_result
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def execute_job(
    self,
    name: str,
    region: str,
    output: str,
    config: Dict[str, Any],
    overwrite: bool = False,
    skip_existing: bool = False,
    request_json: Optional[str] = None,
    manifest_json: Optional[str] = None,
    location: Optional[str] = None,
    force_loc: Optional[bool] = None,
    server: Optional[str] = None
) -> Dict[str, Any]:
    """
    Create a mass stats job, upload its JSON files and start it.

    Steps: (1) count the requests in ``request_json`` to get the job size,
    (2) create the upload job via ``upload_request``, (3) upload the request
    file (gzip-compressed) and the manifest file to the signed URLs the server
    returned, (4) start the job.

    Args:
        name: Name of the job
        region: Region to run the job in
        output: Output type
        config: Configuration dictionary
        overwrite: Overwrite existing data
        skip_existing: Skip existing files
        request_json: Path to a JSON file holding a list of requests
        manifest_json: Path to a JSON manifest file
        location: Optional location, forwarded to ``upload_request``
        force_loc: Optional force-location flag, forwarded to ``upload_request``
        server: Optional server, forwarded to ``upload_request``

    Returns:
        The response of ``start_job`` for the created job.

    Raises:
        FileNotFoundError: If ``request_json`` does not exist.
        ValueError: If ``request_json`` is not valid JSON or not a list, if
            the server response lacks a needed upload URL, or if it lacks
            the job ``id``.
        Exception: If uploading a file fails.
    """
    # Step 1: Calculate size from request JSON file if provided
    size = 0
    if request_json is not None:
        try:
            with open(request_json, 'r') as file:
                request_data = json_lib.load(file)
        except FileNotFoundError:
            raise FileNotFoundError(f"Request JSON file not found: {request_json}")
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON in request file {request_json}: {e}") from e

        if not isinstance(request_data, list):
            raise ValueError(f"Request JSON file {request_json} should contain a list of dictionaries")
        size = len(request_data)

    # Step 2: Create the upload job and get signed URLs
    upload_result = self.upload_request(name, size, region, output, config, location, force_loc, overwrite, server, skip_existing)

    # Step 3: Upload JSON files if provided (requests gzip-compressed, manifest plain)
    uploads = (
        ("request", "Requests", "requests_url", request_json, True),
        ("manifest", "Manifest", "manifest_url", manifest_json, False),
    )
    for kind, label, url_key, path, use_gzip in uploads:
        if path is None:
            continue

        signed_url = upload_result.get(url_key)
        if not signed_url:
            raise ValueError(f"No {url_key} returned from server for {kind} JSON upload")

        try:
            upload_response = self._upload_file(path, signed_url, use_gzip=use_gzip)
            if upload_response.status_code not in [200, 201, 204]:
                print(f"{label} upload error: {upload_response.text}")
                raise Exception(f"Failed to upload {kind} JSON: {upload_response.text}")
        except Exception as e:
            raise Exception(f"Error uploading {kind} JSON file {path}: {e}") from e

    # Step 4: Start the job. Without an id (e.g. the server answered with an
    # error body) there is nothing to start; fail here instead of POSTing to
    # ".../start/None".
    job_id = upload_result.get("id")
    if job_id is None:
        raise ValueError(f"No job id returned from server for job {name!r}: {upload_result}")

    return self.start_job(job_id)
|
|
270
|
+
|
|
63
271
|
|
|
64
272
|
def start_job(self, task_id: str) -> Dict[str, Any]:
    """
    Start a mass stats job by task ID.

    POSTs to ``/mass_stats/start/{task_id}`` with this client's ``verify``
    and ``timeout`` settings, raises for an HTTP error status, and returns
    the decoded JSON response.
    """
    start_url = f"{self.base_url}/mass_stats/start/{task_id}"
    request_options = {"verify": self.verify, "timeout": self.timeout}
    reply = self.session.post(start_url, **request_options)
    reply.raise_for_status()
    return reply.json()
|
|
@@ -259,4 +466,31 @@ class MassStats:
|
|
|
259
466
|
print("Response text:", response.text)
|
|
260
467
|
# response.raise_for_status()
|
|
261
468
|
return response.json()
|
|
469
|
+
|
|
470
|
+
|
|
471
|
+
### Adding the wrapper function to call endpoint /mass_stats/combine_tiles
|
|
472
|
+
def combine_tiles(
    self,
    data_name: str,
    usezarr: bool = False,
    overwrite: bool = True,
    output : str = "netcdf"
) -> Dict[str, Any]:
    """
    Call the ``/mass_stats/combine_tiles`` endpoint.

    The two boolean options are sent as lowercase strings (``"true"`` /
    ``"false"``). The request body and the raw response text are printed to
    stdout. The HTTP status code is not checked.

    Args:
        data_name: Sent as ``data_name`` in the request body
        usezarr: Sent as the lowercase string form of the flag
        overwrite: Sent as the lowercase string form of the flag
        output: Sent as ``output`` in the request body

    Returns:
        The decoded JSON body of the server response.
    """
    def as_flag(value):
        return str(value).lower()

    request_body = dict(
        data_name=data_name,
        usezarr=as_flag(usezarr),
        output=output,
        overwrite=as_flag(overwrite),
    )
    endpoint = f"{self.base_url}/mass_stats/combine_tiles"

    print(f"Request body: {json.dumps(request_body, indent=2)}")
    reply = self.session.post(endpoint, json=request_body, verify=self.verify, timeout=self.timeout)
    print(f"Response text: {reply.text}")
    return reply.json()
|
|
491
|
+
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
|
|
495
|
+
|
|
262
496
|
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
terrakio_core/__init__.py,sha256=279LuD40cJZBle4GQS_vxLIyyfejhEhcl2m9-4Qkdkk,88
|
|
2
|
+
terrakio_core/auth.py,sha256=Nuj0_X3Hiy17svYgGxrSAR-LXpTlP0J0dSrfMnkPUbI,7717
|
|
3
|
+
terrakio_core/client.py,sha256=rTwWP6r_aqEHoVGIpyMsy7pgVoaT2lSLNGpx0jf8QMU,62658
|
|
4
|
+
terrakio_core/config.py,sha256=AwJ1VgR5K7N32XCU5k7_Dp1nIv_FYt8MBonq9yKlGzA,2658
|
|
5
|
+
terrakio_core/dataset_management.py,sha256=LKUESSDPRu1JubQaQJWdPqHLGt-_Xv77Fpb4IM7vkzM,8751
|
|
6
|
+
terrakio_core/decorators.py,sha256=QeNOUX6WEAmdgBL5Igt5DXyYduh3jnmLbodttmwvXhE,785
|
|
7
|
+
terrakio_core/exceptions.py,sha256=9S-I20-QiDRj1qgjFyYUwYM7BLic_bxurcDOIm2Fu_0,410
|
|
8
|
+
terrakio_core/group_access_management.py,sha256=NJ7SX4keUzZAUENmJ5L6ynKf4eRlqtyir5uoKFyY17A,7315
|
|
9
|
+
terrakio_core/mass_stats.py,sha256=HHrpnlshADfCyVMD4VqR3jpIN9jCGnZaFDc_q5igG-Y,17215
|
|
10
|
+
terrakio_core/space_management.py,sha256=wlUUQrlj_4U_Lpjn9lbF5oj0Rv3NPvvnrd5mWej5kmA,4211
|
|
11
|
+
terrakio_core/user_management.py,sha256=MMNWkz0V_9X7ZYjjteuRU4H4W3F16iuQw1dpA2wVTGg,7400
|
|
12
|
+
terrakio_core/generation/tiles.py,sha256=eiiMNzqaga-c42kG_7zHXTF2o8ZInCPUj0Vu4Ye30Ts,2980
|
|
13
|
+
terrakio_core-0.3.1.dist-info/METADATA,sha256=fv09EWJ75yEd2k80SuM0-KKzOeCICvqbnxj9DKTdKrw,1448
|
|
14
|
+
terrakio_core-0.3.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
15
|
+
terrakio_core-0.3.1.dist-info/top_level.txt,sha256=5cBj6O7rNWyn97ND4YuvvXm0Crv4RxttT4JZvNdOG6Q,14
|
|
16
|
+
terrakio_core-0.3.1.dist-info/RECORD,,
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
terrakio_core/__init__.py,sha256=iguSJomKouzVNPOB2_Ox-FGnQBUQ0ykx8CshjVzU1QM,88
|
|
2
|
-
terrakio_core/auth.py,sha256=Nuj0_X3Hiy17svYgGxrSAR-LXpTlP0J0dSrfMnkPUbI,7717
|
|
3
|
-
terrakio_core/client.py,sha256=CQ1qiR_8tWKEGX-UT2wLeatk8fYMpyo9KseMpCapw7c,56813
|
|
4
|
-
terrakio_core/config.py,sha256=AwJ1VgR5K7N32XCU5k7_Dp1nIv_FYt8MBonq9yKlGzA,2658
|
|
5
|
-
terrakio_core/dataset_management.py,sha256=LKUESSDPRu1JubQaQJWdPqHLGt-_Xv77Fpb4IM7vkzM,8751
|
|
6
|
-
terrakio_core/exceptions.py,sha256=9S-I20-QiDRj1qgjFyYUwYM7BLic_bxurcDOIm2Fu_0,410
|
|
7
|
-
terrakio_core/group_access_management.py,sha256=NJ7SX4keUzZAUENmJ5L6ynKf4eRlqtyir5uoKFyY17A,7315
|
|
8
|
-
terrakio_core/mass_stats.py,sha256=AqYJsd6nqo2BDh4vEPUDgsv4T0UR1_TPDoXa3WO3gTU,9284
|
|
9
|
-
terrakio_core/space_management.py,sha256=wlUUQrlj_4U_Lpjn9lbF5oj0Rv3NPvvnrd5mWej5kmA,4211
|
|
10
|
-
terrakio_core/user_management.py,sha256=MMNWkz0V_9X7ZYjjteuRU4H4W3F16iuQw1dpA2wVTGg,7400
|
|
11
|
-
terrakio_core-0.3.0.dist-info/METADATA,sha256=8mS_NJQUoFcr1lE3iUQXQi5VwSZo07t3XF0pCL7VNSI,1448
|
|
12
|
-
terrakio_core-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
13
|
-
terrakio_core-0.3.0.dist-info/top_level.txt,sha256=5cBj6O7rNWyn97ND4YuvvXm0Crv4RxttT4JZvNdOG6Q,14
|
|
14
|
-
terrakio_core-0.3.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|