terrakio-core 0.3.1__tar.gz → 0.3.2__tar.gz
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Note: this version of terrakio-core has been flagged as potentially problematic.
- {terrakio_core-0.3.1 → terrakio_core-0.3.2}/PKG-INFO +2 -1
- {terrakio_core-0.3.1 → terrakio_core-0.3.2}/pyproject.toml +2 -1
- {terrakio_core-0.3.1 → terrakio_core-0.3.2}/terrakio_core/__init__.py +1 -1
- {terrakio_core-0.3.1 → terrakio_core-0.3.2}/terrakio_core/client.py +129 -66
- {terrakio_core-0.3.1 → terrakio_core-0.3.2}/terrakio_core/dataset_management.py +62 -10
- {terrakio_core-0.3.1 → terrakio_core-0.3.2}/terrakio_core/mass_stats.py +81 -73
- {terrakio_core-0.3.1 → terrakio_core-0.3.2}/terrakio_core.egg-info/PKG-INFO +2 -1
- {terrakio_core-0.3.1 → terrakio_core-0.3.2}/terrakio_core.egg-info/requires.txt +1 -0
- {terrakio_core-0.3.1 → terrakio_core-0.3.2}/README.md +0 -0
- {terrakio_core-0.3.1 → terrakio_core-0.3.2}/setup.cfg +0 -0
- {terrakio_core-0.3.1 → terrakio_core-0.3.2}/terrakio_core/auth.py +0 -0
- {terrakio_core-0.3.1 → terrakio_core-0.3.2}/terrakio_core/config.py +0 -0
- {terrakio_core-0.3.1 → terrakio_core-0.3.2}/terrakio_core/decorators.py +0 -0
- {terrakio_core-0.3.1 → terrakio_core-0.3.2}/terrakio_core/exceptions.py +0 -0
- {terrakio_core-0.3.1 → terrakio_core-0.3.2}/terrakio_core/generation/tiles.py +0 -0
- {terrakio_core-0.3.1 → terrakio_core-0.3.2}/terrakio_core/group_access_management.py +0 -0
- {terrakio_core-0.3.1 → terrakio_core-0.3.2}/terrakio_core/space_management.py +0 -0
- {terrakio_core-0.3.1 → terrakio_core-0.3.2}/terrakio_core/user_management.py +0 -0
- {terrakio_core-0.3.1 → terrakio_core-0.3.2}/terrakio_core.egg-info/SOURCES.txt +0 -0
- {terrakio_core-0.3.1 → terrakio_core-0.3.2}/terrakio_core.egg-info/dependency_links.txt +0 -0
- {terrakio_core-0.3.1 → terrakio_core-0.3.2}/terrakio_core.egg-info/top_level.txt +0 -0
{terrakio_core-0.3.1 → terrakio_core-0.3.2}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: terrakio-core
-Version: 0.3.1
+Version: 0.3.2
 Summary: Core components for Terrakio API clients
 Author-email: Yupeng Chao <yupeng@haizea.com.au>
 Project-URL: Homepage, https://github.com/HaizeaAnalytics/terrakio-python-api
@@ -22,6 +22,7 @@ Requires-Dist: xarray>=2023.1.0
 Requires-Dist: shapely>=2.0.0
 Requires-Dist: geopandas>=0.13.0
 Requires-Dist: google-cloud-storage>=2.0.0
+Requires-Dist: nest_asyncio
 
 # Terrakio Core
 
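The only dependency change in 0.3.2 is the new nest_asyncio requirement. nest_asyncio patches an already-running asyncio event loop so it can be re-entered, the usual workaround when a synchronous client drives async code from environments such as Jupyter that already run a loop. The diff does not show where terrakio-core calls it, so the snippet below is a generic illustration of the library rather than code from this package:

    # Generic illustration of what nest_asyncio provides (not from terrakio-core).
    import asyncio
    import nest_asyncio

    nest_asyncio.apply()  # patch the running loop so it can be re-entered

    async def fetch():
        await asyncio.sleep(0.1)
        return "done"

    # Inside an already-running loop (e.g. Jupyter) this would normally raise
    # RuntimeError; with nest_asyncio applied it completes.
    print(asyncio.get_event_loop().run_until_complete(fetch()))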
{terrakio_core-0.3.1 → terrakio_core-0.3.2}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "terrakio-core"
-version = "0.3.1"
+version = "0.3.2"
 authors = [
     {name = "Yupeng Chao", email = "yupeng@haizea.com.au"},
 ]
@@ -29,6 +29,7 @@ dependencies = [
     "shapely>=2.0.0",
     "geopandas>=0.13.0",
     "google-cloud-storage>=2.0.0",
+    "nest_asyncio",
 ]
 
 [project.urls]
{terrakio_core-0.3.1 → terrakio_core-0.3.2}/terrakio_core/client.py

@@ -130,6 +130,7 @@ class BaseClient:
             "resolution": resolution,
             **kwargs
         }
+        print("the payload is ", payload)
         request_url = f"{self.url}/geoquery"
         for attempt in range(retry + 1):
             try:
@@ -565,7 +566,7 @@ class BaseClient:
         )
         return self.mass_stats.get_task_id(name, stage, uid)
 
-    def track_mass_stats_job(self, ids=None):
+    def track_mass_stats_job(self, ids: Optional[list] = None):
         if not self.mass_stats:
             from terrakio_core.mass_stats import MassStats
             if not self.url or not self.key:
@@ -1038,6 +1039,20 @@ class BaseClient:
         )
         return self.space_management.delete_data_in_path(path, region)
 
+    def start_mass_stats_job(self, task_id):
+        if not self.mass_stats:
+            from terrakio_core.mass_stats import MassStats
+            if not self.url or not self.key:
+                raise ConfigurationError("Mass Stats client not initialized. Make sure API URL and key are set.")
+            self.mass_stats = MassStats(
+                base_url=self.url,
+                api_key=self.key,
+                verify=self.verify,
+                timeout=self.timeout
+            )
+        return self.mass_stats.start_job(task_id)
+
+
     def generate_ai_dataset(
         self,
         name: str,
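The new start_mass_stats_job repeats the lazy-initialization idiom used throughout BaseClient: build the MassStats helper on first use and fail fast when the API URL or key is missing. A condensed sketch of that shared idiom (the _ensure_mass_stats name is hypothetical; ConfigurationError and MassStats are the names from the diff):

    # Hypothetical refactoring sketch of the idiom the diff repeats verbatim
    # in start_mass_stats_job, track_mass_stats_job and download_file_to_path.
    def _ensure_mass_stats(self):
        if not self.mass_stats:
            from terrakio_core.mass_stats import MassStats
            if not self.url or not self.key:
                raise ConfigurationError(
                    "Mass Stats client not initialized. Make sure API URL and key are set."
                )
            self.mass_stats = MassStats(
                base_url=self.url,
                api_key=self.key,
                verify=self.verify,
                timeout=self.timeout,
            )
        return self.mass_stats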
@@ -1107,43 +1122,30 @@ class BaseClient:
             overwrite=True
         )["task_id"]
         print("the task id is ", task_id)
-        task_id = self.start_mass_stats_job(task_id)
-        print("the task id is ", task_id)
-        return task_id
 
+        # Wait for job completion
+        import time
+
+        while True:
+            result = self.track_mass_stats_job(ids=[task_id])
+            status = result[task_id]['status']
+            print(f"Job status: {status}")
+
+            if status == "Completed":
+                break
+            elif status == "Error":
+                raise Exception(f"Job {task_id} encountered an error")
+
+            # Wait 30 seconds before checking again
+            time.sleep(30)
 
-
-
-
-
-        # Args:
-        #     model_name (str): The name of the model to train.
-        #     training_data (dict): Dictionary containing training data parameters.
-
-        # Returns:
-        #     dict: The response from the model training API.
-        # """
-        # endpoint = "https://modeltraining-573248941006.australia-southeast1.run.app/train_model"
-        # payload = {
-        #     "model_name": model_name,
-        #     "training_data": training_data
-        # }
-        # try:
-        #     response = self.session.post(endpoint, json=payload, timeout=self.timeout, verify=self.verify)
-        #     if not response.ok:
-        #         error_msg = f"Model training request failed: {response.status_code} {response.reason}"
-        #         try:
-        #             error_data = response.json()
-        #             if "detail" in error_data:
-        #                 error_msg += f" - {error_data['detail']}"
-        #         except Exception:
-        #             if response.text:
-        #                 error_msg += f" - {response.text}"
-        #         raise APIError(error_msg)
-        #     return response.json()
-        # except requests.RequestException as e:
-        #     raise APIError(f"Model training request failed: {str(e)}")
+        # print("the result is ", result)
+        # after all the random sample jos are done, we then start the mass stats job
+        task_id = self.start_mass_stats_job(task_id)
+        # now we hav ethe random sampel
 
+        # print("the task id is ", task_id)
+        return task_id
 
     def train_model(self, model_name: str, training_dataset: str, task_type: str, model_category: str, architecture: str, region: str, hyperparameters: dict = None) -> dict:
         """
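The net effect of this hunk is that generate_ai_dataset no longer returns as soon as the sampling job is submitted: it now polls track_mass_stats_job every 30 seconds and only starts the mass-stats stage once the sampling job reports "Completed". A standalone sketch of the same polling contract (the wait_for_job helper is hypothetical; the status strings follow the diff):

    # Hypothetical helper equivalent to the polling loop added above.
    import time

    def wait_for_job(client, task_id, poll_seconds=30):
        # Poll until the task reaches a terminal state reported by the API.
        while True:
            status = client.track_mass_stats_job(ids=[task_id])[task_id]["status"]
            if status == "Completed":
                return task_id
            if status == "Error":
                raise RuntimeError(f"Job {task_id} encountered an error")
            time.sleep(poll_seconds)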
@@ -1209,7 +1211,7 @@ class BaseClient:
 
 
 
-    def
+    def create_dataset_file(
         self,
         name: str,
         aoi: str,
@@ -1329,30 +1331,22 @@ class BaseClient:
 
         return self.mass_stats.combine_tiles(body["name"], usezarr, body["overwrite"], body["output"])
 
-
-
-
-
-
-
-
-
     def deploy_model(self, dataset: str, product:str, model_name:str, input_expression: str, model_training_job_name: str, uid: str, dates_iso8601: list):
         script_content = self._generate_script(model_name, product, model_training_job_name, uid)
         script_name = f"{product}.py"
         self._upload_script_to_bucket(script_content, script_name, model_training_job_name, uid)
-        # after uploading the script, we need to create a new virtual dataset
         self._create_dataset(name = dataset, collection = "terrakio-datasets", products = [product], path = f"gs://terrakio-mass-requests/{uid}/{model_training_job_name}/inference_scripts", input = input_expression, dates_iso8601 = dates_iso8601, padding = 0)
 
     def _generate_script(self, model_name: str, product: str, model_training_job_name: str, uid: str) -> str:
         return textwrap.dedent(f'''
             import logging
             from io import BytesIO
-
-            from onnxruntime import InferenceSession
+
             import numpy as np
+            import pandas as pd
             import xarray as xr
-            import
+            from google.cloud import storage
+            from onnxruntime import InferenceSession
 
             logging.basicConfig(
                 level=logging.INFO
@@ -1360,47 +1354,95 @@ class BaseClient:
 
             def get_model():
                 logging.info("Loading model for {model_name}...")
-
+
                 client = storage.Client()
                 bucket = client.get_bucket('terrakio-mass-requests')
                 blob = bucket.blob('{uid}/{model_training_job_name}/models/{model_name}.onnx')
-
+
                 model = BytesIO()
                 blob.download_to_file(model)
                 model.seek(0)
-
+
                 session = InferenceSession(model.read(), providers=["CPUExecutionProvider"])
                 return session
 
             def {product}(*bands, model):
                 logging.info("start preparing data")
+                print("the bands are ", bands)
 
-
-                logging.info(f"Original shape: {{original_shape}}")
+                data_arrays = list(bands)
 
-
-                for band in bands:
-                    transformed_band = band.values.reshape(-1,1)
-                    transformed_bands.append(transformed_band)
+                print("the data arrays are ", [da.name for da in data_arrays])
 
-
+                reference_array = data_arrays[0]
+                original_shape = reference_array.shape
+                logging.info(f"Original shape: {{original_shape}}")
 
+                if 'time' in reference_array.dims:
+                    time_coords = reference_array.coords['time']
+                    if len(time_coords) == 1:
+                        output_timestamp = time_coords[0]
+                    else:
+                        years = [pd.to_datetime(t).year for t in time_coords.values]
+                        unique_years = set(years)
+
+                        if len(unique_years) == 1:
+                            year = list(unique_years)[0]
+                            output_timestamp = pd.Timestamp(f"{{year}}-01-01")
+                        else:
+                            latest_year = max(unique_years)
+                            output_timestamp = pd.Timestamp(f"{{latest_year}}-01-01")
+                else:
+                    output_timestamp = pd.Timestamp("1970-01-01")
+
+                averaged_bands = []
+                for data_array in data_arrays:
+                    if 'time' in data_array.dims:
+                        averaged_band = np.mean(data_array.values, axis=0)
+                        logging.info(f"Averaged band from {{data_array.shape}} to {{averaged_band.shape}}")
+                    else:
+                        averaged_band = data_array.values
+                        logging.info(f"No time dimension, shape: {{averaged_band.shape}}")
+
+                    flattened_band = averaged_band.reshape(-1, 1)
+                    averaged_bands.append(flattened_band)
+
+                input_data = np.hstack(averaged_bands)
+
                 logging.info(f"Final input shape: {{input_data.shape}}")
-
+
                 output = model.run(None, {{"float_input": input_data.astype(np.float32)}})[0]
-
+
                 logging.info(f"Model output shape: {{output.shape}}")
 
-
+                if len(original_shape) >= 3:
+                    spatial_shape = original_shape[1:]
+                else:
+                    spatial_shape = original_shape
+
+                output_reshaped = output.reshape(spatial_shape)
+
+                output_with_time = np.expand_dims(output_reshaped, axis=0)
+
+                if 'time' in reference_array.dims:
+                    spatial_dims = [dim for dim in reference_array.dims if dim != 'time']
+                    spatial_coords = {{dim: reference_array.coords[dim] for dim in spatial_dims if dim in reference_array.coords}}
+                else:
+                    spatial_dims = list(reference_array.dims)
+                    spatial_coords = dict(reference_array.coords)
+
                 result = xr.DataArray(
-                    data=
-                    dims=
-                    coords=
+                    data=output_with_time.astype(np.float32),
+                    dims=['time'] + list(spatial_dims),
+                    coords={
+                        'time': [output_timestamp.values],
+                        'y': spatial_coords['y'].values,
+                        'x': spatial_coords['x'].values
+                    }
                 )
-
                 return result
         ''').strip()
-
+
     def _upload_script_to_bucket(self, script_content: str, script_name: str, model_training_job_name: str, uid: str):
         """Upload the generated script to Google Cloud Storage"""
 
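The generated {product} function time-averages each input band, flattens the results into an (n_pixels, n_bands) matrix, and feeds that to the ONNX session under the input name "float_input" before reshaping the prediction back onto the spatial grid. A minimal standalone illustration of that inference step (the model path, band count and shapes are assumptions; "float_input" follows the generated script):

    # Minimal sketch of the inference step; model.onnx and the shapes are
    # illustrative assumptions, and the model must expose a "float_input" input.
    import numpy as np
    from onnxruntime import InferenceSession

    session = InferenceSession("model.onnx", providers=["CPUExecutionProvider"])

    bands = [np.random.rand(3, 64, 64) for _ in range(4)]      # 4 bands, 3 timesteps
    averaged = [b.mean(axis=0).reshape(-1, 1) for b in bands]  # time-average, flatten
    input_data = np.hstack(averaged).astype(np.float32)        # (4096, 4) features

    output = session.run(None, {"float_input": input_data})[0]
    prediction_map = output.reshape(64, 64)                    # back onto the grid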
@@ -1410,3 +1452,24 @@ class BaseClient:
         blob.upload_from_string(script_content, content_type='text/plain')
         logging.info(f"Script uploaded successfully to {uid}/{model_training_job_name}/inference_scripts/{script_name}")
 
+
+
+
+    def download_file_to_path(self, job_name, stage, file_name, output_path):
+        if not self.mass_stats:
+            from terrakio_core.mass_stats import MassStats
+            if not self.url or not self.key:
+                raise ConfigurationError("Mass Stats client not initialized. Make sure API URL and key are set.")
+            self.mass_stats = MassStats(
+                base_url=self.url,
+                api_key=self.key,
+                verify=self.verify,
+                timeout=self.timeout
+            )
+
+        # fetch bucket info based on job name and stage
+
+        taskid = self.mass_stats.get_task_id(job_name, stage).get('task_id')
+        trackinfo = self.mass_stats.track_job([taskid])
+        bucket = trackinfo[taskid]['bucket']
+        return self.mass_stats.download_file(job_name, bucket, file_name, output_path)
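Together with MassStats.download_file below, this gives callers a one-call path from a job name to a local file: resolve the task id, look up the job's bucket via track_job, then download. A hypothetical usage (the client object and all argument values are illustrative, not from the diff):

    # Hypothetical usage; "client", the job name, stage and file names are
    # illustrative assumptions.
    local_path = client.download_file_to_path(
        job_name="my_sampling_job",
        stage="random_sample",
        file_name="results.csv",
        output_path="./downloads/results.csv",
    )
    print(local_path)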
{terrakio_core-0.3.1 → terrakio_core-0.3.2}/terrakio_core/dataset_management.py

@@ -83,10 +83,63 @@ class DatasetManagement:
         except requests.RequestException as e:
             raise APIError(f"Request failed: {str(e)}")
 
+    # def create_dataset(self, name: str, collection: str = "terrakio-datasets", **kwargs) -> Dict[str, Any]:
+    #     """
+    #     Create a new dataset.
+
+    #     Args:
+    #         name: Name of the dataset (required)
+    #         collection: Dataset collection (default: 'terrakio-datasets')
+    #         **kwargs: Additional dataset parameters including:
+    #             - products: List of products
+    #             - dates_iso8601: List of dates
+    #             - bucket: Storage bucket
+    #             - path: Storage path
+    #             - data_type: Data type
+    #             - no_data: No data value
+    #             - l_max: Maximum level
+    #             - y_size: Y size
+    #             - x_size: X size
+    #             - proj4: Projection string
+    #             - abstract: Dataset abstract
+    #             - geotransform: Geotransform parameters
+
+    #     Returns:
+    #         Created dataset information
+
+    #     Raises:
+    #         APIError: If the API request fails
+    #     """
+    #     endpoint = f"{self.api_url}/datasets"
+    #     params = {"collection": collection}
+    #     # Create payload with required name parameter
+    #     payload = {"name": name}
+
+    #     # Add optional parameters if provided
+    #     for param in ["products", "dates_iso8601", "bucket", "path", "data_type",
+    #                   "no_data", "l_max", "y_size", "x_size", "proj4", "abstract", "geotransform", "input"]:
+    #         if param in kwargs:
+    #             payload[param] = kwargs[param]
+
+    #     try:
+    #         response = self.session.post(
+    #             endpoint,
+    #             params=params,
+    #             json=payload,
+    #             timeout=self.timeout,
+    #             verify=self.verify
+    #         )
+
+    #         if not response.ok:
+    #             raise APIError(f"API request failed: {response.status_code} {response.reason}")
+    #         return response.json()
+    #     except requests.RequestException as e:
+    #         raise APIError(f"Request failed: {str(e)}")
+
     def create_dataset(self, name: str, collection: str = "terrakio-datasets", **kwargs) -> Dict[str, Any]:
         """
         Create a new dataset.
-
+
         Args:
             name: Name of the dataset (required)
             collection: Dataset collection (default: 'terrakio-datasets')
@@ -103,24 +156,23 @@ class DatasetManagement:
             - proj4: Projection string
             - abstract: Dataset abstract
             - geotransform: Geotransform parameters
-
+            - padding: Padding value
+
         Returns:
             Created dataset information
-
+
         Raises:
             APIError: If the API request fails
         """
         endpoint = f"{self.api_url}/datasets"
         params = {"collection": collection}
-        # Create payload with required name parameter
         payload = {"name": name}
-
-        # Add optional parameters if provided
-        for param in ["products", "dates_iso8601", "bucket", "path", "data_type",
-                      "no_data", "l_max", "y_size", "x_size", "proj4", "abstract", "geotransform", "input"]:
+
+        for param in ["products", "dates_iso8601", "bucket", "path", "data_type",
+                      "no_data", "l_max", "y_size", "x_size", "proj4", "abstract", "geotransform", "input", "padding"]:
             if param in kwargs:
                 payload[param] = kwargs[param]
-
+
         try:
             response = self.session.post(
                 endpoint,
@@ -129,7 +181,7 @@ class DatasetManagement:
                 timeout=self.timeout,
                 verify=self.verify
             )
-
+
             if not response.ok:
                 raise APIError(f"API request failed: {response.status_code} {response.reason}")
             return response.json()
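The functional change buried under the commented-out copy is small: create_dataset now also forwards a padding keyword into the request payload. A hypothetical call (dm stands for an already-constructed DatasetManagement instance; all values are illustrative):

    # Hypothetical call showing the newly accepted "padding" keyword.
    result = dm.create_dataset(
        name="my_dataset",
        collection="terrakio-datasets",
        products=["ndvi"],   # optional parameters are forwarded as before
        padding=0,           # new in 0.3.2: forwarded like the other kwargs
    )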
{terrakio_core-0.3.1 → terrakio_core-0.3.2}/terrakio_core/mass_stats.py

@@ -61,36 +61,89 @@ class MassStats:
         return response
 
 
-
-
-
+    def download_file(self, job_name: str, bucket:str, file_name: str, output_path: str) -> str:
+        """
+        Download a file from mass_stats using job name and file name.
+
+        Args:
+            job_name: Name of the job
+            file_name: Name of the file to download
+            output_path: Path where the file should be saved
+
+        Returns:
+            str: Path to the downloaded file
+        """
+        import os
+        from pathlib import Path
 
-
-
-
+        endpoint_url = f"{self.base_url}/mass_stats/download_files"
+        request_body = {
+            "job_name": job_name,
+            "bucket": bucket,
+            "file_name": file_name
+        }
+
+        try:
+            # Get signed URL
+            response = self.session.post(
+                endpoint_url,
+                json=request_body,
+                verify=self.verify,
+                timeout=self.timeout
+            )
+            signed_url = response.json().get('download_url')
+            if not signed_url:
+                raise Exception("No download URL received from server")
+            print(f"Generated signed URL for download")
+
+            # Create output directory if it doesn't exist
+            output_dir = Path(output_path).parent
+            output_dir.mkdir(parents=True, exist_ok=True)
+
+            # Download the file using the signed URL
+            download_response = self.session.get(
+                signed_url,
+                verify=self.verify,
+                timeout=self.timeout,
+                stream=True  # Stream for large files
+            )
+            download_response.raise_for_status()
+
+            # Check if file exists in the response (content-length header)
+            content_length = download_response.headers.get('content-length')
+            if content_length and int(content_length) == 0:
+                raise Exception("File appears to be empty")
 
-
-
-
-
-
-        # response = requests.get(
-        #     url,
-        #     verify=self.verify,
-        #     timeout=self.timeout
-        # )
-        # response.raise_for_status()
+            # Write the file
+            with open(output_path, 'wb') as file:
+                for chunk in download_response.iter_content(chunk_size=8192):
+                    if chunk:
+                        file.write(chunk)
 
-
-
-
-
-
+            # Verify file was written
+            if not os.path.exists(output_path):
+                raise Exception(f"File was not written to {output_path}")
+
+            file_size = os.path.getsize(output_path)
+            print(f"File downloaded successfully to {output_path} (size: {file_size / (1024 * 1024):.4f} mb)")
+
+            return output_path
 
-
-
-
-
+        except self.session.exceptions.RequestException as e:
+            if hasattr(e, 'response') and e.response is not None:
+                error_detail = e.response.text
+                raise Exception(f"Error getting signed URL: {e}. Details: {error_detail}")
+            raise Exception(f"Error in download process: {e}")
+        except IOError as e:
+            raise Exception(f"Error writing file to {output_path}: {e}")
+        except Exception as e:
+            # Clean up partial file if it exists
+            if os.path.exists(output_path):
+                try:
+                    os.remove(output_path)
+                except:
+                    pass
+            raise
 
 
 
@@ -152,53 +205,7 @@ class MassStats:
             timeout=self.timeout
         )
         return response.json()
-
-
-    # def construct_download_url(
-    #     self,
-    #     name: str,
-    #     output: str,
-    #     region: Optional[str] = None,
-    # ) -> Dict[str, Any]:
-    #     """
-    #     Request a signed download URL for a file.
-
-    #     Args:
-    #         name: job name
-    #         file_type: Type of file to download (e.g., "output", "manifest", "log")
-    #         region: Region where the file is stored
-
-    #     Returns:
-    #         Dict containing download_url and file metadata
-    #     """
-    #     url = f"{self.base_url}/mass_stats/download"
-
-    #     data = {
-    #         "name": name,
-    #         "output": output
-    #     }
-
-    #     if region is not None:
-    #         data["region"] = region
-
-    #     response = self.session.post(
-    #         url,
-    #         json=data,
-    #         verify=self.verify,
-    #         timeout=self.timeout
-    #     )
-
-    #     return response.json()
 
-    # def testdownload(
-    #     self,
-    #     name: str,
-    #     region: str,
-    #     output: str,
-    #     ):
-    #     upload_result = self.construct_download_url(name, region, output)
-    #     return upload_result
 
 
 
@@ -286,7 +293,7 @@ class MassStats:
         if uid is not None:
            url += f"&uid={uid}"
         response = self.session.get(url, verify=self.verify, timeout=self.timeout)
-        print("response text is ", response.text)
+        #print("response text is ", response.text)
        return response.json()
 
     def track_job(self, ids: Optional[list] = None) -> Dict[str, Any]:
@@ -491,6 +498,7 @@ class MassStats:
 
 
 
+
 
 
 
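download_file implements a common signed-URL pattern: POST to the API to obtain a short-lived URL, then stream the payload to disk in chunks. A self-contained sketch of the same pattern (the endpoint and JSON field names follow the diff; the helper itself and its arguments are illustrative):

    # Standalone sketch of the signed-URL download flow used by download_file.
    import requests
    from pathlib import Path

    def download_via_signed_url(session: requests.Session, api_base: str,
                                job_name: str, bucket: str, file_name: str,
                                output_path: str) -> str:
        # Step 1: ask the API for a short-lived signed URL.
        resp = session.post(f"{api_base}/mass_stats/download_files",
                            json={"job_name": job_name, "bucket": bucket,
                                  "file_name": file_name})
        resp.raise_for_status()
        signed_url = resp.json()["download_url"]

        # Step 2: stream the payload to disk in 8 KiB chunks.
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        with session.get(signed_url, stream=True) as dl:
            dl.raise_for_status()
            with open(output_path, "wb") as fh:
                for chunk in dl.iter_content(chunk_size=8192):
                    if chunk:
                        fh.write(chunk)
        return output_path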
{terrakio_core-0.3.1 → terrakio_core-0.3.2}/terrakio_core.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: terrakio-core
-Version: 0.3.1
+Version: 0.3.2
 Summary: Core components for Terrakio API clients
 Author-email: Yupeng Chao <yupeng@haizea.com.au>
 Project-URL: Homepage, https://github.com/HaizeaAnalytics/terrakio-python-api
@@ -22,6 +22,7 @@ Requires-Dist: xarray>=2023.1.0
 Requires-Dist: shapely>=2.0.0
 Requires-Dist: geopandas>=0.13.0
 Requires-Dist: google-cloud-storage>=2.0.0
+Requires-Dist: nest_asyncio
 
 # Terrakio Core
 
All remaining files listed above (+0 -0) are unchanged between 0.3.1 and 0.3.2.