terrakio-core 0.4.96__tar.gz → 0.4.98__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {terrakio_core-0.4.96 → terrakio_core-0.4.98}/PKG-INFO +1 -1
- {terrakio_core-0.4.96 → terrakio_core-0.4.98}/pyproject.toml +1 -1
- {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/__init__.py +1 -1
- {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/convenience_functions/zonal_stats.py +114 -4
- {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/endpoints/dataset_management.py +12 -4
- {terrakio_core-0.4.96 → terrakio_core-0.4.98}/.gitignore +0 -0
- {terrakio_core-0.4.96 → terrakio_core-0.4.98}/README.md +0 -0
- {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/accessors.py +0 -0
- {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/async_client.py +0 -0
- {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/client.py +0 -0
- {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/config.py +0 -0
- {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/convenience_functions/create_dataset_file.py +0 -0
- {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/convenience_functions/geoquries.py +0 -0
- {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/endpoints/auth.py +0 -0
- {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/endpoints/group_management.py +0 -0
- {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/endpoints/mass_stats.py +0 -0
- {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/endpoints/model_management.py +0 -0
- {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/endpoints/space_management.py +0 -0
- {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/endpoints/user_management.py +0 -0
- {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/exceptions.py +0 -0
- {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/helper/bounded_taskgroup.py +0 -0
- {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/helper/decorators.py +0 -0
- {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/helper/tiles.py +0 -0
- {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/sync_client.py +0 -0
{terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/convenience_functions/zonal_stats.py
RENAMED
|
@@ -9,18 +9,18 @@ from typing import Optional
|
|
|
9
9
|
# Third-party library imports
|
|
10
10
|
import aiohttp
|
|
11
11
|
import geopandas as gpd
|
|
12
|
-
import nest_asyncio
|
|
13
12
|
import pandas as pd
|
|
14
13
|
import pyproj
|
|
15
14
|
import xarray as xr
|
|
16
15
|
from geopandas import GeoDataFrame
|
|
17
16
|
from shapely.geometry import box, mapping, shape
|
|
18
17
|
from shapely.ops import transform
|
|
18
|
+
import threading
|
|
19
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
19
20
|
|
|
20
21
|
# Local imports
|
|
21
22
|
from .geoquries import request_geoquery_list
|
|
22
23
|
|
|
23
|
-
nest_asyncio.apply()
|
|
24
24
|
class cloud_object(gpd.GeoDataFrame):
|
|
25
25
|
"""
|
|
26
26
|
This class is a class used for cloud
|
|
@@ -36,12 +36,51 @@ class cloud_object(gpd.GeoDataFrame):
|
|
|
36
36
|
self.client = client
|
|
37
37
|
self.job_name = job_name
|
|
38
38
|
|
|
39
|
+
def __repr__(self):
|
|
40
|
+
return (
|
|
41
|
+
f"<CloudZonalStats job_id='{self.job_id}', job_name='{self.job_name}'>\n"
|
|
42
|
+
f"Call .head(n) to fetch a preview GeoDataFrame when the job completes."
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
def _repr_html_(self):
|
|
46
|
+
# Jupyter HTML-friendly representation to avoid auto-rendering an empty DataFrame
|
|
47
|
+
return (
|
|
48
|
+
f"<div style='font-family:system-ui,Segoe UI,Helvetica,Arial,sans-serif'>"
|
|
49
|
+
f"<strong>Cloud Zonal Stats</strong><br/>"
|
|
50
|
+
f"job_id: <code>{self.job_id}</code><br/>"
|
|
51
|
+
f"job_name: <code>{self.job_name}</code><br/>"
|
|
52
|
+
f"<em>Use <code>.head(n)</code> to retrieve a preview once the job is completed.</em>"
|
|
53
|
+
f"</div>"
|
|
54
|
+
)
|
|
55
|
+
|
|
39
56
|
def head(self, n = 5):
|
|
40
57
|
"""
|
|
41
58
|
Returns the first n files stored in the cloud bucket.
|
|
42
59
|
"""
|
|
43
|
-
|
|
60
|
+
# Detect if we're inside an existing event loop (e.g., Jupyter)
|
|
61
|
+
in_running_loop = False
|
|
62
|
+
try:
|
|
63
|
+
asyncio.get_running_loop()
|
|
64
|
+
in_running_loop = True
|
|
65
|
+
except RuntimeError:
|
|
66
|
+
in_running_loop = False
|
|
44
67
|
|
|
68
|
+
if in_running_loop:
|
|
69
|
+
# Run the async function in a separate thread with its own loop
|
|
70
|
+
def run_async_in_thread():
|
|
71
|
+
new_loop = asyncio.new_event_loop()
|
|
72
|
+
try:
|
|
73
|
+
return new_loop.run_until_complete(self._head_async(n))
|
|
74
|
+
finally:
|
|
75
|
+
new_loop.close()
|
|
76
|
+
|
|
77
|
+
with ThreadPoolExecutor(max_workers=1) as executor:
|
|
78
|
+
future = executor.submit(run_async_in_thread)
|
|
79
|
+
return future.result()
|
|
80
|
+
else:
|
|
81
|
+
# No running loop - safe to use asyncio.run
|
|
82
|
+
return asyncio.run(self._head_async(n))
|
|
83
|
+
|
|
45
84
|
async def _head_async(self, n = 5):
|
|
46
85
|
"""
|
|
47
86
|
Returns the first n files stored in the cloud bucket.
|
|
@@ -120,12 +159,54 @@ class cloud_object(gpd.GeoDataFrame):
|
|
|
120
159
|
})
|
|
121
160
|
|
|
122
161
|
self.client.logger.info(f"Created GeoDataFrame with {len(gdf)} rows")
|
|
162
|
+
|
|
163
|
+
# Derive id values from json metadata (prefer 'file', fallback to 'group')
|
|
164
|
+
id_values = []
|
|
165
|
+
for i in range(min_length):
|
|
166
|
+
entry = json_data[i] if i < len(json_data) else {}
|
|
167
|
+
id_candidate = entry.get('file') or entry.get('group') or ''
|
|
168
|
+
if isinstance(id_candidate, str) and id_candidate.startswith('file_'):
|
|
169
|
+
id_val = id_candidate[len('file_'):]
|
|
170
|
+
elif isinstance(id_candidate, str) and id_candidate.startswith('group_'):
|
|
171
|
+
id_val = id_candidate[len('group_'):]
|
|
172
|
+
else:
|
|
173
|
+
id_val = str(id_candidate) if id_candidate else str(i)
|
|
174
|
+
id_values.append(id_val)
|
|
175
|
+
|
|
176
|
+
# Geometry to id mapping using WKB to avoid precision issues
|
|
177
|
+
geom_to_id = {geometries[i].wkb: id_values[i] for i in range(min_length)}
|
|
178
|
+
|
|
123
179
|
try:
|
|
124
180
|
expanded_gdf = expand_on_variables_and_time(gdf)
|
|
181
|
+
|
|
182
|
+
# Attach id as first index level, geometry second, time third if present
|
|
183
|
+
if hasattr(expanded_gdf.index, 'names') and 'geometry' in expanded_gdf.index.names:
|
|
184
|
+
if isinstance(expanded_gdf.index, pd.MultiIndex):
|
|
185
|
+
geometry_index = expanded_gdf.index.get_level_values('geometry')
|
|
186
|
+
else:
|
|
187
|
+
geometry_index = expanded_gdf.index
|
|
188
|
+
id_col = [geom_to_id.get(geom.wkb) for geom in geometry_index]
|
|
189
|
+
expanded_gdf['id'] = id_col
|
|
190
|
+
expanded_gdf = expanded_gdf.reset_index()
|
|
191
|
+
if 'time' in expanded_gdf.columns:
|
|
192
|
+
expanded_gdf = expanded_gdf.set_index(['id', 'geometry', 'time'])
|
|
193
|
+
else:
|
|
194
|
+
expanded_gdf = expanded_gdf.set_index(['id', 'geometry'])
|
|
195
|
+
else:
|
|
196
|
+
# geometry exists as a column
|
|
197
|
+
id_col = [geom_to_id.get(geom.wkb) for geom in expanded_gdf['geometry']]
|
|
198
|
+
expanded_gdf['id'] = id_col
|
|
199
|
+
if 'time' in expanded_gdf.columns:
|
|
200
|
+
expanded_gdf = expanded_gdf.set_index(['id', 'geometry', 'time'])
|
|
201
|
+
else:
|
|
202
|
+
expanded_gdf = expanded_gdf.set_index(['id', 'geometry'])
|
|
203
|
+
|
|
125
204
|
return expanded_gdf
|
|
126
205
|
except NameError:
|
|
127
206
|
self.client.logger.warning("expand_on_variables_and_time function not found, returning raw GeoDataFrame")
|
|
128
|
-
|
|
207
|
+
# Set id on raw gdf and index appropriately
|
|
208
|
+
gdf['id'] = id_values
|
|
209
|
+
return gdf.set_index(['id', 'geometry'])
|
|
129
210
|
|
|
130
211
|
else:
|
|
131
212
|
self.client.logger.warning(f"Failed to download geometry data: HTTP {response.status}")
|
|
@@ -513,6 +594,7 @@ async def zonal_stats(
|
|
|
513
594
|
job_name = await client.mass_stats.track_job([mass_stats_id])
|
|
514
595
|
job_name = job_name[mass_stats_id]["name"]
|
|
515
596
|
cloud_files_object = cloud_object(job_id = mass_stats_id, job_name = job_name, client = client)
|
|
597
|
+
|
|
516
598
|
return cloud_files_object
|
|
517
599
|
|
|
518
600
|
quries = []
|
|
@@ -539,5 +621,33 @@ async def zonal_stats(
|
|
|
539
621
|
"is_cloud_backed": False,
|
|
540
622
|
}
|
|
541
623
|
gdf_with_datasets = expand_on_variables_and_time(gdf_with_datasets)
|
|
624
|
+
|
|
625
|
+
# If an id_column is provided, attach it to the result and include in the index
|
|
626
|
+
if id_column is not None and id_column in gdf.columns:
|
|
627
|
+
# Build a mapping from input geometries to id values (use WKB for robust equality)
|
|
628
|
+
geometry_to_id = {geom.wkb: id_val for geom, id_val in zip(gdf.geometry, gdf[id_column])}
|
|
629
|
+
|
|
630
|
+
# Determine geometry values in the result (index may be geometry or (geometry, time))
|
|
631
|
+
if hasattr(gdf_with_datasets.index, 'names') and 'geometry' in gdf_with_datasets.index.names:
|
|
632
|
+
if isinstance(gdf_with_datasets.index, pd.MultiIndex):
|
|
633
|
+
geometry_index = gdf_with_datasets.index.get_level_values('geometry')
|
|
634
|
+
else:
|
|
635
|
+
geometry_index = gdf_with_datasets.index
|
|
636
|
+
id_values = [geometry_to_id.get(geom.wkb) for geom in geometry_index]
|
|
637
|
+
gdf_with_datasets[id_column] = id_values
|
|
638
|
+
# Reset index to control index composition precisely, then set to desired levels
|
|
639
|
+
gdf_with_datasets = gdf_with_datasets.reset_index()
|
|
640
|
+
if 'time' in gdf_with_datasets.columns:
|
|
641
|
+
gdf_with_datasets = gdf_with_datasets.set_index([id_column, 'geometry', 'time'])
|
|
642
|
+
else:
|
|
643
|
+
gdf_with_datasets = gdf_with_datasets.set_index([id_column, 'geometry'])
|
|
644
|
+
else:
|
|
645
|
+
# geometry exists as a column
|
|
646
|
+
id_values = [geometry_to_id.get(geom.wkb) for geom in gdf_with_datasets['geometry']]
|
|
647
|
+
gdf_with_datasets[id_column] = id_values
|
|
648
|
+
if 'time' in gdf_with_datasets.columns:
|
|
649
|
+
gdf_with_datasets = gdf_with_datasets.set_index([id_column, 'geometry', 'time'])
|
|
650
|
+
else:
|
|
651
|
+
gdf_with_datasets = gdf_with_datasets.set_index([id_column, 'geometry'])
|
|
542
652
|
return gdf_with_datasets
|
|
543
653
|
|
|
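{terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/endpoints/dataset_management.py
RENAMED
|
@@ -69,7 +69,7 @@ class DatasetManagement: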
|
|
|
69
69
|
name: Name of the dataset (required)
|
|
70
70
|
collection: Dataset collection (default: 'terrakio-datasets')
|
|
71
71
|
products: List of products
|
|
72
|
-
dates_iso8601: List of dates
|
|
72
|
+
dates_iso8601: List of dates (will be automatically sorted chronologically)
|
|
73
73
|
bucket: Storage bucket
|
|
74
74
|
path: Storage path
|
|
75
75
|
data_type: Data type
|
|
@@ -142,7 +142,7 @@ class DatasetManagement:
|
|
|
142
142
|
append: Whether to append data or replace (default: True)
|
|
143
143
|
collection: Dataset collection (default: 'terrakio-datasets')
|
|
144
144
|
products: List of products
|
|
145
|
-
dates_iso8601: List of dates
|
|
145
|
+
dates_iso8601: List of dates (will be automatically sorted chronologically)
|
|
146
146
|
bucket: Storage bucket
|
|
147
147
|
path: Storage path
|
|
148
148
|
data_type: Data type
|
|
@@ -162,6 +162,10 @@ class DatasetManagement:
|
|
|
162
162
|
Raises:
|
|
163
163
|
APIError: If the API request fails
|
|
164
164
|
"""
|
|
165
|
+
# Sort dates_iso8601 chronologically if provided
|
|
166
|
+
if dates_iso8601 is not None:
|
|
167
|
+
dates_iso8601 = sorted(dates_iso8601)
|
|
168
|
+
|
|
165
169
|
params = {"collection": collection, "append": str(append).lower()}
|
|
166
170
|
payload = {"name": name}
|
|
167
171
|
param_mapping = {
|
|
@@ -215,7 +219,7 @@ class DatasetManagement:
|
|
|
215
219
|
append: Whether to append data or replace (default: True)
|
|
216
220
|
collection: Dataset collection (default: 'terrakio-datasets')
|
|
217
221
|
products: List of products
|
|
218
|
-
dates_iso8601: List of dates
|
|
222
|
+
dates_iso8601: List of dates (will be automatically sorted chronologically)
|
|
219
223
|
bucket: Storage bucket
|
|
220
224
|
path: Storage path
|
|
221
225
|
data_type: Data type
|
|
@@ -236,6 +240,10 @@ class DatasetManagement:
|
|
|
236
240
|
Raises:
|
|
237
241
|
APIError: If the API request fails
|
|
238
242
|
"""
|
|
243
|
+
# Sort dates_iso8601 chronologically if provided
|
|
244
|
+
if dates_iso8601 is not None:
|
|
245
|
+
dates_iso8601 = sorted(dates_iso8601)
|
|
246
|
+
|
|
239
247
|
params = {"collection": collection, "append": str(append).lower()}
|
|
240
248
|
payload = {"name": name}
|
|
241
249
|
param_mapping = {
|
|
@@ -289,7 +297,7 @@ class DatasetManagement:
|
|
|
289
297
|
name: Name of the dataset (required)
|
|
290
298
|
collection: Dataset collection (default: 'terrakio-datasets')
|
|
291
299
|
products: List of products
|
|
292
|
-
dates_iso8601: List of dates
|
|
300
|
+
dates_iso8601: List of dates (will be automatically sorted chronologically)
|
|
293
301
|
bucket: Storage bucket
|
|
294
302
|
path: Storage path
|
|
295
303
|
data_type: Data type
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/convenience_functions/geoquries.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|