terrakio-core 0.4.3__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of terrakio-core might be problematic.
- terrakio_core/__init__.py +3 -1
- terrakio_core/accessors.py +477 -0
- terrakio_core/async_client.py +23 -38
- terrakio_core/client.py +83 -84
- terrakio_core/convenience_functions/convenience_functions.py +316 -324
- terrakio_core/endpoints/auth.py +8 -1
- terrakio_core/endpoints/mass_stats.py +13 -9
- terrakio_core/endpoints/model_management.py +604 -948
- terrakio_core/sync_client.py +341 -33
- {terrakio_core-0.4.3.dist-info → terrakio_core-0.4.4.dist-info}/METADATA +2 -1
- terrakio_core-0.4.4.dist-info/RECORD +22 -0
- terrakio_core-0.4.3.dist-info/RECORD +0 -21
- {terrakio_core-0.4.3.dist-info → terrakio_core-0.4.4.dist-info}/WHEEL +0 -0
- {terrakio_core-0.4.3.dist-info → terrakio_core-0.4.4.dist-info}/top_level.txt +0 -0
terrakio_core/__init__.py
CHANGED
@@ -5,10 +5,12 @@ Terrakio Core
 Core components for Terrakio API clients.
 """
 
-__version__ = "0.4.3"
+__version__ = "0.4.4"
 
 from .async_client import AsyncClient
 from .sync_client import SyncClient as Client
+from . import accessors
+
 
 __all__ = [
     "AsyncClient",
terrakio_core/accessors.py
ADDED
@@ -0,0 +1,477 @@
import pandas as pd
import geopandas as gpd
import xarray as xr
import numpy as np
from typing import Optional, Union, List

@pd.api.extensions.register_dataframe_accessor("geo")
class GeoXarrayAccessor:
    """
    Custom accessor for GeoDataFrames containing xarray datasets or dataarrays.
    Handles both direct xarray objects and lists containing xarray objects.
    Can aggregate across time when time dimension has been expanded into the index.
    """

    def __init__(self, pandas_obj):
        self._obj = pandas_obj
        self._validate()

    def _validate(self):
        """Validate that the DataFrame has the expected structure."""
        if not isinstance(self._obj, gpd.GeoDataFrame):
            raise AttributeError("Can only use .geo accessor with GeoDataFrames")

        # Check for columns with xarray data (including lists containing xarray objects)
        self._xarray_columns = []
        for col in self._obj.columns:
            if col != 'geometry':
                sample_value = self._obj[col].iloc[0] if len(self._obj) > 0 else None

                # Check if it's directly an xarray object
                if isinstance(sample_value, (xr.Dataset, xr.DataArray)):
                    self._xarray_columns.append(col)
                # Check if it's a list containing xarray objects
                elif isinstance(sample_value, list) and len(sample_value) > 0:
                    if isinstance(sample_value[0], (xr.Dataset, xr.DataArray)):
                        self._xarray_columns.append(col)

        if not self._xarray_columns:
            raise AttributeError("No xarray Dataset or DataArray columns found")

    def _extract_xarray_object(self, value):
        """Extract xarray object from various formats (direct object, list, etc.)."""
        if isinstance(value, (xr.Dataset, xr.DataArray)):
            return value
        elif isinstance(value, list) and len(value) > 0:
            if isinstance(value[0], (xr.Dataset, xr.DataArray)):
                return value[0]  # Take the first item from the list
        return None

    def _get_target_columns(self, columns: Optional[List[str]] = None) -> List[str]:
        """
        Get the list of columns to operate on.

        Args:
            columns: List of column names to operate on. If None, uses all xarray columns.

        Returns:
            List of column names to operate on
        """
        if columns is None:
            return self._xarray_columns

        # Validate that specified columns exist and contain xarray data
        invalid_columns = [col for col in columns if col not in self._xarray_columns]
        if invalid_columns:
            raise ValueError(f"Columns {invalid_columns} are not valid xarray columns. "
                             f"Available xarray columns: {self._xarray_columns}")

        return columns

    def _should_aggregate_by_geometry(self, dim: Optional[Union[str, List[str]]] = None) -> bool:
        """
        Determine if we should aggregate by geometry (i.e., time dimension was expanded to index).

        Args:
            dim: Dimension(s) being reduced over

        Returns:
            True if we should group by geometry and aggregate across time rows
        """
        if dim is None:
            return False

        dims_to_reduce = [dim] if isinstance(dim, str) else dim

        # Check if 'time' is in the dimensions to reduce and if we have a MultiIndex with time
        if 'time' in dims_to_reduce:
            if hasattr(self._obj.index, 'names') and self._obj.index.names:
                # Check if time is one of the index levels
                return 'time' in self._obj.index.names

        return False

    def _get_geometry_level_name(self) -> Optional[str]:
        """Get the name of the geometry level in the MultiIndex."""
        if hasattr(self._obj.index, 'names') and self._obj.index.names:
            # Look for the level that's not 'time' - this should be the geometry level
            for name in self._obj.index.names:
                if name != 'time':
                    return name
        return None

    def _apply_reduction(self, reduction_func: str, dim: Optional[Union[str, List[str]]] = None,
                         columns: Optional[List[str]] = None, **kwargs):
        """
        Apply a reduction function to specified xarray datasets/dataarrays in the GeoDataFrame.

        Args:
            reduction_func: Name of the xarray reduction method (e.g., 'mean', 'sum', 'std')
            dim: Dimension(s) to reduce over. If None, reduces over all dimensions
            columns: List of column names to operate on. If None, operates on all xarray columns
            **kwargs: Additional arguments to pass to the reduction function

        Returns:
            GeoDataFrame with reduced xarray data
        """
        target_columns = self._get_target_columns(columns)

        # Check if we need to aggregate by geometry (time dimension expanded to index)
        if self._should_aggregate_by_geometry(dim):
            return self._apply_temporal_aggregation(reduction_func, dim, target_columns, **kwargs)
        else:
            return self._apply_spatial_reduction(reduction_func, dim, target_columns, **kwargs)

    def _apply_temporal_aggregation(self, reduction_func: str, dim: Union[str, List[str]],
                                    target_columns: List[str], **kwargs):
        """
        Apply aggregation across time by grouping by geometry.

        Args:
            reduction_func: Name of the reduction method
            dim: Dimension(s) being reduced (should include 'time')
            target_columns: Columns to operate on
            **kwargs: Additional arguments

        Returns:
            GeoDataFrame with time-aggregated data
        """
        geometry_level = self._get_geometry_level_name()
        if geometry_level is None:
            raise ValueError("Could not identify geometry level in MultiIndex")

        # Check if specific columns were requested for time aggregation
        if target_columns != self._xarray_columns:
            print("Warning: Cannot aggregate time on a single column. Aggregating all xarray columns instead.")
            target_columns = self._xarray_columns

        # Group by geometry level
        grouped = self._obj.groupby(level=geometry_level)

        result_data = []
        result_geometries = []
        result_index = []

        for geometry_key, group in grouped:
            # For each geometry, collect all xarray objects across time
            # The geometry is the group key itself (from the MultiIndex)
            new_row = {}

            for col in target_columns:
                xarray_objects = []

                # Collect all xarray objects for this geometry across different times
                for _, row in group.iterrows():
                    xr_data = self._extract_xarray_object(row[col])
                    if xr_data is not None:
                        xarray_objects.append(xr_data)

                if xarray_objects:
                    try:
                        # Concatenate along a new 'time' dimension
                        if isinstance(xarray_objects[0], xr.DataArray):
                            # Create time coordinate
                            time_coords = list(range(len(xarray_objects)))
                            concatenated = xr.concat(xarray_objects, dim='time')
                            concatenated = concatenated.assign_coords(time=time_coords)
                        elif isinstance(xarray_objects[0], xr.Dataset):
                            time_coords = list(range(len(xarray_objects)))
                            concatenated = xr.concat(xarray_objects, dim='time')
                            concatenated = concatenated.assign_coords(time=time_coords)
                        else:
                            raise TypeError(f"Unsupported xarray type: {type(xarray_objects[0])}")

                        # Apply the reduction function over the time dimension
                        if hasattr(concatenated, reduction_func):
                            if 'skipna' not in kwargs and reduction_func in ['mean', 'sum', 'std', 'var', 'min', 'max', 'median', 'quantile']:
                                kwargs['skipna'] = True

                            reduced_data = getattr(concatenated, reduction_func)(dim='time', **kwargs)

                            # Check if result should be converted to scalar
                            if isinstance(reduced_data, xr.DataArray) and reduced_data.size == 1:
                                try:
                                    scalar_value = float(reduced_data.values)
                                    reduced_data = scalar_value
                                except (ValueError, TypeError):
                                    pass
                            elif isinstance(reduced_data, xr.Dataset) and len(reduced_data.dims) == 0:
                                try:
                                    vars_list = list(reduced_data.data_vars.keys())
                                    if len(vars_list) == 1:
                                        var_name = vars_list[0]
                                        scalar_value = float(reduced_data[var_name].values)
                                        reduced_data = scalar_value
                                except (ValueError, TypeError, KeyError):
                                    pass

                            # Maintain original format (list vs direct)
                            original_format = group[col].iloc[0]
                            if isinstance(original_format, list):
                                new_row[col] = [reduced_data]
                            else:
                                new_row[col] = reduced_data
                        else:
                            raise AttributeError(f"'{type(concatenated).__name__}' object has no attribute '{reduction_func}'")

                    except Exception as e:
                        print(f"Warning: Could not apply {reduction_func} to geometry {geometry_key}, column {col}: {e}")
                        # Keep the first value as fallback
                        new_row[col] = group[col].iloc[0]
                else:
                    # No xarray data found, keep first value
                    new_row[col] = group[col].iloc[0]

            result_data.append(new_row)
            result_geometries.append(geometry_key)
            result_index.append(geometry_key)

        # Create result GeoDataFrame
        # Create a normal DataFrame with just the data columns
        result_df = pd.DataFrame(result_data, index=result_index)

        # Add geometry as a temporary column
        result_df['_temp_geom'] = result_geometries

        # Convert to GeoDataFrame using the temporary geometry column
        result_gdf = gpd.GeoDataFrame(result_df, geometry='_temp_geom')

        # Drop the temporary geometry column (the geometry is now properly set as the active geometry)
        result_gdf = result_gdf.drop(columns=['_temp_geom'])

        result_gdf.index.name = geometry_level

        return result_gdf

    def _apply_spatial_reduction(self, reduction_func: str, dim: Optional[Union[str, List[str]]],
                                 target_columns: List[str], **kwargs):
        """
        Apply reduction to spatial dimensions within each xarray object.

        Args:
            reduction_func: Name of the reduction method
            dim: Spatial dimension(s) to reduce over
            target_columns: Columns to operate on
            **kwargs: Additional arguments

        Returns:
            GeoDataFrame with spatially reduced data
        """
        result_gdf = self._obj.copy()

        for col in target_columns:
            new_data = []
            for idx, row in self._obj.iterrows():
                original_value = row[col]
                xr_data = self._extract_xarray_object(original_value)

                if xr_data is not None:
                    try:
                        # Apply the reduction function
                        if hasattr(xr_data, reduction_func):
                            # Ensure skipna=True is set by default for most reduction functions
                            if 'skipna' not in kwargs and reduction_func in ['mean', 'sum', 'std', 'var', 'min', 'max', 'median', 'quantile']:
                                kwargs['skipna'] = True
                            reduced_data = getattr(xr_data, reduction_func)(dim=dim, **kwargs)

                            # Check if the result is a scalar and convert to float if so
                            if isinstance(reduced_data, xr.DataArray):
                                if reduced_data.size == 1:
                                    try:
                                        scalar_value = float(reduced_data.values)
                                        reduced_data = scalar_value
                                    except (ValueError, TypeError):
                                        pass
                            elif isinstance(reduced_data, xr.Dataset):
                                try:
                                    if len(reduced_data.dims) == 0:
                                        vars_list = list(reduced_data.data_vars.keys())
                                        if len(vars_list) == 1:
                                            var_name = vars_list[0]
                                            scalar_value = float(reduced_data[var_name].values)
                                            reduced_data = scalar_value
                                except (ValueError, TypeError, KeyError):
                                    pass

                            # Keep the same format as original (list vs direct)
                            if isinstance(original_value, list):
                                new_data.append([reduced_data])
                            else:
                                new_data.append(reduced_data)
                        else:
                            raise AttributeError(f"'{type(xr_data).__name__}' object has no attribute '{reduction_func}'")
                    except Exception as e:
                        # If reduction fails, keep original data
                        print(f"Warning: Could not apply {reduction_func} to row {idx}, column {col}: {e}")
                        new_data.append(original_value)
                else:
                    # If it's not xarray data, keep as is
                    new_data.append(original_value)

            result_gdf[col] = new_data

        return result_gdf

    def mean(self, dim: Optional[Union[str, List[str]]] = None, columns: Optional[List[str]] = None, **kwargs):
        """
        Calculate mean of xarray datasets/dataarrays.

        Args:
            dim: Dimension(s) to reduce over. If 'time', aggregates across time rows for each geometry.
                 If spatial dims like 'x', 'y', reduces within each xarray object.
            columns: List of column names to operate on. If None, operates on all xarray columns
            **kwargs: Additional arguments for the reduction function
        """
        return self._apply_reduction('mean', dim=dim, columns=columns, **kwargs)

    def sum(self, dim: Optional[Union[str, List[str]]] = None, columns: Optional[List[str]] = None, **kwargs):
        """
        Calculate sum of xarray datasets/dataarrays.

        Args:
            dim: Dimension(s) to reduce over. If 'time', aggregates across time rows for each geometry.
            columns: List of column names to operate on. If None, operates on all xarray columns
            **kwargs: Additional arguments for the reduction function
        """
        return self._apply_reduction('sum', dim=dim, columns=columns, **kwargs)

    def std(self, dim: Optional[Union[str, List[str]]] = None, columns: Optional[List[str]] = None, **kwargs):
        """
        Calculate standard deviation of xarray datasets/dataarrays.

        Args:
            dim: Dimension(s) to reduce over. If 'time', aggregates across time rows for each geometry.
            columns: List of column names to operate on. If None, operates on all xarray columns
            **kwargs: Additional arguments for the reduction function
        """
        return self._apply_reduction('std', dim=dim, columns=columns, **kwargs)

    def var(self, dim: Optional[Union[str, List[str]]] = None, columns: Optional[List[str]] = None, **kwargs):
        """
        Calculate variance of xarray datasets/dataarrays.

        Args:
            dim: Dimension(s) to reduce over. If 'time', aggregates across time rows for each geometry.
            columns: List of column names to operate on. If None, operates on all xarray columns
            **kwargs: Additional arguments for the reduction function
        """
        return self._apply_reduction('var', dim=dim, columns=columns, **kwargs)

    def min(self, dim: Optional[Union[str, List[str]]] = None, columns: Optional[List[str]] = None, **kwargs):
        """
        Calculate minimum of xarray datasets/dataarrays.

        Args:
            dim: Dimension(s) to reduce over. If 'time', aggregates across time rows for each geometry.
            columns: List of column names to operate on. If None, operates on all xarray columns
            **kwargs: Additional arguments for the reduction function
        """
        return self._apply_reduction('min', dim=dim, columns=columns, **kwargs)

    def max(self, dim: Optional[Union[str, List[str]]] = None, columns: Optional[List[str]] = None, **kwargs):
        """
        Calculate maximum of xarray datasets/dataarrays.

        Args:
            dim: Dimension(s) to reduce over. If 'time', aggregates across time rows for each geometry.
            columns: List of column names to operate on. If None, operates on all xarray columns
            **kwargs: Additional arguments for the reduction function
        """
        return self._apply_reduction('max', dim=dim, columns=columns, **kwargs)

    def median(self, dim: Optional[Union[str, List[str]]] = None, columns: Optional[List[str]] = None, **kwargs):
        """
        Calculate median of xarray datasets/dataarrays.

        Args:
            dim: Dimension(s) to reduce over. If 'time', aggregates across time rows for each geometry.
            columns: List of column names to operate on. If None, operates on all xarray columns
            **kwargs: Additional arguments for the reduction function
        """
        return self._apply_reduction('median', dim=dim, columns=columns, **kwargs)

    def quantile(self, q: float, dim: Optional[Union[str, List[str]]] = None,
                 columns: Optional[List[str]] = None, **kwargs):
        """
        Calculate quantile of xarray datasets/dataarrays.

        Args:
            q: Quantile to compute (between 0 and 1)
            dim: Dimension(s) to reduce over. If 'time', aggregates across time rows for each geometry.
            columns: List of column names to operate on. If None, operates on all xarray columns
            **kwargs: Additional arguments for the reduction function
        """
        return self._apply_reduction('quantile', dim=dim, columns=columns, q=q, **kwargs)

    def count(self, dim: Optional[Union[str, List[str]]] = None, columns: Optional[List[str]] = None, **kwargs):
        """
        Count non-NaN values in xarray datasets/dataarrays.

        Args:
            dim: Dimension(s) to reduce over. If 'time', aggregates across time rows for each geometry.
            columns: List of column names to operate on. If None, operates on all xarray columns
            **kwargs: Additional arguments for the reduction function
        """
        return self._apply_reduction('count', dim=dim, columns=columns, **kwargs)

    def to_values(self, columns: Optional[List[str]] = None):
        """
        Extract scalar values from xarray dataarrays and add them as new columns.
        Useful when dataarrays have been reduced to single values.

        Args:
            columns: List of column names to operate on. If None, operates on all xarray columns

        Returns:
            GeoDataFrame with extracted values as new columns
        """
        result_gdf = self._obj.copy()
        target_columns = self._get_target_columns(columns)

        for col in target_columns:
            values_to_add = []
            for idx, row in self._obj.iterrows():
                xr_data = self._extract_xarray_object(row[col])
                if isinstance(xr_data, xr.DataArray):
                    try:
                        if xr_data.size == 1:
                            scalar_value = float(xr_data.values)
                            values_to_add.append(scalar_value)
                        else:
                            values_to_add.append(np.nan)  # Can't convert non-scalar to value
                    except (ValueError, TypeError):
                        values_to_add.append(np.nan)
                else:
                    values_to_add.append(np.nan)

            # Add new column with scalar values
            new_col_name = f"{col}_value"
            result_gdf[new_col_name] = values_to_add

        return result_gdf

    def info(self):
        """Print information about xarray datasets/dataarrays in the GeoDataFrame."""
        print(f"GeoDataFrame shape: {self._obj.shape}")
        print(f"Columns: {list(self._obj.columns)}")
        print(f"Xarray columns: {self._xarray_columns}")
        print(f"Index structure: {self._obj.index.names if hasattr(self._obj.index, 'names') else 'Simple index'}")
        print(f"Geometry column name: {self._obj.geometry.name if hasattr(self._obj.geometry, 'name') else 'No geometry name'}")

        if hasattr(self._obj.index, 'names') and 'time' in self._obj.index.names:
            print("Note: Time dimension appears to be expanded into the index.")
            print("Use dim='time' to aggregate across time rows for each geometry.")

        for col in self._xarray_columns:
            print(f"\n--- Column: {col} ---")
            sample_data = self._extract_xarray_object(self._obj[col].iloc[0]) if len(self._obj) > 0 else None
            if isinstance(sample_data, xr.Dataset):
                print(f"Type: xarray.Dataset")
                print(f"Variables: {list(sample_data.data_vars.keys())}")
                print(f"Dimensions: {list(sample_data.dims.keys())}")
                print(f"Coordinates: {list(sample_data.coords.keys())}")
            elif isinstance(sample_data, xr.DataArray):
                print(f"Type: xarray.DataArray")
                print(f"Dimensions: {list(sample_data.dims)}")
                print(f"Shape: {sample_data.shape}")
                print(f"Data type: {sample_data.dtype}")
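Because the accessor is registered at import time via pd.api.extensions.register_dataframe_accessor, importing the package is enough to make .geo available on any GeoDataFrame whose non-geometry columns hold xarray objects. A minimal usage sketch, assuming the package and its dependencies are installed; the "ndvi" column name and the toy data are illustrative, not part of the package:

import numpy as np
import pandas as pd
import xarray as xr
import geopandas as gpd
from shapely.geometry import Point

import terrakio_core  # noqa: F401 -- importing the package registers the "geo" accessor

# Toy GeoDataFrame: each row pairs a geometry with an xarray DataArray.
da = xr.DataArray(np.arange(16.0).reshape(4, 4), dims=("y", "x"))
gdf = gpd.GeoDataFrame(geometry=[Point(0.0, 0.0), Point(1.0, 1.0)])
gdf["ndvi"] = pd.Series([da, da * 2], dtype=object)  # object dtype keeps the arrays intact

# Spatial reduction: each DataArray collapses to a scalar float.
reduced = gdf.geo.mean(dim=["x", "y"])
print(reduced["ndvi"].tolist())  # [7.5, 15.0]

Note the dtype=object on the column assignment: building the column directly from a list of equally shaped arrays can trip pandas' array coercion, whereas an object Series stores the DataArrays as-is.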
terrakio_core/async_client.py
CHANGED
@@ -33,7 +33,6 @@ class AsyncClient(BaseClient):
         self._owns_session = session is None
 
     async def _terrakio_request(self, method: str, endpoint: str, **kwargs):
-        # if self.session is None:
         if self.session is None:
             headers = {
                 'Content-Type': 'application/json',
@@ -49,7 +48,6 @@ class AsyncClient(BaseClient):
     async def _make_request_with_retry(self, session: aiohttp.ClientSession, method: str, endpoint: str, **kwargs) -> Dict[Any, Any]:
         url = f"{self.url}/{endpoint.lstrip('/')}"
         last_exception = None
-
        for attempt in range(self.retry + 1):
            try:
                async with session.request(method, url, **kwargs) as response:
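For context, _make_request_with_retry loops over range(self.retry + 1), i.e. one initial attempt plus retry retries. A self-contained sketch of that shape; flaky() is an invented stand-in for the real session.request call and aiohttp error handling:

import asyncio
import random

async def flaky() -> str:
    # Stand-in for session.request: fails transiently about half the time.
    if random.random() < 0.5:
        raise ConnectionError("transient failure")
    return "ok"

async def request_with_retry(retry: int = 3) -> str:
    last_exception = None
    for attempt in range(retry + 1):  # retry=3 means up to 4 total attempts
        try:
            return await flaky()
        except ConnectionError as exc:
            last_exception = exc
    raise last_exception

print(asyncio.run(request_with_retry()))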
@@ -103,23 +101,38 @@
                 return xr.open_dataset(BytesIO(content))
             except:
                 raise APIError(f"Unknown response format: {content_type}", status_code=response.status)
-
+
     async def _regular_request(self, method: str, endpoint: str, **kwargs):
         url = endpoint.lstrip('/')
+
         if self._session is None:
-
             async with aiohttp.ClientSession() as session:
                 try:
                     async with session.request(method, url, **kwargs) as response:
                         response.raise_for_status()
-
+
+                        content = await response.read()
+
+                        return type('Response', (), {
+                            'status': response.status,
+                            'content': content,
+                            'text': lambda: content.decode('utf-8'),
+                            'json': lambda: json.loads(content.decode('utf-8'))
+                        })()
                 except aiohttp.ClientError as e:
                     raise APIError(f"Request failed: {e}")
         else:
             try:
                 async with self._session.request(method, url, **kwargs) as response:
                     response.raise_for_status()
-
+                    content = await response.read()
+
+                    return type('Response', (), {
+                        'status': response.status,
+                        'content': content,
+                        'text': lambda: content.decode('utf-8'),
+                        'json': lambda: json.loads(content.decode('utf-8'))
+                    })()
             except aiohttp.ClientError as e:
                 raise APIError(f"Request failed: {e}")
 
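The added wrappers use the three-argument form of type(name, bases, namespace) to mint a throwaway response class and instantiate it in one expression. One caveat worth noting: a plain function in a class namespace binds as a method on instance attribute access, so the zero-argument lambdas above would raise a TypeError if invoked as response.text() or response.json() on the instance. A standalone sketch of the same pattern (not package code) that sidesteps this with staticmethod:

import json

content = b'{"ok": true}'

# Three-argument type(name, bases, namespace) builds a throwaway class;
# the trailing () instantiates it. staticmethod() keeps the zero-argument
# lambdas callable from the instance without an implicit self.
resp = type('Response', (), {
    'status': 200,
    'content': content,
    'text': staticmethod(lambda: content.decode('utf-8')),
    'json': staticmethod(lambda: json.loads(content.decode('utf-8'))),
})()

print(resp.status)  # 200
print(resp.json())  # {'ok': True}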
@@ -170,23 +183,19 @@ class AsyncClient(BaseClient):
             "validated": validated,
             **kwargs
         }
-
+        result = await self._terrakio_request("POST", "geoquery", json=payload)
+
+        return result
 
     async def zonal_stats(
         self,
         gdf: GeoDataFrame,
         expr: str,
         conc: int = 20,
-        inplace: bool = False,
         in_crs: str = "epsg:4326",
         out_crs: str = "epsg:4326",
         resolution: int = -1,
         geom_fix: bool = False,
-        drop_nan: bool = False,
-        spatial_reduction: str = None,
-        temporal_reduction: str = None,
-        max_memory_mb: int = 500,
-        stream_to_disk: bool = False,
     ):
         """
         Compute zonal statistics for all geometries in a GeoDataFrame.
@@ -195,22 +204,13 @@ class AsyncClient(BaseClient):
             gdf (GeoDataFrame): GeoDataFrame containing geometries
             expr (str): Terrakio expression to evaluate, can include spatial aggregations
             conc (int): Number of concurrent requests to make
-            inplace (bool): Whether to modify the input GeoDataFrame in place
             in_crs (str): Input coordinate reference system
             out_crs (str): Output coordinate reference system
             resolution (int): Resolution parameter
             geom_fix (bool): Whether to fix the geometry (default False)
-            drop_nan (bool): Whether to drop NaN values from the results (default False)
-            spatial_reduction (str): Reduction operation for spatial dimensions (x, y).
-                Options: 'mean', 'median', 'min', 'max', 'std', 'var', 'sum', 'count'
-            temporal_reduction (str): Reduction operation for temporal dimension (time).
-                Options: 'mean', 'median', 'min', 'max', 'std', 'var', 'sum', 'count'
-            max_memory_mb (int): Maximum memory threshold in MB (default 500MB)
-            stream_to_disk (bool): Whether to stream datasets to disk as NetCDF files (default False)
 
         Returns:
-            geopandas.GeoDataFrame: GeoDataFrame with added columns for results
-                If stream_to_disk=True, large datasets are saved as NetCDF files with file paths stored.
+            geopandas.GeoDataFrame: GeoDataFrame with added columns for results
 
         Raises:
             ValueError: If concurrency is too high or if data exceeds memory limit without streaming
@@ -221,16 +221,10 @@
             gdf=gdf,
             expr=expr,
             conc=conc,
-            inplace=inplace,
             in_crs=in_crs,
             out_crs=out_crs,
             resolution=resolution,
             geom_fix=geom_fix,
-            drop_nan=drop_nan,
-            spatial_reduction=spatial_reduction,
-            temporal_reduction=temporal_reduction,
-            max_memory_mb=max_memory_mb,
-            stream_to_disk=stream_to_disk
         )
 
     async def create_dataset_file(
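With the reduction and streaming parameters gone, a call site only shapes the request itself; post-hoc aggregation now goes through the .geo accessor added in this release. A hedged sketch of the new call shape; the expression string, input file, and bare AsyncClient() construction are illustrative assumptions, not documented usage:

import asyncio
import geopandas as gpd

from terrakio_core import AsyncClient

async def main() -> None:
    gdf = gpd.read_file("regions.geojson")  # hypothetical input file
    async with AsyncClient() as client:     # auth/config arguments omitted
        result = await client.zonal_stats(
            gdf=gdf,
            expr="mean(ndvi)",  # made-up Terrakio expression
            conc=20,
            in_crs="epsg:4326",
            out_crs="epsg:4326",
            resolution=-1,
            geom_fix=False,
        )
    # What spatial_reduction/temporal_reduction used to do, now client-side:
    print(result.geo.mean(dim=["x", "y"]))

asyncio.run(main())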
@@ -289,32 +283,23 @@
         )
 
     async def __aenter__(self):
-        # if there is no session, we create a session
         if self._session is None:
             headers = {
                 'Content-Type': 'application/json',
                 'x-api-key': self.key,
                 'Authorization': self.token
             }
-            # Remove None values from headers
             clean_headers = {k: v for k, v in headers.items() if v is not None}
-            # we are creating the header and clean any value that is none
-            # now we create the session
             self._session = aiohttp.ClientSession(
                 headers=clean_headers,
                 timeout=aiohttp.ClientTimeout(total=self.timeout)
             )
         return self
-        # if there is no session, we create a session
 
-    # now lets create the aexit function, this function is used when a user uses with, and this function will be automatically called when the with statement is done
     async def __aexit__(self, exc_type, exc_val, exc_tb):
-        # so if the session is not being passed in, and we created it by ourselves, we are responsible for closing the session
-        # if the session is being passed in, we are not responsible for closing the session
         if self._owns_session and self._session:
             await self._session.close()
             self._session = None
-        # we close the session and set the session value to none
 
     async def close(self):
         if self._owns_session and self._session: