terrakio-core 0.3.9__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of terrakio-core has been flagged as potentially problematic.

terrakio_core/__init__.py CHANGED

@@ -5,7 +5,7 @@ Terrakio Core
 Core components for Terrakio API clients.
 """
 
-__version__ = "0.3.9"
+__version__ = "0.4.0"
 
 from .async_client import AsyncClient
 from .sync_client import SyncClient as Client
@@ -182,6 +182,11 @@ class AsyncClient(BaseClient):
         out_crs: str = "epsg:4326",
         resolution: int = -1,
         geom_fix: bool = False,
+        drop_nan: bool = False,
+        spatial_reduction: str = None,
+        temporal_reduction: str = None,
+        max_memory_mb: int = 500,
+        stream_to_disk: bool = False,
     ):
         """
         Compute zonal statistics for all geometries in a GeoDataFrame.
@@ -195,11 +200,20 @@ class AsyncClient(BaseClient):
             out_crs (str): Output coordinate reference system
            resolution (int): Resolution parameter
            geom_fix (bool): Whether to fix the geometry (default False)
+           drop_nan (bool): Whether to drop NaN values from the results (default False)
+           spatial_reduction (str): Reduction operation for spatial dimensions (x, y).
+               Options: 'mean', 'median', 'min', 'max', 'std', 'var', 'sum', 'count'
+           temporal_reduction (str): Reduction operation for temporal dimension (time).
+               Options: 'mean', 'median', 'min', 'max', 'std', 'var', 'sum', 'count'
+           max_memory_mb (int): Maximum memory threshold in MB (default 500 MB)
+           stream_to_disk (bool): Whether to stream datasets to disk as NetCDF files (default False)
+
        Returns:
            geopandas.GeoDataFrame: GeoDataFrame with added columns for results, or None if inplace=True
+               If stream_to_disk=True, large datasets are saved as NetCDF files with file paths stored.
 
        Raises:
-           ValueError: If concurrency is too high
+           ValueError: If concurrency is too high or if data exceeds memory limit without streaming
            APIError: If the API request fails
        """
        return await _zonal_stats(
@@ -211,7 +225,12 @@ class AsyncClient(BaseClient):
            in_crs=in_crs,
            out_crs=out_crs,
            resolution=resolution,
-           geom_fix=geom_fix
+           geom_fix=geom_fix,
+           drop_nan=drop_nan,
+           spatial_reduction=spatial_reduction,
+           temporal_reduction=temporal_reduction,
+           max_memory_mb=max_memory_mb,
+           stream_to_disk=stream_to_disk
        )
 
    async def create_dataset_file(
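For orientation, here is a hedged sketch of how the expanded `zonal_stats` signature above might be called. The GeoJSON path, the expression string `"ndvi"`, and the credential handling are illustrative placeholders, not values taken from this release:

```python
import asyncio
import geopandas as gpd
from terrakio_core import AsyncClient

async def main():
    gdf = gpd.read_file("fields.geojson")   # any polygon layer (placeholder file)
    client = AsyncClient()                   # url/key fall back to the config file

    # One request per geometry, then an optional reduction over space and/or time.
    result = await client.zonal_stats(
        gdf=gdf,
        expr="ndvi",                  # placeholder Terrakio expression
        spatial_reduction="mean",     # collapse x/y per geometry
        temporal_reduction=None,      # keep the time dimension
        drop_nan=True,
        max_memory_mb=500,            # estimated usage above this raises unless streaming
        stream_to_disk=False,
    )
    print(result.head())

asyncio.run(main())
```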
terrakio_core/client.py CHANGED

@@ -1,11 +1,14 @@
 from typing import Optional
 import logging
+import warnings
 from terrakio_core.config import read_config_file, DEFAULT_CONFIG_FILE
 from abc import abstractmethod
+import xarray as xr
+
 
 class BaseClient():
     def __init__(self, url: Optional[str] = None, api_key: Optional[str] = None, verbose: bool = False):
-
+
         self.verbose = verbose
         self.logger = logging.getLogger("terrakio")
         if verbose:
@@ -21,17 +24,110 @@ class BaseClient():
         self.url = url
         self.key = api_key
 
-        config = read_config_file( DEFAULT_CONFIG_FILE, logger = self.logger)
-
+        config = read_config_file(DEFAULT_CONFIG_FILE, logger=self.logger)
+
         if self.url is None:
             self.url = config.get('url')
-
+
         if self.key is None:
             self.key = config.get('key')
 
         self.token = config.get('token')
-
 
+        # Apply xarray printing fix to prevent crashes with GeoDataFrames
+        self._apply_xarray_fix()
+
+    def _apply_xarray_fix(self):
+        """
+        Apply xarray printing fix to prevent crashes when GeoDataFrames contain xarray objects.
+        This fix is applied automatically when the client is initialized.
+        """
+        try:
+
+            # Check if fix is already applied globally
+            if hasattr(xr.DataArray, '_terrakio_fix_applied'):
+                if self.verbose:
+                    self.logger.info("xarray printing fix already applied")
+                return
+
+            # Store original methods for potential restoration
+            if not hasattr(xr.DataArray, '_original_iter'):
+                xr.DataArray._original_iter = xr.DataArray.__iter__
+                xr.Dataset._original_iter = xr.Dataset.__iter__
+
+            # Define safe iteration methods that prevent pandas from iterating
+            # but leave __repr__ and __str__ untouched for normal xarray printing
+            def safe_dataarray_iter(self):
+                # Return infinite iterator that always yields the same safe value
+                name = getattr(self, 'name', None) or 'unnamed'
+                shape_str = 'x'.join(map(str, self.shape)) if hasattr(self, 'shape') else 'unknown'
+                placeholder = f"<DataArray '{name}' {shape_str}>"
+                while True:
+                    yield placeholder
+
+            def safe_dataset_iter(self):
+                # Return infinite iterator that always yields the same safe value
+                num_vars = len(self.data_vars) if hasattr(self, 'data_vars') else 0
+                num_dims = len(self.dims) if hasattr(self, 'dims') else 0
+                placeholder = f"<Dataset: {num_vars} vars, {num_dims} dims>"
+                while True:
+                    yield placeholder
+
+            # Apply only the iteration fix - leave __repr__ and __str__ untouched
+            xr.DataArray.__iter__ = safe_dataarray_iter
+            xr.Dataset.__iter__ = safe_dataset_iter
+
+            # Mark as applied to avoid duplicate applications
+            xr.DataArray._terrakio_fix_applied = True
+            xr.Dataset._terrakio_fix_applied = True
+
+            if self.verbose:
+                self.logger.info("xarray iteration fix applied - GeoDataFrames with xarray objects will print safely, direct xarray printing unchanged")
+
+        except ImportError:
+            # xarray not installed, skip the fix
+            if self.verbose:
+                self.logger.info("xarray not installed, skipping printing fix")
+        except Exception as e:
+            # Log warning but don't fail initialization
+            warning_msg = f"Failed to apply xarray printing fix: {e}"
+            warnings.warn(warning_msg)
+            if self.verbose:
+                self.logger.warning(warning_msg)
+
+    def restore_xarray_printing(self):
+        """
+        Restore original xarray printing behavior.
+        Call this method if you want to see full xarray representations again.
+        """
+        try:
+            import xarray as xr
+
+            if hasattr(xr.DataArray, '_original_iter'):
+                xr.DataArray.__iter__ = xr.DataArray._original_iter
+                xr.Dataset.__iter__ = xr.Dataset._original_iter
+
+                # Remove the fix markers
+                if hasattr(xr.DataArray, '_terrakio_fix_applied'):
+                    delattr(xr.DataArray, '_terrakio_fix_applied')
+                if hasattr(xr.Dataset, '_terrakio_fix_applied'):
+                    delattr(xr.Dataset, '_terrakio_fix_applied')
+
+                if self.verbose:
+                    self.logger.info("Original xarray iteration behavior restored")
+            else:
+                if self.verbose:
+                    self.logger.info("No xarray fix to restore")
+
+        except ImportError:
+            if self.verbose:
+                self.logger.info("xarray not available")
+        except Exception as e:
+            warning_msg = f"Failed to restore xarray printing: {e}"
+            warnings.warn(warning_msg)
+            if self.verbose:
+                self.logger.warning(warning_msg)
+
    @abstractmethod
    def _setup_session(self):
        """Initialize the HTTP session - implemented by sync/async clients"""
@@ -11,37 +11,39 @@ from ..helper.tiles import tiles
 import uuid
 import xarray as xr
 
-
-async def zonal_stats(
+async def request_data(
     client,
     gdf: GeoDataFrame,
     expr: str,
     conc: int = 20,
-    inplace: bool = False,
     in_crs: str = "epsg:4326",
     out_crs: str = "epsg:4326",
     resolution: int = -1,
     geom_fix: bool = False,
+    max_memory_mb: int = 500,
+    stream_to_disk: bool = None,
 ):
     """
-    Compute zonal statistics for all geometries in a GeoDataFrame.
+    Request xarray datasets for all geometries in a GeoDataFrame.
 
     Args:
         client: The AsyncClient instance
         gdf (GeoDataFrame): GeoDataFrame containing geometries
        expr (str): Terrakio expression to evaluate, can include spatial aggregations
        conc (int): Number of concurrent requests to make
-       inplace (bool): Whether to modify the input GeoDataFrame in place
        in_crs (str): Input coordinate reference system
        out_crs (str): Output coordinate reference system
        resolution (int): Resolution parameter
        geom_fix (bool): Whether to fix the geometry (default False)
+       max_memory_mb (int): Maximum memory threshold in MB (default 500 MB)
+       stream_to_disk (bool): Whether to stream large datasets to disk. If None, will be determined automatically.
+
    Returns:
-       geopandas.GeoDataFrame: GeoDataFrame with variable dataarrays in separate columns.
-           Each row represents one geometry with full time-dimensional dataarrays.
+       geopandas.GeoDataFrame: Copy of input GeoDataFrame with additional 'dataset' column
+           containing the xarray Dataset for each geometry.
 
    Raises:
-       ValueError: If concurrency is too high
+       ValueError: If concurrency is too high or if data exceeds memory limit without streaming
       APIError: If the API request fails
    """
    if conc > 100:
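Read together with `post_processing` further down, the hunk above turns the old one-shot helper into a two-step workflow. A minimal sketch of step one, run inside an async function (the import path of `request_data` is not shown in this diff, and `"ndvi"` is a placeholder expression):

```python
gdf_with_datasets = await request_data(
    client, gdf, expr="ndvi",                  # placeholder expression
    conc=20, resolution=-1,
    max_memory_mb=500, stream_to_disk=None,    # None triggers the automatic memory check
)
print(type(gdf_with_datasets["dataset"].iloc[0]))   # xarray.Dataset, one per geometry
```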
@@ -49,13 +51,54 @@ async def zonal_stats(
 
     total_geometries = len(gdf)
 
+    # First, make a request with the first geometry to estimate total memory usage
+    client.logger.info("Estimating total memory usage...")
+    first_geom = gdf.geometry.iloc[0]
+    feature = {
+        "type": "Feature",
+        "geometry": mapping(first_geom),
+        "properties": {}
+    }
+
+    try:
+        first_result = await client.geoquery(expr=expr, feature=feature,
+            in_crs=in_crs, out_crs=out_crs, resolution=resolution, geom_fix=geom_fix)
+        if isinstance(first_result, dict) and first_result.get("error"):
+            error_msg = f"Request failed: {first_result.get('error_message', 'Unknown error')}"
+            if first_result.get('status_code'):
+                error_msg = f"Request failed with status {first_result['status_code']}: {first_result.get('error_message', 'Unknown error')}"
+            raise APIError(error_msg)
+
+        if not isinstance(first_result, xr.Dataset):
+            raise ValueError(f"Expected xarray Dataset, got {type(first_result)}")
+
+        # Estimate total memory usage
+        single_dataset_size_bytes = estimate_dataset_size(first_result)
+        total_size_bytes = single_dataset_size_bytes * total_geometries
+        total_size_mb = total_size_bytes / (1024 * 1024)
+
+        client.logger.info(f"Estimated total memory usage: {total_size_mb:.2f} MB for {total_geometries} geometries")
+
+        # Check if we need to stream to disk
+        if stream_to_disk is None:
+            # Auto-determine based on memory usage
+            if total_size_mb > max_memory_mb:
+                client.logger.warning(f"The data you are requesting exceeds {max_memory_mb} MB, we recommend you to set the stream_to_disk parameter to True")
+                raise ValueError(f"The data you are requesting exceeds {max_memory_mb} MB, we recommend you to set the stream_to_disk parameter to True")
+
+    except Exception as e:
+        if "recommend you to set the stream_to_disk parameter to True" in str(e):
+            raise
+        client.logger.error(f"Failed to estimate memory usage: {e}")
+        raise
+
     client.logger.info(f"Processing {total_geometries} geometries with concurrency {conc}")
 
     completed_count = 0
     lock = asyncio.Lock()
 
     async def process_geometry(geom):
-        """Process a single geometry"""
+        """Process a single geometry and return the dataset"""
        nonlocal completed_count
 
        try:
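The memory gate above is plain arithmetic: the byte size of the first geometry's dataset (as computed by `estimate_dataset_size`, defined in the next hunk) is multiplied by the number of geometries, converted to MB, and compared against `max_memory_mb`. An illustrative check with made-up numbers:

```python
# Hypothetical figures: 12 monthly float32 rasters of 256x256 pixels per geometry.
single_dataset_size_bytes = 12 * 256 * 256 * 4      # ~3 MB, what estimate_dataset_size would report
total_geometries = 300
total_size_mb = single_dataset_size_bytes * total_geometries / (1024 * 1024)
print(f"{total_size_mb:.0f} MB")                    # 900 MB

# With the default max_memory_mb=500 and stream_to_disk left as None, this estimate
# would trip the ValueError asking you to set stream_to_disk=True.
```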
@@ -103,53 +146,261 @@ async def zonal_stats(
                raise APIError(f"API request failed: {e.response.text}")
            raise
 
-    client.logger.info("All requests completed! Processing results...")
+    client.logger.info("All requests completed!")
 
-
     if not all_results:
         raise ValueError("No valid results were returned for any geometry")
 
+    # Create a copy of the input GeoDataFrame
+    result_gdf = gdf.copy()
+
+    # Add the dataset column with the xarray datasets
+    result_gdf['dataset'] = all_results
+
+    return result_gdf
+
+
+import os
+from pathlib import Path
+
+def estimate_dataset_size(dataset):
+    """
+    Estimate the memory size of an xarray dataset in bytes.
+
+    Args:
+        dataset: xarray Dataset
+
+    Returns:
+        int: Estimated size in bytes
+    """
+    total_size = 0
+    for var_name, var in dataset.data_vars.items():
+        # Get the dtype size in bytes
+        dtype_size = var.dtype.itemsize
+        # Get the total number of elements
+        total_elements = var.size
+        # Calculate total size for this variable
+        total_size += dtype_size * total_elements
+
+    # Add coordinate sizes
+    for coord_name, coord in dataset.coords.items():
+        if coord_name not in dataset.dims:  # Don't double count dimension coordinates
+            dtype_size = coord.dtype.itemsize
+            total_elements = coord.size
+            total_size += dtype_size * total_elements
+
+    return total_size
 
+def save_dataset_to_file(dataset, filepath):
+    """
+    Save dataset to NetCDF file.
+
+    Args:
+        dataset: xarray Dataset
+        filepath: Path to save the file
+
+    Returns:
+        str: Path to saved file
+    """
+    filepath = Path(filepath)
+
+    if not str(filepath).endswith('.nc'):
+        filepath = filepath.with_suffix('.nc')
+
+    dataset.to_netcdf(filepath)
+    return str(filepath)
+
+def post_processing(
+    gdf_with_datasets: GeoDataFrame,
+    spatial_reduction: str = None,
+    temporal_reduction: str = None,
+    drop_nan: bool = False,
+    inplace: bool = False,
+    stream_to_disk: bool = False,
+):
+    """
+    Post-process the GeoDataFrame with datasets to extract variables with optional reductions.
+
+    Args:
+        gdf_with_datasets (GeoDataFrame): GeoDataFrame with 'dataset' column containing xarray Datasets
+        spatial_reduction (str): Reduction operation for spatial dimensions (x, y).
+            Options: 'mean', 'median', 'min', 'max', 'std', 'var', 'sum', 'count'
+        temporal_reduction (str): Reduction operation for temporal dimension (time).
+            Options: 'mean', 'median', 'min', 'max', 'std', 'var', 'sum', 'count'
+        drop_nan (bool): Whether to drop NaN values from the results (default False)
+        inplace (bool): Whether to modify the input GeoDataFrame in place
+        stream_to_disk (bool): Whether to stream datasets to disk as NetCDF files (default False)
+
+    Returns:
+        geopandas.GeoDataFrame: GeoDataFrame with variable dataarrays/values or file paths in separate columns.
+            If stream_to_disk=True, large datasets are saved as NetCDF files with file paths stored.
+    """
+    if 'dataset' not in gdf_with_datasets.columns:
+        raise ValueError("Input GeoDataFrame must contain a 'dataset' column")
+
+    # Validate reduction parameters
+    valid_reductions = ['mean', 'median', 'min', 'max', 'std', 'var', 'sum', 'count']
+    if spatial_reduction and spatial_reduction not in valid_reductions:
+        raise ValueError(f"spatial_reduction must be one of {valid_reductions}")
+    if temporal_reduction and temporal_reduction not in valid_reductions:
+        raise ValueError(f"temporal_reduction must be one of {valid_reductions}")
+
     result_rows = []
     geometries = []
 
-    # Results should be in the same order as input geometries
-    for i, dataset in enumerate(all_results):
-        # Get the original geometry by index
-        original_geom = gdf.geometry.iloc[i]
-        # Create single row for this geometry
+    # Process each row (geometry + dataset)
+    for i, row in gdf_with_datasets.iterrows():
+        dataset = row['dataset']
+
+        # Create new row for this geometry
         new_row = {}
-
-        # Copy original GeoDataFrame attributes
-        for col in gdf.columns:
-            if col != 'geometry':
-                new_row[col] = gdf.iloc[i][col]
-        # Store the full dataarray for each variable (with time dimension intact)
+
+        # Copy original GeoDataFrame attributes (excluding dataset column)
+        for col in gdf_with_datasets.columns:
+            if col not in ['geometry', 'dataset']:
+                new_row[col] = row[col]
+
+        # Process each variable in the dataset
         data_vars = list(dataset.data_vars.keys())
        for var_name in data_vars:
            var_data = dataset[var_name]
-           new_row[f'{var_name}_dataarray'] = var_data
+
+           # Apply drop_nan if requested
+           if drop_nan:
+               # Drop spatial dimensions where all values are NaN
+               var_data = var_data.dropna(dim='x', how='all').dropna(dim='y', how='all')
+
+               # Drop time dimensions where all values are NaN
+               if 'time' in var_data.dims:
+                   var_data = var_data.dropna(dim='time', how='all')
+
+           # Check current dimensions to determine if aggregation is needed
+           current_dims = set(var_data.dims)
+           has_spatial_dims = bool(current_dims.intersection(['x', 'y']))
+           has_temporal_dim = 'time' in current_dims
+
+           # Apply spatial reduction only if spatial dimensions exist and reduction is requested
+           if spatial_reduction and has_spatial_dims:
+               spatial_dims = [dim for dim in ['x', 'y'] if dim in var_data.dims]
+               if spatial_dims:
+                   if spatial_reduction == 'count':
+                       var_data = var_data.count(dim=spatial_dims)
+                   else:
+                       var_data = getattr(var_data, spatial_reduction)(dim=spatial_dims)
+
+           # Apply temporal reduction only if time dimension exists and reduction is requested
+           if temporal_reduction and has_temporal_dim:
+               if temporal_reduction == 'count':
+                   var_data = var_data.count(dim='time')
+               else:
+                   var_data = getattr(var_data, temporal_reduction)(dim='time')
+
+           # Handle streaming to disk if requested
+           if stream_to_disk:
+               # Create a single-variable dataset for saving
+               single_var_dataset = var_data.to_dataset(name=var_name)
+
+               # Generate filename based on row index and variable name
+               filename = f"geometry_{i}_{var_name}.nc"
+               filepath = os.path.join(os.getcwd(), filename)
+
+               # Save to disk and store file path
+               saved_path = save_dataset_to_file(single_var_dataset, filepath)
+               new_row[var_name] = f"file://{saved_path}"
+
+               print(f"Dataset for geometry {i}, variable '{var_name}' saved to: {saved_path}")
+           else:
+               # Keep in memory
+               new_row[var_name] = var_data
+
        result_rows.append(new_row)
-       geometries.append(original_geom)
+       geometries.append(row['geometry'])
+
    # Create the result GeoDataFrame with default integer index
    result_gdf = GeoDataFrame(result_rows, geometry=geometries)
+
    if inplace:
        # Clear original gdf and replace with result_gdf content
-       gdf.drop(gdf.index, inplace=True)
-       gdf.drop(gdf.columns, axis=1, inplace=True)
+       gdf_with_datasets.drop(gdf_with_datasets.index, inplace=True)
+       gdf_with_datasets.drop(gdf_with_datasets.columns, axis=1, inplace=True)
 
        # Copy all data from result_gdf to gdf
        for col in result_gdf.columns:
-           gdf[col] = result_gdf[col].values
+           gdf_with_datasets[col] = result_gdf[col].values
 
        # Ensure it remains a GeoDataFrame with correct geometry
-       gdf.geometry = result_gdf.geometry
+       gdf_with_datasets.geometry = result_gdf.geometry
 
        return None
    else:
-
        return result_gdf
 
+
+# Updated zonal_stats function that uses both parts
+async def zonal_stats(
+    client,
+    gdf: GeoDataFrame,
+    expr: str,
+    conc: int = 20,
+    inplace: bool = False,
+    in_crs: str = "epsg:4326",
+    out_crs: str = "epsg:4326",
+    resolution: int = -1,
+    geom_fix: bool = False,
+    drop_nan: bool = False,
+    spatial_reduction: str = None,
+    temporal_reduction: str = None,
+    max_memory_mb: int = 500,
+    stream_to_disk: bool = False,
+):
+    """
+    Compute zonal statistics for all geometries in a GeoDataFrame.
+    This is a convenience function that combines request_data and post_processing.
+
+    Args:
+        client: The AsyncClient instance
+        gdf (GeoDataFrame): GeoDataFrame containing geometries
+        expr (str): Terrakio expression to evaluate, can include spatial aggregations
+        conc (int): Number of concurrent requests to make
+        inplace (bool): Whether to modify the input GeoDataFrame in place
+        in_crs (str): Input coordinate reference system
+        out_crs (str): Output coordinate reference system
+        resolution (int): Resolution parameter
+        geom_fix (bool): Whether to fix the geometry (default False)
+        drop_nan (bool): Whether to drop NaN values from the results (default False)
+        spatial_reduction (str): Reduction operation for spatial dimensions (x, y).
+            Options: 'mean', 'median', 'min', 'max', 'std', 'var', 'sum', 'count'
+        temporal_reduction (str): Reduction operation for temporal dimension (time).
+            Options: 'mean', 'median', 'min', 'max', 'std', 'var', 'sum', 'count'
+        max_memory_mb (int): Maximum memory threshold in MB (default 500 MB)
+        stream_to_disk (bool): Whether to stream datasets to disk as NetCDF files (default False)
+    """
+    # Step 1: Request data (with memory estimation)
+    gdf_with_datasets = await request_data(
+        client=client,
+        gdf=gdf,
+        expr=expr,
+        conc=conc,
+        in_crs=in_crs,
+        out_crs=out_crs,
+        resolution=resolution,
+        geom_fix=geom_fix,
+        max_memory_mb=max_memory_mb,
+        stream_to_disk=stream_to_disk
+    )
+
+    # Step 2: Post-process with reductions and optional streaming
+    result = post_processing(
+        gdf_with_datasets=gdf_with_datasets,
+        spatial_reduction=spatial_reduction,
+        temporal_reduction=temporal_reduction,
+        drop_nan=drop_nan,
+        inplace=inplace,
+        stream_to_disk=stream_to_disk
+    )
+
+    return result
+
 async def create_dataset_file(
     client,
     aoi: str,
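Taken together, the refactor above splits the old helper into `request_data` (fetch one `xarray.Dataset` per geometry, with the memory check) and `post_processing` (reduce, drop NaNs, and optionally stream to NetCDF), with `zonal_stats` kept as the convenience wrapper that chains them. A hedged end-to-end sketch, continuing the earlier placeholders:

```python
# Step 1: one Dataset per geometry (raises if the size estimate exceeds max_memory_mb
# while stream_to_disk is left as None).
gdf_with_datasets = await request_data(client, gdf, expr="ndvi", stream_to_disk=False)

# Step 2: collapse x/y to a per-date mean, keep the time series in memory.
stats_gdf = post_processing(
    gdf_with_datasets,
    spatial_reduction="mean",
    temporal_reduction=None,
    drop_nan=True,
    stream_to_disk=False,
)
print(stats_gdf.columns)    # original columns plus one column per variable in the expression
```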