terrakio-core 0.4.96__tar.gz → 0.4.98__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/PKG-INFO +1 -1
  2. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/pyproject.toml +1 -1
  3. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/__init__.py +1 -1
  4. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/convenience_functions/zonal_stats.py +114 -4
  5. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/endpoints/dataset_management.py +12 -4
  6. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/.gitignore +0 -0
  7. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/README.md +0 -0
  8. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/accessors.py +0 -0
  9. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/async_client.py +0 -0
  10. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/client.py +0 -0
  11. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/config.py +0 -0
  12. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/convenience_functions/create_dataset_file.py +0 -0
  13. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/convenience_functions/geoquries.py +0 -0
  14. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/endpoints/auth.py +0 -0
  15. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/endpoints/group_management.py +0 -0
  16. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/endpoints/mass_stats.py +0 -0
  17. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/endpoints/model_management.py +0 -0
  18. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/endpoints/space_management.py +0 -0
  19. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/endpoints/user_management.py +0 -0
  20. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/exceptions.py +0 -0
  21. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/helper/bounded_taskgroup.py +0 -0
  22. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/helper/decorators.py +0 -0
  23. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/helper/tiles.py +0 -0
  24. {terrakio_core-0.4.96 → terrakio_core-0.4.98}/terrakio_core/sync_client.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: terrakio-core
-Version: 0.4.96
+Version: 0.4.98
 Summary: Core package for the terrakio-python-api
 Requires-Python: >=3.11
 Requires-Dist: aiofiles>=24.1.0
pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "terrakio-core"
-version = "0.4.96"
+version = "0.4.98"
 description = "Core package for the terrakio-python-api"
 readme = "README.md"
 requires-python = ">=3.11"
terrakio_core/__init__.py
@@ -5,7 +5,7 @@ Terrakio Core
 Core components for Terrakio API clients.
 """
 
-__version__ = "0.4.96"
+__version__ = "0.4.98"
 
 from .async_client import AsyncClient
 from .sync_client import SyncClient as Client
terrakio_core/convenience_functions/zonal_stats.py
@@ -9,18 +9,18 @@ from typing import Optional
 # Third-party library imports
 import aiohttp
 import geopandas as gpd
-import nest_asyncio
 import pandas as pd
 import pyproj
 import xarray as xr
 from geopandas import GeoDataFrame
 from shapely.geometry import box, mapping, shape
 from shapely.ops import transform
+import threading
+from concurrent.futures import ThreadPoolExecutor
 
 # Local imports
 from .geoquries import request_geoquery_list
 
-nest_asyncio.apply()
 class cloud_object(gpd.GeoDataFrame):
     """
     This class is a class used for cloud
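Note: dropping nest_asyncio matters because asyncio.run() refuses to start inside an already-running loop, which is exactly the situation in Jupyter. A standalone sketch of the failure mode (not taken from the package):

    import asyncio

    async def work():
        return 42

    # In a plain script there is no running loop, so this succeeds:
    print(asyncio.run(work()))  # 42

    # Inside Jupyter a loop is already running, and the same call raises
    # "RuntimeError: asyncio.run() cannot be called from a running event loop".
    # 0.4.96 patched the loop via nest_asyncio.apply(); 0.4.98 instead detects
    # the running loop and hands the coroutine to a worker thread (next hunk).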
@@ -36,12 +36,51 @@ class cloud_object(gpd.GeoDataFrame):
         self.client = client
         self.job_name = job_name
 
+    def __repr__(self):
+        return (
+            f"<CloudZonalStats job_id='{self.job_id}', job_name='{self.job_name}'>\n"
+            f"Call .head(n) to fetch a preview GeoDataFrame when the job completes."
+        )
+
+    def _repr_html_(self):
+        # Jupyter HTML-friendly representation to avoid auto-rendering an empty DataFrame
+        return (
+            f"<div style='font-family:system-ui,Segoe UI,Helvetica,Arial,sans-serif'>"
+            f"<strong>Cloud Zonal Stats</strong><br/>"
+            f"job_id: <code>{self.job_id}</code><br/>"
+            f"job_name: <code>{self.job_name}</code><br/>"
+            f"<em>Use <code>.head(n)</code> to retrieve a preview once the job is completed.</em>"
+            f"</div>"
+        )
+
     def head(self, n = 5):
         """
         Returns the first n files stored in the cloud bucket.
         """
-        return asyncio.run(self._head_async(n))
+        # Detect if we're inside an existing event loop (e.g., Jupyter)
+        in_running_loop = False
+        try:
+            asyncio.get_running_loop()
+            in_running_loop = True
+        except RuntimeError:
+            in_running_loop = False
 
+        if in_running_loop:
+            # Run the async function in a separate thread with its own loop
+            def run_async_in_thread():
+                new_loop = asyncio.new_event_loop()
+                try:
+                    return new_loop.run_until_complete(self._head_async(n))
+                finally:
+                    new_loop.close()
+
+            with ThreadPoolExecutor(max_workers=1) as executor:
+                future = executor.submit(run_async_in_thread)
+                return future.result()
+        else:
+            # No running loop - safe to use asyncio.run
+            return asyncio.run(self._head_async(n))
+
     async def _head_async(self, n = 5):
         """
         Returns the first n files stored in the cloud bucket.
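Note: the new head() amounts to a general sync-over-async bridge. A self-contained sketch of the same pattern, assuming nothing from the package:

    import asyncio
    from concurrent.futures import ThreadPoolExecutor

    async def fetch_preview():
        await asyncio.sleep(0)  # stand-in for the real bucket download
        return "preview"

    def run_sync(coro):
        """Run a coroutine to completion whether or not a loop is running."""
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            return asyncio.run(coro)  # plain script: no loop, run directly

        def run_in_thread():
            loop = asyncio.new_event_loop()
            try:
                return loop.run_until_complete(coro)
            finally:
                loop.close()

        with ThreadPoolExecutor(max_workers=1) as executor:
            return executor.submit(run_in_thread).result()

    print(run_sync(fetch_preview()))  # -> preview

One worker thread is enough: the fresh loop lives only for the duration of the call, so the caller's loop is never re-entered.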
@@ -120,12 +159,54 @@ class cloud_object(gpd.GeoDataFrame):
             })
 
             self.client.logger.info(f"Created GeoDataFrame with {len(gdf)} rows")
+
+            # Derive id values from json metadata (prefer 'file', fallback to 'group')
+            id_values = []
+            for i in range(min_length):
+                entry = json_data[i] if i < len(json_data) else {}
+                id_candidate = entry.get('file') or entry.get('group') or ''
+                if isinstance(id_candidate, str) and id_candidate.startswith('file_'):
+                    id_val = id_candidate[len('file_'):]
+                elif isinstance(id_candidate, str) and id_candidate.startswith('group_'):
+                    id_val = id_candidate[len('group_'):]
+                else:
+                    id_val = str(id_candidate) if id_candidate else str(i)
+                id_values.append(id_val)
+
+            # Geometry to id mapping using WKB to avoid precision issues
+            geom_to_id = {geometries[i].wkb: id_values[i] for i in range(min_length)}
+
             try:
                 expanded_gdf = expand_on_variables_and_time(gdf)
+
+                # Attach id as first index level, geometry second, time third if present
+                if hasattr(expanded_gdf.index, 'names') and 'geometry' in expanded_gdf.index.names:
+                    if isinstance(expanded_gdf.index, pd.MultiIndex):
+                        geometry_index = expanded_gdf.index.get_level_values('geometry')
+                    else:
+                        geometry_index = expanded_gdf.index
+                    id_col = [geom_to_id.get(geom.wkb) for geom in geometry_index]
+                    expanded_gdf['id'] = id_col
+                    expanded_gdf = expanded_gdf.reset_index()
+                    if 'time' in expanded_gdf.columns:
+                        expanded_gdf = expanded_gdf.set_index(['id', 'geometry', 'time'])
+                    else:
+                        expanded_gdf = expanded_gdf.set_index(['id', 'geometry'])
+                else:
+                    # geometry exists as a column
+                    id_col = [geom_to_id.get(geom.wkb) for geom in expanded_gdf['geometry']]
+                    expanded_gdf['id'] = id_col
+                    if 'time' in expanded_gdf.columns:
+                        expanded_gdf = expanded_gdf.set_index(['id', 'geometry', 'time'])
+                    else:
+                        expanded_gdf = expanded_gdf.set_index(['id', 'geometry'])
+
                 return expanded_gdf
             except NameError:
                 self.client.logger.warning("expand_on_variables_and_time function not found, returning raw GeoDataFrame")
-                return gdf
+                # Set id on raw gdf and index appropriately
+                gdf['id'] = id_values
+                return gdf.set_index(['id', 'geometry'])
 
         else:
             self.client.logger.warning(f"Failed to download geometry data: HTTP {response.status}")
@@ -513,6 +594,7 @@ async def zonal_stats(
         job_name = await client.mass_stats.track_job([mass_stats_id])
         job_name = job_name[mass_stats_id]["name"]
         cloud_files_object = cloud_object(job_id = mass_stats_id, job_name = job_name, client = client)
+
         return cloud_files_object
 
     quries = []
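Note: a hedged sketch of how the cloud-backed branch is consumed; the zonal_stats keyword names are assumptions for illustration, not the documented signature (only the import paths are confirmed by this diff):

    import geopandas as gpd
    from terrakio_core import AsyncClient
    from terrakio_core.convenience_functions.zonal_stats import zonal_stats

    async def preview_job(gdf: gpd.GeoDataFrame, expr: str):
        client = AsyncClient()  # assumed no-argument construction
        result = await zonal_stats(gdf=gdf, expr=expr, client=client)  # assumed kwargs
        print(result)           # cloud-backed runs now print job_id/job_name
        return result.head(3)   # preview GeoDataFrame once the job completes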
@@ -539,5 +621,33 @@ async def zonal_stats(
         "is_cloud_backed": False,
     }
     gdf_with_datasets = expand_on_variables_and_time(gdf_with_datasets)
+
+    # If an id_column is provided, attach it to the result and include in the index
+    if id_column is not None and id_column in gdf.columns:
+        # Build a mapping from input geometries to id values (use WKB for robust equality)
+        geometry_to_id = {geom.wkb: id_val for geom, id_val in zip(gdf.geometry, gdf[id_column])}
+
+        # Determine geometry values in the result (index may be geometry or (geometry, time))
+        if hasattr(gdf_with_datasets.index, 'names') and 'geometry' in gdf_with_datasets.index.names:
+            if isinstance(gdf_with_datasets.index, pd.MultiIndex):
+                geometry_index = gdf_with_datasets.index.get_level_values('geometry')
+            else:
+                geometry_index = gdf_with_datasets.index
+            id_values = [geometry_to_id.get(geom.wkb) for geom in geometry_index]
+            gdf_with_datasets[id_column] = id_values
+            # Reset index to control index composition precisely, then set to desired levels
+            gdf_with_datasets = gdf_with_datasets.reset_index()
+            if 'time' in gdf_with_datasets.columns:
+                gdf_with_datasets = gdf_with_datasets.set_index([id_column, 'geometry', 'time'])
+            else:
+                gdf_with_datasets = gdf_with_datasets.set_index([id_column, 'geometry'])
+        else:
+            # geometry exists as a column
+            id_values = [geometry_to_id.get(geom.wkb) for geom in gdf_with_datasets['geometry']]
+            gdf_with_datasets[id_column] = id_values
+            if 'time' in gdf_with_datasets.columns:
+                gdf_with_datasets = gdf_with_datasets.set_index([id_column, 'geometry', 'time'])
+            else:
+                gdf_with_datasets = gdf_with_datasets.set_index([id_column, 'geometry'])
     return gdf_with_datasets
 
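Note: the reset_index/set_index reshaping above, reduced to a minimal pandas example with made-up values:

    import pandas as pd
    from shapely.geometry import Point

    df = pd.DataFrame(
        {"mean": [1.0, 2.0]},
        index=pd.MultiIndex.from_tuples(
            [(Point(0, 0), "2020-01-01"), (Point(1, 1), "2020-01-01")],
            names=["geometry", "time"],
        ),
    )
    geometry_to_id = {Point(0, 0).wkb: "a", Point(1, 1).wkb: "b"}

    # Attach ids via the WKB lookup, then rebuild the index with id first
    df["site_id"] = [geometry_to_id.get(g.wkb) for g in df.index.get_level_values("geometry")]
    df = df.reset_index().set_index(["site_id", "geometry", "time"])
    print(df)  # rows now keyed by (site_id, geometry, time)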
terrakio_core/endpoints/dataset_management.py
@@ -69,7 +69,7 @@ class DatasetManagement:
             name: Name of the dataset (required)
             collection: Dataset collection (default: 'terrakio-datasets')
             products: List of products
-            dates_iso8601: List of dates
+            dates_iso8601: List of dates (will be automatically sorted chronologically)
             bucket: Storage bucket
             path: Storage path
             data_type: Data type
@@ -142,7 +142,7 @@ class DatasetManagement:
             append: Whether to append data or replace (default: True)
             collection: Dataset collection (default: 'terrakio-datasets')
             products: List of products
-            dates_iso8601: List of dates
+            dates_iso8601: List of dates (will be automatically sorted chronologically)
             bucket: Storage bucket
             path: Storage path
             data_type: Data type
@@ -162,6 +162,10 @@ class DatasetManagement:
         Raises:
             APIError: If the API request fails
         """
+        # Sort dates_iso8601 chronologically if provided
+        if dates_iso8601 is not None:
+            dates_iso8601 = sorted(dates_iso8601)
+
         params = {"collection": collection, "append": str(append).lower()}
         payload = {"name": name}
         param_mapping = {
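Note: plain sorted() suffices here because zero-padded ISO-8601 timestamps order lexicographically exactly as they do chronologically. A quick check:

    dates = ["2024-03-01T00:00:00Z", "2023-12-31T23:59:59Z", "2024-01-15T12:00:00Z"]
    assert sorted(dates) == [
        "2023-12-31T23:59:59Z",
        "2024-01-15T12:00:00Z",
        "2024-03-01T00:00:00Z",
    ]
    # Caveat: this holds for uniformly formatted timestamps with the same
    # timezone designator; mixed offsets would need real datetime parsing.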
@@ -215,7 +219,7 @@ class DatasetManagement:
             append: Whether to append data or replace (default: True)
             collection: Dataset collection (default: 'terrakio-datasets')
             products: List of products
-            dates_iso8601: List of dates
+            dates_iso8601: List of dates (will be automatically sorted chronologically)
             bucket: Storage bucket
             path: Storage path
             data_type: Data type
@@ -236,6 +240,10 @@ class DatasetManagement:
         Raises:
             APIError: If the API request fails
         """
+        # Sort dates_iso8601 chronologically if provided
+        if dates_iso8601 is not None:
+            dates_iso8601 = sorted(dates_iso8601)
+
         params = {"collection": collection, "append": str(append).lower()}
         payload = {"name": name}
         param_mapping = {
@@ -289,7 +297,7 @@ class DatasetManagement:
             name: Name of the dataset (required)
             collection: Dataset collection (default: 'terrakio-datasets')
             products: List of products
-            dates_iso8601: List of dates
+            dates_iso8601: List of dates (will be automatically sorted chronologically)
             bucket: Storage bucket
             path: Storage path
             data_type: Data type