terrakio-core 0.4.95__tar.gz → 0.4.97__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of terrakio-core might be problematic.
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/PKG-INFO +1 -1
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/pyproject.toml +1 -1
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/__init__.py +1 -1
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/accessors.py +98 -16
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/convenience_functions/geoquries.py +1 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/convenience_functions/zonal_stats.py +74 -2
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/endpoints/dataset_management.py +12 -4
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/.gitignore +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/README.md +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/async_client.py +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/client.py +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/config.py +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/convenience_functions/create_dataset_file.py +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/endpoints/auth.py +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/endpoints/group_management.py +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/endpoints/mass_stats.py +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/endpoints/model_management.py +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/endpoints/space_management.py +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/endpoints/user_management.py +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/exceptions.py +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/helper/bounded_taskgroup.py +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/helper/decorators.py +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/helper/tiles.py +0 -0
- {terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/sync_client.py +0 -0

{terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/accessors.py RENAMED

@@ -673,9 +673,61 @@ class GeoXarrayAccessor:
             result_gdf[col] = new_data

         return result_gdf
-
+
     def _apply_cloud_reduction(self, reduction_func: str, dim: Optional[Union[str, List[str]]] = None,
-
+                               columns: Optional[List[str]] = None, **kwargs):
+
+        if hasattr(self._obj, 'job_id') and self._obj.job_id and self._client:
+            import asyncio
+            import concurrent.futures
+
+            def check_job_status():
+                loop = asyncio.new_event_loop()
+                asyncio.set_event_loop(loop)
+                try:
+                    return loop.run_until_complete(
+                        self._client.mass_stats.track_job([self._obj.job_id])
+                    )
+                finally:
+                    loop.close()
+
+            try:
+                with concurrent.futures.ThreadPoolExecutor() as executor:
+                    future = executor.submit(check_job_status)
+                    track_info = future.result(timeout=10)  # Short timeout for status check
+
+                job_info = track_info[self._obj.job_id]
+                status = job_info['status']
+
+                if status in ["Failed", "Cancelled", "Error"]:
+                    raise RuntimeError(f"The zonal stats job (job_id: {self._obj.job_id}) has failed, cancelled, or errored. Please check the job status!")
+
+                elif status != "Completed":
+                    # Job is still running - include progress information
+                    completed = job_info.get('completed', 0)
+                    total = job_info.get('total', 1)
+                    progress = completed / total if total > 0 else 0
+                    percentage = progress * 100
+
+                    # Create progress bar
+                    bar_length = 30  # Shorter bar for error message
+                    filled_length = int(bar_length * progress)
+                    bar = '█' * filled_length + '░' * (bar_length - filled_length)
+
+                    raise RuntimeError(
+                        f"The zonal stats job (job_id: {self._obj.job_id}) is still running. "
+                        f"Progress: [{bar}] {percentage:.1f}% ({completed}/{total}). "
+                        f"Please come back at a later time!"
+                    )
+
+            except concurrent.futures.TimeoutError:
+                self._client.logger.warning("Timeout checking job status, proceeding with reduction")
+            except Exception as e:
+                if "still running" in str(e) or "failed" in str(e).lower():
+                    raise  # Re-raise our custom errors
+                else:
+                    self._client.logger.warning(f"Could not check job status: {e}, proceeding with reduction")
+
         current_time = time.time()
         chain_reset_threshold = 0.01

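The status check above has to call an async client method from synchronous accessor code; it does so by running a fresh event loop inside a worker thread and waiting on the result with a timeout. A stripped-down sketch of that pattern (the coroutine, return value and timeout below are placeholders, not the package's API):

    import asyncio
    import concurrent.futures

    async def fetch_status():
        # Placeholder coroutine standing in for an async client call such as track_job(...)
        await asyncio.sleep(0.1)
        return {"status": "Completed"}

    def fetch_status_sync(timeout: float = 10.0):
        def run_in_new_loop():
            # A brand-new loop, created and closed inside the worker thread
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            try:
                return loop.run_until_complete(fetch_status())
            finally:
                loop.close()

        # Running in a worker thread avoids clashing with any event loop in the calling thread
        with concurrent.futures.ThreadPoolExecutor() as executor:
            return executor.submit(run_in_new_loop).result(timeout=timeout)

    print(fetch_status_sync())  # {'status': 'Completed'}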
@@ -835,20 +887,25 @@ class GeoXarrayAccessor:
             "from io import BytesIO",
             "import tempfile",
             "import os",
+            "import traceback",
             "",
             "def consume(filename, file_bytes, metadata):",
         ]

         script_lines.extend([
+            "    tmp_file = None",
+            "    nc_tmp_file = None",
+            "    ds = None",
             "    ",
             "    try:",
             "        with tempfile.NamedTemporaryFile(suffix='.nc', delete=False) as tmp_file:",
             "            tmp_file.write(file_bytes)",
             "            tmp_file.flush()",
-            "        ds = xr.open_dataset(tmp_file.name, engine='
+            "        ds = xr.open_dataset(tmp_file.name, engine='h5netcdf')",
             "    ",
         ])

+        # Add operations without excessive debugging
         for i, op in enumerate(self._pending_operations):
             op_type = op['type']
             params = op['params']
@@ -882,8 +939,13 @@ class GeoXarrayAccessor:
             '            output_filename = f"{base_filename}_processed.csv"',
             "            csv_data = result_df.to_csv(index=False).encode()",
             "            ",
-            "            ds
-            "
+            "            if ds is not None:",
+            "                ds.close()",
+            "            if tmp_file and hasattr(tmp_file, 'name'):",
+            "                try:",
+            "                    os.unlink(tmp_file.name)",
+            "                except:",
+            "                    pass",
             "            return output_filename, csv_data",
             "        else:",
             "            # Output as NetCDF - still has dimensions",
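These cleanup additions, together with the tmp_file / nc_tmp_file / ds = None initialisers added in the previous hunk, are meant to ensure the generated consume() script closes the dataset and unlinks its temporary file on every exit path. A rough standalone sketch of the same open-then-clean-up pattern, assuming the h5netcdf engine shown in the diff (the helper name is illustrative, not part of the package):

    import os
    import tempfile
    import xarray as xr

    def open_netcdf_bytes(file_bytes: bytes) -> xr.Dataset:
        tmp_file = None
        ds = None
        try:
            # Persist the payload so xarray can open it by file name
            with tempfile.NamedTemporaryFile(suffix='.nc', delete=False) as tmp_file:
                tmp_file.write(file_bytes)
                tmp_file.flush()
            ds = xr.open_dataset(tmp_file.name, engine='h5netcdf')
            return ds.load()  # pull data into memory before the file is removed
        finally:
            # Close and unlink regardless of success; ignore cleanup failures
            if ds is not None:
                try:
                    ds.close()
                except Exception:
                    pass
            if tmp_file is not None:
                try:
                    os.unlink(tmp_file.name)
                except OSError:
                    pass

The generated script in the diff performs the same steps inline, with per-branch try/except blocks rather than a single finally.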
@@ -897,24 +959,44 @@ class GeoXarrayAccessor:
             "                netcdf_data = f.read()",
             "            ",
             "            # Clean up temp files",
-            "
+            "            try:",
+            "                os.unlink(nc_tmp_file.name)",
+            "            except:",
+            "                pass",
             "            ",
-            "            ds
-            "
+            "            if ds is not None:",
+            "                ds.close()",
+            "            if tmp_file and hasattr(tmp_file, 'name'):",
+            "                try:",
+            "                    os.unlink(tmp_file.name)",
+            "                except:",
+            "                    pass",
             "            return output_filename, netcdf_data",
         ])

         script_lines.extend([
             "    ",
             "    except Exception as e:",
-            "
-            "
-            "
-            "
-            "
-            "
-            "
-            "
+            "        ",
+            "        # Clean up resources",
+            "        if ds is not None:",
+            "            try:",
+            "                ds.close()",
+            "            except:",
+            "                pass",
+            "        ",
+            "        if tmp_file and hasattr(tmp_file, 'name'):",
+            "            try:",
+            "                os.unlink(tmp_file.name)",
+            "            except:",
+            "                pass",
+            "        ",
+            "        if nc_tmp_file and hasattr(nc_tmp_file, 'name'):",
+            "            try:",
+            "                os.unlink(nc_tmp_file.name)",
+            "            except:",
+            "                pass",
+            "        ",
             "        return None, None",
         ])


{terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/convenience_functions/zonal_stats.py RENAMED

@@ -139,8 +139,23 @@ class cloud_object(gpd.GeoDataFrame):
             raise RuntimeError(f"The zonal stats job (job_id: {self.job_id}) has failed, cancelled, or errored. Please check the job status!")

         else:
-
-
+            # Job is still running - include progress information
+            completed = job_info.get('completed', 0)
+            total = job_info.get('total', 1)
+            progress = completed / total if total > 0 else 0
+            percentage = progress * 100
+
+            # Create progress bar
+            bar_length = 30  # Shorter bar for error message
+            filled_length = int(bar_length * progress)
+            bar = '█' * filled_length + '░' * (bar_length - filled_length)
+
+            raise RuntimeError(
+                f"The zonal stats job (job_id: {self.job_id}) is still running. "
+                f"Progress: [{bar}] {percentage:.1f}% ({completed}/{total}). "
+                f"Please come back at a later time!"
+            )
+
 def expand_on_time(gdf):
     """
     Expand datasets on time dimension - each time becomes a new row.
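The same bar construction now appears in both accessors.py and zonal_stats.py, and it reduces to a few lines of arithmetic and string formatting. A minimal self-contained sketch (the function name and sample numbers are illustrative, not part of the package):

    def render_progress(completed: int, total: int, bar_length: int = 30) -> str:
        # Guard against a zero total, as the library code does
        progress = completed / total if total > 0 else 0
        filled_length = int(bar_length * progress)
        bar = '█' * filled_length + '░' * (bar_length - filled_length)
        return f"[{bar}] {progress * 100:.1f}% ({completed}/{total})"

    print(render_progress(7, 20))
    # [██████████░░░░░░░░░░░░░░░░░░░░] 35.0% (7/20)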
@@ -498,6 +513,35 @@ async def zonal_stats(
         job_name = await client.mass_stats.track_job([mass_stats_id])
         job_name = job_name[mass_stats_id]["name"]
         cloud_files_object = cloud_object(job_id = mass_stats_id, job_name = job_name, client = client)
+
+        # Attach id column behavior to cloud object via a wrapper method
+        async def _head_with_id(n = 5):
+            result_gdf = await cloud_files_object._head_async(n)
+            if id_column is not None and id_column in gdf.columns:
+                geometry_to_id = {geom.wkb: id_val for geom, id_val in zip(gdf.geometry, gdf[id_column])}
+                if hasattr(result_gdf.index, 'names') and 'geometry' in result_gdf.index.names:
+                    if isinstance(result_gdf.index, pd.MultiIndex):
+                        geometry_index = result_gdf.index.get_level_values('geometry')
+                    else:
+                        geometry_index = result_gdf.index
+                    id_values = [geometry_to_id.get(geom.wkb) for geom in geometry_index]
+                    result_gdf[id_column] = id_values
+                    result_gdf = result_gdf.reset_index()
+                    if 'time' in result_gdf.columns:
+                        result_gdf = result_gdf.set_index([id_column, 'geometry', 'time'])
+                    else:
+                        result_gdf = result_gdf.set_index([id_column, 'geometry'])
+                else:
+                    id_values = [geometry_to_id.get(geom.wkb) for geom in result_gdf['geometry']]
+                    result_gdf[id_column] = id_values
+                    if 'time' in result_gdf.columns:
+                        result_gdf = result_gdf.set_index([id_column, 'geometry', 'time'])
+                    else:
+                        result_gdf = result_gdf.set_index([id_column, 'geometry'])
+            return result_gdf
+
+        # Monkey-patch a convenience method without modifying original class contract
+        cloud_files_object.head_with_id = lambda n=5: asyncio.run(_head_with_id(n))
         return cloud_files_object

     quries = []
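For callers, the monkey-patched attribute means the cloud-backed result can be previewed synchronously with the id column reattached. A hypothetical call, assuming `result` is the cloud_object returned by zonal_stats and "site_id" was passed as id_column:

    # Hypothetical usage of the wrapper added in this release
    preview = result.head_with_id(10)   # first 10 rows, id column restored and indexed
    print(preview.index.names)          # ['site_id', 'geometry', 'time'] when a time level exists

Because the wrapper calls asyncio.run(), it is intended for synchronous callers; asyncio.run() raises a RuntimeError if an event loop is already running in the calling thread.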
@@ -524,5 +568,33 @@ async def zonal_stats(
         "is_cloud_backed": False,
     }
     gdf_with_datasets = expand_on_variables_and_time(gdf_with_datasets)
+
+    # If an id_column is provided, attach it to the result and include in the index
+    if id_column is not None and id_column in gdf.columns:
+        # Build a mapping from input geometries to id values (use WKB for robust equality)
+        geometry_to_id = {geom.wkb: id_val for geom, id_val in zip(gdf.geometry, gdf[id_column])}
+
+        # Determine geometry values in the result (index may be geometry or (geometry, time))
+        if hasattr(gdf_with_datasets.index, 'names') and 'geometry' in gdf_with_datasets.index.names:
+            if isinstance(gdf_with_datasets.index, pd.MultiIndex):
+                geometry_index = gdf_with_datasets.index.get_level_values('geometry')
+            else:
+                geometry_index = gdf_with_datasets.index
+            id_values = [geometry_to_id.get(geom.wkb) for geom in geometry_index]
+            gdf_with_datasets[id_column] = id_values
+            # Reset index to control index composition precisely, then set to desired levels
+            gdf_with_datasets = gdf_with_datasets.reset_index()
+            if 'time' in gdf_with_datasets.columns:
+                gdf_with_datasets = gdf_with_datasets.set_index([id_column, 'geometry', 'time'])
+            else:
+                gdf_with_datasets = gdf_with_datasets.set_index([id_column, 'geometry'])
+        else:
+            # geometry exists as a column
+            id_values = [geometry_to_id.get(geom.wkb) for geom in gdf_with_datasets['geometry']]
+            gdf_with_datasets[id_column] = id_values
+            if 'time' in gdf_with_datasets.columns:
+                gdf_with_datasets = gdf_with_datasets.set_index([id_column, 'geometry', 'time'])
+            else:
+                gdf_with_datasets = gdf_with_datasets.set_index([id_column, 'geometry'])
     return gdf_with_datasets

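Both this hunk and the wrapper above key the geometry-to-id lookup on shapely's WKB bytes, which gives exact, hashable matching between input rows and result rows without relying on geometry object identity. A small self-contained sketch of the technique (the site_id column and point data are invented for illustration):

    import geopandas as gpd
    from shapely.geometry import Point

    # Input features with a user-supplied id column
    gdf = gpd.GeoDataFrame(
        {"site_id": ["A", "B"]},
        geometry=[Point(0, 0), Point(1, 1)],
        crs="EPSG:4326",
    )

    # Map each geometry's WKB bytes to its id value
    geometry_to_id = {geom.wkb: id_val for geom, id_val in zip(gdf.geometry, gdf["site_id"])}

    # A result frame keyed by the same geometries, as the zonal stats output would be
    result = gpd.GeoDataFrame({"mean": [1.5, 2.5]}, geometry=list(gdf.geometry), crs="EPSG:4326")
    result["site_id"] = [geometry_to_id.get(geom.wkb) for geom in result.geometry]
    result = result.set_index(["site_id", "geometry"])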

{terrakio_core-0.4.95 → terrakio_core-0.4.97}/terrakio_core/endpoints/dataset_management.py RENAMED

@@ -69,7 +69,7 @@ class DatasetManagement:
             name: Name of the dataset (required)
             collection: Dataset collection (default: 'terrakio-datasets')
             products: List of products
-            dates_iso8601: List of dates
+            dates_iso8601: List of dates (will be automatically sorted chronologically)
             bucket: Storage bucket
             path: Storage path
             data_type: Data type
@@ -142,7 +142,7 @@ class DatasetManagement:
             append: Whether to append data or replace (default: True)
             collection: Dataset collection (default: 'terrakio-datasets')
             products: List of products
-            dates_iso8601: List of dates
+            dates_iso8601: List of dates (will be automatically sorted chronologically)
             bucket: Storage bucket
             path: Storage path
             data_type: Data type
@@ -162,6 +162,10 @@ class DatasetManagement:
         Raises:
             APIError: If the API request fails
         """
+        # Sort dates_iso8601 chronologically if provided
+        if dates_iso8601 is not None:
+            dates_iso8601 = sorted(dates_iso8601)
+
         params = {"collection": collection, "append": str(append).lower()}
         payload = {"name": name}
         param_mapping = {
@@ -215,7 +219,7 @@ class DatasetManagement:
             append: Whether to append data or replace (default: True)
             collection: Dataset collection (default: 'terrakio-datasets')
             products: List of products
-            dates_iso8601: List of dates
+            dates_iso8601: List of dates (will be automatically sorted chronologically)
             bucket: Storage bucket
             path: Storage path
             data_type: Data type
@@ -236,6 +240,10 @@ class DatasetManagement:
         Raises:
             APIError: If the API request fails
         """
+        # Sort dates_iso8601 chronologically if provided
+        if dates_iso8601 is not None:
+            dates_iso8601 = sorted(dates_iso8601)
+
         params = {"collection": collection, "append": str(append).lower()}
         payload = {"name": name}
         param_mapping = {
@@ -289,7 +297,7 @@ class DatasetManagement:
             name: Name of the dataset (required)
             collection: Dataset collection (default: 'terrakio-datasets')
             products: List of products
-            dates_iso8601: List of dates
+            dates_iso8601: List of dates (will be automatically sorted chronologically)
             bucket: Storage bucket
             path: Storage path
             data_type: Data type
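The docstring notes and the new sorted() calls rely on a property of ISO 8601 strings: when all dates share a consistent format, lexicographic order equals chronological order, so a plain sorted() is enough. A quick illustration with invented dates:

    dates_iso8601 = ["2021-03-01", "2020-12-31", "2021-01-15"]
    print(sorted(dates_iso8601))
    # ['2020-12-31', '2021-01-15', '2021-03-01']

The guarantee only holds for uniformly formatted values; mixing dates with and without time components, or with differing UTC offsets, would break the equivalence.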