terrakio-core 0.4.95__py3-none-any.whl → 0.4.97__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release.

terrakio_core/__init__.py CHANGED
@@ -5,7 +5,7 @@ Terrakio Core
  Core components for Terrakio API clients.
  """
 
- __version__ = "0.4.95"
+ __version__ = "0.4.97"
 
  from .async_client import AsyncClient
  from .sync_client import SyncClient as Client
terrakio_core/accessors.py CHANGED
@@ -673,9 +673,61 @@ class GeoXarrayAccessor:
              result_gdf[col] = new_data
 
          return result_gdf
-
+
      def _apply_cloud_reduction(self, reduction_func: str, dim: Optional[Union[str, List[str]]] = None,
-                                columns: Optional[List[str]] = None, **kwargs):
+                                columns: Optional[List[str]] = None, **kwargs):
+
+         if hasattr(self._obj, 'job_id') and self._obj.job_id and self._client:
+             import asyncio
+             import concurrent.futures
+
+             def check_job_status():
+                 loop = asyncio.new_event_loop()
+                 asyncio.set_event_loop(loop)
+                 try:
+                     return loop.run_until_complete(
+                         self._client.mass_stats.track_job([self._obj.job_id])
+                     )
+                 finally:
+                     loop.close()
+
+             try:
+                 with concurrent.futures.ThreadPoolExecutor() as executor:
+                     future = executor.submit(check_job_status)
+                     track_info = future.result(timeout=10)  # Short timeout for status check
+
+                 job_info = track_info[self._obj.job_id]
+                 status = job_info['status']
+
+                 if status in ["Failed", "Cancelled", "Error"]:
+                     raise RuntimeError(f"The zonal stats job (job_id: {self._obj.job_id}) has failed, cancelled, or errored. Please check the job status!")
+
+                 elif status != "Completed":
+                     # Job is still running - include progress information
+                     completed = job_info.get('completed', 0)
+                     total = job_info.get('total', 1)
+                     progress = completed / total if total > 0 else 0
+                     percentage = progress * 100
+
+                     # Create progress bar
+                     bar_length = 30  # Shorter bar for error message
+                     filled_length = int(bar_length * progress)
+                     bar = '█' * filled_length + '░' * (bar_length - filled_length)
+
+                     raise RuntimeError(
+                         f"The zonal stats job (job_id: {self._obj.job_id}) is still running. "
+                         f"Progress: [{bar}] {percentage:.1f}% ({completed}/{total}). "
+                         f"Please come back at a later time!"
+                     )
+
+             except concurrent.futures.TimeoutError:
+                 self._client.logger.warning("Timeout checking job status, proceeding with reduction")
+             except Exception as e:
+                 if "still running" in str(e) or "failed" in str(e).lower():
+                     raise  # Re-raise our custom errors
+                 else:
+                     self._client.logger.warning(f"Could not check job status: {e}, proceeding with reduction")
+
          current_time = time.time()
          chain_reset_threshold = 0.01
 
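A note on the pattern above: _apply_cloud_reduction needs the result of an async call (track_job) from synchronous code, so it runs a fresh event loop inside a worker thread and bounds the wait with a timeout. This sidesteps the RuntimeError that asyncio.run() raises when the calling thread already has a running loop (for example, inside Jupyter). A minimal, self-contained sketch of the same technique; fetch_status and its payload shape are hypothetical stand-ins for client.mass_stats.track_job:

```python
import asyncio
import concurrent.futures

async def fetch_status(job_id):
    # Hypothetical stand-in for an async API call like track_job
    await asyncio.sleep(0.1)
    return {job_id: {"status": "Completed", "completed": 10, "total": 10}}

def check_status_sync(job_id, timeout=10.0):
    def run_in_fresh_loop():
        # A private event loop per worker thread: safe even if the
        # calling thread already has its own loop running.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            return loop.run_until_complete(fetch_status(job_id))
        finally:
            loop.close()

    with concurrent.futures.ThreadPoolExecutor() as executor:
        future = executor.submit(run_in_fresh_loop)
        # Raises concurrent.futures.TimeoutError if the call is too slow
        return future.result(timeout=timeout)

print(check_status_sync("job-123"))
```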
@@ -835,20 +887,25 @@ class GeoXarrayAccessor:
              "from io import BytesIO",
              "import tempfile",
              "import os",
+             "import traceback",
              "",
              "def consume(filename, file_bytes, metadata):",
          ]
 
          script_lines.extend([
+             "    tmp_file = None",
+             "    nc_tmp_file = None",
+             "    ds = None",
              "    ",
              "    try:",
              "        with tempfile.NamedTemporaryFile(suffix='.nc', delete=False) as tmp_file:",
              "            tmp_file.write(file_bytes)",
              "            tmp_file.flush()",
-             "            ds = xr.open_dataset(tmp_file.name, engine='scipy')",
+             "            ds = xr.open_dataset(tmp_file.name, engine='h5netcdf')",
              "        ",
          ])
 
+         # Add operations without excessive debugging
          for i, op in enumerate(self._pending_operations):
              op_type = op['type']
              params = op['params']
@@ -882,8 +939,13 @@ class GeoXarrayAccessor:
              '            output_filename = f"{base_filename}_processed.csv"',
              "            csv_data = result_df.to_csv(index=False).encode()",
              "            ",
-             "            ds.close()",
-             "            os.unlink(tmp_file.name)",
+             "            if ds is not None:",
+             "                ds.close()",
+             "            if tmp_file and hasattr(tmp_file, 'name'):",
+             "                try:",
+             "                    os.unlink(tmp_file.name)",
+             "                except:",
+             "                    pass",
              "            return output_filename, csv_data",
              "        else:",
              "            # Output as NetCDF - still has dimensions",
@@ -897,24 +959,44 @@ class GeoXarrayAccessor:
              "            netcdf_data = f.read()",
              "            ",
              "            # Clean up temp files",
-             "            os.unlink(nc_tmp_file.name)",
+             "            try:",
+             "                os.unlink(nc_tmp_file.name)",
+             "            except:",
+             "                pass",
              "            ",
-             "            ds.close()",
-             "            os.unlink(tmp_file.name)",
+             "            if ds is not None:",
+             "                ds.close()",
+             "            if tmp_file and hasattr(tmp_file, 'name'):",
+             "                try:",
+             "                    os.unlink(tmp_file.name)",
+             "                except:",
+             "                    pass",
              "            return output_filename, netcdf_data",
          ])
 
          script_lines.extend([
              "    ",
              "    except Exception as e:",
-             "        try:",
-             "            os.unlink(tmp_file.name)",
-             "        except:",
-             "            pass",
-             "        try:",
-             "            os.unlink(nc_tmp_file.name)",
-             "        except:",
-             "            pass",
+             "        ",
+             "        # Clean up resources",
+             "        if ds is not None:",
+             "            try:",
+             "                ds.close()",
+             "            except:",
+             "                pass",
+             "        ",
+             "        if tmp_file and hasattr(tmp_file, 'name'):",
+             "            try:",
+             "                os.unlink(tmp_file.name)",
+             "            except:",
+             "                pass",
+             "        ",
+             "        if nc_tmp_file and hasattr(nc_tmp_file, 'name'):",
+             "            try:",
+             "                os.unlink(nc_tmp_file.name)",
+             "            except:",
+             "                pass",
+             "        ",
              "        return None, None",
          ])
 
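Two things stand out in the generated consume script. First, the reader engine changed from scipy to h5netcdf; xarray's scipy backend only reads NetCDF3 files, while h5netcdf handles the HDF5-based NetCDF4 format. Second, cleanup now follows the defensive idiom of initializing every handle to None before the try block and guarding each close/unlink, so a failure partway through cannot raise NameError or mask the original exception. A small sketch of that idiom (process_bytes is a name introduced here for illustration, with a plain file handle standing in for the xr.open_dataset call):

```python
import os
import tempfile

def process_bytes(file_bytes):
    tmp_path = None
    handle = None
    try:
        with tempfile.NamedTemporaryFile(suffix=".nc", delete=False) as tmp_file:
            tmp_file.write(file_bytes)
            tmp_path = tmp_file.name
        # Stand-in for xr.open_dataset(tmp_path, engine="h5netcdf")
        handle = open(tmp_path, "rb")
        return handle.read()
    finally:
        # Guarded cleanup: every step tolerates "never got that far".
        if handle is not None:
            try:
                handle.close()
            except OSError:
                pass
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass

print(process_bytes(b"not really netcdf"))
```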
terrakio_core/convenience_functions/zonal_stats.py CHANGED
@@ -1,4 +1,5 @@
  import asyncio
+ import xarray as xr
 
  import geopandas as gpd
  from shapely.geometry import shape
@@ -139,8 +139,23 @@ class cloud_object(gpd.GeoDataFrame):
              raise RuntimeError(f"The zonal stats job (job_id: {self.job_id}) has failed, cancelled, or errored. Please check the job status!")
 
          else:
-             raise RuntimeError(f"The zonal stats job (job_id: {self.job_id}) is still running. Please come back at a later time!")
-
+             # Job is still running - include progress information
+             completed = job_info.get('completed', 0)
+             total = job_info.get('total', 1)
+             progress = completed / total if total > 0 else 0
+             percentage = progress * 100
+
+             # Create progress bar
+             bar_length = 30  # Shorter bar for error message
+             filled_length = int(bar_length * progress)
+             bar = '█' * filled_length + '░' * (bar_length - filled_length)
+
+             raise RuntimeError(
+                 f"The zonal stats job (job_id: {self.job_id}) is still running. "
+                 f"Progress: [{bar}] {percentage:.1f}% ({completed}/{total}). "
+                 f"Please come back at a later time!"
+             )
+
  def expand_on_time(gdf):
      """
      Expand datasets on time dimension - each time becomes a new row.
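The progress bar embedded in the error message above is plain string arithmetic: scale the completion ratio to a fixed bar width, fill that many block characters, and pad the remainder. Extracted into a standalone helper (render_progress is a name introduced here for illustration):

```python
def render_progress(completed, total, bar_length=30):
    # Guard against zero-total jobs so the ratio never divides by zero
    progress = completed / total if total > 0 else 0
    filled_length = int(bar_length * progress)
    bar = '█' * filled_length + '░' * (bar_length - filled_length)
    return f"[{bar}] {progress * 100:.1f}% ({completed}/{total})"

print(render_progress(7, 20))
# [██████████░░░░░░░░░░░░░░░░░░░░] 35.0% (7/20)
```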
@@ -498,6 +513,35 @@ async def zonal_stats(
          job_name = await client.mass_stats.track_job([mass_stats_id])
          job_name = job_name[mass_stats_id]["name"]
          cloud_files_object = cloud_object(job_id = mass_stats_id, job_name = job_name, client = client)
+
+         # Attach id column behavior to cloud object via a wrapper method
+         async def _head_with_id(n = 5):
+             result_gdf = await cloud_files_object._head_async(n)
+             if id_column is not None and id_column in gdf.columns:
+                 geometry_to_id = {geom.wkb: id_val for geom, id_val in zip(gdf.geometry, gdf[id_column])}
+                 if hasattr(result_gdf.index, 'names') and 'geometry' in result_gdf.index.names:
+                     if isinstance(result_gdf.index, pd.MultiIndex):
+                         geometry_index = result_gdf.index.get_level_values('geometry')
+                     else:
+                         geometry_index = result_gdf.index
+                     id_values = [geometry_to_id.get(geom.wkb) for geom in geometry_index]
+                     result_gdf[id_column] = id_values
+                     result_gdf = result_gdf.reset_index()
+                     if 'time' in result_gdf.columns:
+                         result_gdf = result_gdf.set_index([id_column, 'geometry', 'time'])
+                     else:
+                         result_gdf = result_gdf.set_index([id_column, 'geometry'])
+                 else:
+                     id_values = [geometry_to_id.get(geom.wkb) for geom in result_gdf['geometry']]
+                     result_gdf[id_column] = id_values
+                     if 'time' in result_gdf.columns:
+                         result_gdf = result_gdf.set_index([id_column, 'geometry', 'time'])
+                     else:
+                         result_gdf = result_gdf.set_index([id_column, 'geometry'])
+             return result_gdf
+
+         # Monkey-patch a convenience method without modifying original class contract
+         cloud_files_object.head_with_id = lambda n=5: asyncio.run(_head_with_id(n))
          return cloud_files_object
 
      quries = []
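Attaching head_with_id through a plain attribute assignment is per-instance monkey-patching: only this cloud_files_object gains the method, and the lambda bridges synchronous callers into the coroutine via asyncio.run. One caveat: asyncio.run raises RuntimeError if the calling thread already has a running event loop, which is exactly the situation the accessor code earlier in this diff works around with a worker thread. A minimal sketch of the technique; Widget and _describe_async are hypothetical:

```python
import asyncio

class Widget:
    def __init__(self, name):
        self.name = name

async def _describe_async(widget):
    await asyncio.sleep(0.1)  # stand-in for an awaited API call
    return f"widget {widget.name}"

w = Widget("demo")
# Attach a sync convenience method to this one instance;
# other Widget instances are unaffected.
w.describe = lambda: asyncio.run(_describe_async(w))
print(w.describe())
```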
@@ -524,5 +568,33 @@ async def zonal_stats(
          "is_cloud_backed": False,
      }
      gdf_with_datasets = expand_on_variables_and_time(gdf_with_datasets)
+
+     # If an id_column is provided, attach it to the result and include in the index
+     if id_column is not None and id_column in gdf.columns:
+         # Build a mapping from input geometries to id values (use WKB for robust equality)
+         geometry_to_id = {geom.wkb: id_val for geom, id_val in zip(gdf.geometry, gdf[id_column])}
+
+         # Determine geometry values in the result (index may be geometry or (geometry, time))
+         if hasattr(gdf_with_datasets.index, 'names') and 'geometry' in gdf_with_datasets.index.names:
+             if isinstance(gdf_with_datasets.index, pd.MultiIndex):
+                 geometry_index = gdf_with_datasets.index.get_level_values('geometry')
+             else:
+                 geometry_index = gdf_with_datasets.index
+             id_values = [geometry_to_id.get(geom.wkb) for geom in geometry_index]
+             gdf_with_datasets[id_column] = id_values
+             # Reset index to control index composition precisely, then set to desired levels
+             gdf_with_datasets = gdf_with_datasets.reset_index()
+             if 'time' in gdf_with_datasets.columns:
+                 gdf_with_datasets = gdf_with_datasets.set_index([id_column, 'geometry', 'time'])
+             else:
+                 gdf_with_datasets = gdf_with_datasets.set_index([id_column, 'geometry'])
+         else:
+             # geometry exists as a column
+             id_values = [geometry_to_id.get(geom.wkb) for geom in gdf_with_datasets['geometry']]
+             gdf_with_datasets[id_column] = id_values
+             if 'time' in gdf_with_datasets.columns:
+                 gdf_with_datasets = gdf_with_datasets.set_index([id_column, 'geometry', 'time'])
+             else:
+                 gdf_with_datasets = gdf_with_datasets.set_index([id_column, 'geometry'])
      return gdf_with_datasets
 
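Both copies of the id_column logic (here and in the _head_with_id wrapper above) key the geometry-to-id lookup on shapely's .wkb bytes rather than on the geometry objects themselves: WKB gives a stable, hashable byte representation, so a geometry rebuilt elsewhere with identical coordinates still resolves to the same dictionary key. Note this is exact equality; any floating-point drift in the coordinates breaks the match. A small sketch:

```python
from shapely.geometry import Point

ids = ["site-a", "site-b"]
geoms = [Point(0, 0), Point(1, 1)]

# WKB bytes are hashable and compare byte for byte,
# making them stable dict keys independent of object identity.
geometry_to_id = {geom.wkb: id_val for geom, id_val in zip(geoms, ids)}

lookup = Point(1, 1)  # rebuilt independently, not the same object
print(geometry_to_id[lookup.wkb])  # site-b
```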
terrakio_core/endpoints/dataset_management.py CHANGED
@@ -69,7 +69,7 @@ class DatasetManagement:
              name: Name of the dataset (required)
              collection: Dataset collection (default: 'terrakio-datasets')
              products: List of products
-             dates_iso8601: List of dates
+             dates_iso8601: List of dates (will be automatically sorted chronologically)
              bucket: Storage bucket
              path: Storage path
              data_type: Data type
@@ -142,7 +142,7 @@ class DatasetManagement:
              append: Whether to append data or replace (default: True)
              collection: Dataset collection (default: 'terrakio-datasets')
              products: List of products
-             dates_iso8601: List of dates
+             dates_iso8601: List of dates (will be automatically sorted chronologically)
              bucket: Storage bucket
              path: Storage path
              data_type: Data type
@@ -162,6 +162,10 @@ class DatasetManagement:
          Raises:
              APIError: If the API request fails
          """
+         # Sort dates_iso8601 chronologically if provided
+         if dates_iso8601 is not None:
+             dates_iso8601 = sorted(dates_iso8601)
+
          params = {"collection": collection, "append": str(append).lower()}
          payload = {"name": name}
          param_mapping = {
@@ -215,7 +219,7 @@ class DatasetManagement:
              append: Whether to append data or replace (default: True)
              collection: Dataset collection (default: 'terrakio-datasets')
              products: List of products
-             dates_iso8601: List of dates
+             dates_iso8601: List of dates (will be automatically sorted chronologically)
              bucket: Storage bucket
              path: Storage path
              data_type: Data type
@@ -236,6 +240,10 @@ class DatasetManagement:
          Raises:
              APIError: If the API request fails
          """
+         # Sort dates_iso8601 chronologically if provided
+         if dates_iso8601 is not None:
+             dates_iso8601 = sorted(dates_iso8601)
+
          params = {"collection": collection, "append": str(append).lower()}
          payload = {"name": name}
          param_mapping = {
@@ -289,7 +297,7 @@ class DatasetManagement:
              name: Name of the dataset (required)
              collection: Dataset collection (default: 'terrakio-datasets')
              products: List of products
-             dates_iso8601: List of dates
+             dates_iso8601: List of dates (will be automatically sorted chronologically)
              bucket: Storage bucket
              path: Storage path
              data_type: Data type
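The sorted(dates_iso8601) calls above need no date parsing because ISO 8601 is designed so that, for timestamps in the same format, lexicographic string order equals chronological order. For example:

```python
dates = ["2024-01-15", "2023-12-31", "2024-01-02"]
print(sorted(dates))
# ['2023-12-31', '2024-01-02', '2024-01-15']
```

This only holds when the formats match; mixing date-only strings with full timestamps, or strings with varying UTC offsets, would require parsing before sorting.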
terrakio_core-0.4.97.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: terrakio-core
- Version: 0.4.95
+ Version: 0.4.97
  Summary: Core package for the terrakio-python-api
  Requires-Python: >=3.11
  Requires-Dist: aiofiles>=24.1.0
terrakio_core-0.4.97.dist-info/RECORD CHANGED
@@ -1,15 +1,15 @@
- terrakio_core/__init__.py,sha256=oQmSomwovopKB9FJwxuboCtt6Sd5BwiCuROUTTR92WI,274
- terrakio_core/accessors.py,sha256=pVZ7qqkkiZdUN5DwqDFwWNHRMfzT9pLnDehI8yiUNVw,43595
+ terrakio_core/__init__.py,sha256=HTlEcbJMNK-hrdq99CSMO6xU-A4UTVRBi3fVglJFSRQ,274
+ terrakio_core/accessors.py,sha256=UZIi9y4RpBxouSmKuwuNYLIYqDxD8BH-GnUzwJuc1JI,47570
  terrakio_core/async_client.py,sha256=txdSsX3IwqtHlcS86u6N6vjV0-PIiermxNOIjEMQ3Yg,14950
  terrakio_core/client.py,sha256=VXP7BtJWIfpPPZR7_yNdSTcGwNgTwhb7KorusqkQrzk,5603
  terrakio_core/config.py,sha256=r8NARVYOca4AuM88VP_j-8wQxOk1s7VcRdyEdseBlLE,4193
  terrakio_core/exceptions.py,sha256=4qnpOM1gOxsNIXDXY4qwY1d3I4Myhp7HBh7b2D0SVrU,529
  terrakio_core/sync_client.py,sha256=jbG2sMnbR3QPvhAxQX2dBWeX_6f-Qx_MFSRLLpvfRh4,14604
  terrakio_core/convenience_functions/create_dataset_file.py,sha256=RDTAQnKUigyczv3EKhKrs34VMDZDCgL4iz0bge1d9e4,4774
- terrakio_core/convenience_functions/geoquries.py,sha256=zIgt4fDCBgOIUM_h7-a6brOG-Mi2C_bQdnqcSliTVDs,3766
- terrakio_core/convenience_functions/zonal_stats.py,sha256=B0c95M5yqGE3uC2_Cad3FKXeaz97hVHdnaWSMdJdKsU,19496
+ terrakio_core/convenience_functions/geoquries.py,sha256=7E3drOD5ffNk2-rKLbwKsNp3_Berq-S1lQk5wwHSuAo,3786
+ terrakio_core/convenience_functions/zonal_stats.py,sha256=Sg_T3_85acMPvZkDxBf3fMTmNXnEfKnjVCEB7SKT4Fc,23807
  terrakio_core/endpoints/auth.py,sha256=FdLsPScPIBo-Gxl6ZnE-46cp2molggAJtL72LssN3fg,6049
- terrakio_core/endpoints/dataset_management.py,sha256=BUm8IIlW_Q45vDiQp16CiJGeSLheI8uWRVRQtMdhaNk,13161
+ terrakio_core/endpoints/dataset_management.py,sha256=D2foX8DGbSXQ4vYLRt0Es3j96a_qfd920Ct3uN3dd7Y,13641
  terrakio_core/endpoints/group_management.py,sha256=VFl3jakjQa9OPi351D3DZvLU9M7fHdfjCzGhmyJsx3U,6309
  terrakio_core/endpoints/mass_stats.py,sha256=Vb6Tf8kKf5Hlch4ddsrQnfayfiK6z7NSjO8D0pop4p8,25699
  terrakio_core/endpoints/model_management.py,sha256=LH_gHPrqYA-_45KWpDBRcFbwHgm-Kg0zk1ealy7P_C0,52379
@@ -18,6 +18,6 @@ terrakio_core/endpoints/user_management.py,sha256=WlFr3EfK8iI6DfkpMuYLHZUPk2n7_D
  terrakio_core/helper/bounded_taskgroup.py,sha256=wiTH10jhKZgrsgrFUNG6gig8bFkUEPHkGRT2XY7Rgmo,677
  terrakio_core/helper/decorators.py,sha256=L6om7wmWNgCei3Wy5U0aZ-70OzsCwclkjIf7SfQuhCg,2289
  terrakio_core/helper/tiles.py,sha256=lcLCO6KiP05lCI9vngo3zCZJ6Z9C0pUxHSQS4H58EHc,2699
- terrakio_core-0.4.95.dist-info/METADATA,sha256=gAjc5wDDg2a8vmkBVZWSK1QdtMTlaq9EGFY9qyQ16q4,1151
- terrakio_core-0.4.95.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- terrakio_core-0.4.95.dist-info/RECORD,,
+ terrakio_core-0.4.97.dist-info/METADATA,sha256=wxkjwA2YX_vXeBnpenFaVjoglb7Muu84Oe5UdY5-jyM,1151
+ terrakio_core-0.4.97.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ terrakio_core-0.4.97.dist-info/RECORD,,