terrakio-core 0.1.9__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of terrakio-core might be problematic.
- terrakio_core/client.py +449 -0
- terrakio_core/mass_stats.py +262 -0
- {terrakio_core-0.1.9.dist-info → terrakio_core-0.2.0.dist-info}/METADATA +2 -1
- {terrakio_core-0.1.9.dist-info → terrakio_core-0.2.0.dist-info}/RECORD +6 -5
- {terrakio_core-0.1.9.dist-info → terrakio_core-0.2.0.dist-info}/WHEEL +1 -1
- {terrakio_core-0.1.9.dist-info → terrakio_core-0.2.0.dist-info}/top_level.txt +0 -0
terrakio_core/client.py CHANGED

@@ -3,6 +3,8 @@ import xarray as xr
 from io import BytesIO
 from typing import Dict, Any, Optional, Union
 import json
+import aiohttp
+import asyncio
 from shapely.geometry import shape
 from shapely.geometry.base import BaseGeometry as ShapelyGeometry
 from .exceptions import APIError, ConfigurationError
@@ -63,6 +65,117 @@ class BaseClient:
         })
         self.user_management = None
         self.dataset_management = None
+        self.mass_stats = None
+        self._aiohttp_session = None
+
+    @property
+    async def aiohttp_session(self):
+        if self._aiohttp_session is None or self._aiohttp_session.closed:
+            self._aiohttp_session = aiohttp.ClientSession(
+                headers={
+                    'Content-Type': 'application/json',
+                    'x-api-key': self.key
+                },
+                timeout=aiohttp.ClientTimeout(total=self.timeout)
+            )
+        return self._aiohttp_session
+
+    async def wcs_async(self, expr: str, feature: Union[Dict[str, Any], ShapelyGeometry],
+                        in_crs: str = "epsg:4326", out_crs: str = "epsg:4326",
+                        output: str = "csv", resolution: int = -1, **kwargs):
+        """
+        Asynchronous version of the wcs() method using aiohttp.
+
+        Args:
+            expr (str): The WCS expression to evaluate
+            feature (Union[Dict[str, Any], ShapelyGeometry]): The geographic feature
+            in_crs (str): Input coordinate reference system
+            out_crs (str): Output coordinate reference system
+            output (str): Output format ('csv' or 'netcdf')
+            resolution (int): Resolution parameter
+            **kwargs: Additional parameters to pass to the WCS request
+
+        Returns:
+            Union[pd.DataFrame, xr.Dataset, bytes]: The response data in the requested format
+        """
+        if hasattr(feature, 'is_valid'):
+            from shapely.geometry import mapping
+            feature = {
+                "type": "Feature",
+                "geometry": mapping(feature),
+                "properties": {}
+            }
+        self.validate_feature(feature)
+
+        payload = {
+            "feature": feature,
+            "in_crs": in_crs,
+            "out_crs": out_crs,
+            "output": output,
+            "resolution": resolution,
+            "expr": expr,
+            **kwargs
+        }
+
+        if not self.quiet:
+            print(f"Requesting data with expression: {expr}")
+
+        request_url = f"{self.url}/wcs"
+        print("the payload is ", payload)
+        print("the request url is ", request_url)
+
+        try:
+            # Get the shared aiohttp session
+            session = await self.aiohttp_session
+            async with session.post(request_url, json=payload, ssl=self.verify) as response:
+                if not response.ok:
+                    error_msg = f"API request failed: {response.status} {response.reason}"
+                    try:
+                        error_data = await response.json()
+                        if "detail" in error_data:
+                            error_msg += f" - {error_data['detail']}"
+                    except:
+                        pass
+                    raise APIError(error_msg)
+
+                content = await response.read()
+                print("the content is ", content)
+
+                if output.lower() == "csv":
+                    import pandas as pd
+                    df = pd.read_csv(BytesIO(content))
+                    print("the content is ", df)
+                    return df
+                elif output.lower() == "netcdf":
+                    return xr.open_dataset(BytesIO(content))
+                else:
+                    try:
+                        return xr.open_dataset(BytesIO(content))
+                    except ValueError:
+                        import pandas as pd
+                        try:
+                            return pd.read_csv(BytesIO(content))
+                        except:
+                            return content
+
+        except aiohttp.ClientError as e:
+            print(f"Client error in wcs_async: {str(e)}")
+            raise APIError(f"Request failed: {str(e)}")
+        except Exception as e:
+            print(f"Unexpected error in wcs_async: {str(e)}")
+            raise
+
+    async def close_async(self):
+        """Close the aiohttp session"""
+        if self._aiohttp_session and not self._aiohttp_session.closed:
+            await self._aiohttp_session.close()
+            self._aiohttp_session = None
+
+    async def __aenter__(self):
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        await self.close_async()
 
     def validate_feature(self, feature: Dict[str, Any]) -> None:
         if hasattr(feature, 'is_valid'):
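For orientation, here is a minimal usage sketch of the new async path. The constructor arguments are assumptions (this diff only shows that the client carries self.url, self.key, self.timeout and self.verify), and the expression string is a placeholder:

import asyncio
from terrakio_core.client import BaseClient

async def main():
    # Hypothetical constructor keywords; real clients may read these from config.
    client = BaseClient(url="https://api.example.com", key="YOUR_API_KEY")
    async with client:  # __aexit__ closes the shared aiohttp session via close_async()
        df = await client.wcs_async(
            expr="mean(dataset.band)",  # placeholder expression
            feature={
                "type": "Feature",
                "geometry": {"type": "Point", "coordinates": [149.13, -35.28]},
                "properties": {},
            },
            output="csv",
        )
        print(df.head())

asyncio.run(main())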
@@ -176,6 +289,8 @@ class BaseClient:
         if not self.quiet:
             print(f"Requesting data with expression: {expr}")
         request_url = f"{self.url}/wcs"
+        print("the payload is ", payload)
+        print("the request url is ", request_url)
         try:
             response = self.session.post(request_url, json=payload, timeout=self.timeout, verify=self.verify)
             if not response.ok:
@@ -378,3 +493,337 @@ class BaseClient:
         return self
     def __exit__(self, exc_type, exc_val, exc_tb):
         self.close()
+
+    # Mass Stats methods
+    def upload_mass_stats(self, name, size, bucket, output, location=None, **kwargs):
+        if not self.mass_stats:
+            from terrakio_core.mass_stats import MassStats
+            if not self.url or not self.key:
+                raise ConfigurationError("Mass Stats client not initialized. Make sure API URL and key are set.")
+            self.mass_stats = MassStats(
+                base_url=self.url,
+                api_key=self.key,
+                verify=self.verify,
+                timeout=self.timeout
+            )
+        return self.mass_stats.upload_request(name, size, bucket, output, location, **kwargs)
+
+    def start_mass_stats_job(self, task_id):
+        if not self.mass_stats:
+            from terrakio_core.mass_stats import MassStats
+            if not self.url or not self.key:
+                raise ConfigurationError("Mass Stats client not initialized. Make sure API URL and key are set.")
+            self.mass_stats = MassStats(
+                base_url=self.url,
+                api_key=self.key,
+                verify=self.verify,
+                timeout=self.timeout
+            )
+        return self.mass_stats.start_job(task_id)
+
+    def get_mass_stats_task_id(self, name, stage, uid=None):
+        if not self.mass_stats:
+            from terrakio_core.mass_stats import MassStats
+            if not self.url or not self.key:
+                raise ConfigurationError("Mass Stats client not initialized. Make sure API URL and key are set.")
+            self.mass_stats = MassStats(
+                base_url=self.url,
+                api_key=self.key,
+                verify=self.verify,
+                timeout=self.timeout
+            )
+        return self.mass_stats.get_task_id(name, stage, uid)
+
+    def track_mass_stats_job(self, ids=None):
+        if not self.mass_stats:
+            from terrakio_core.mass_stats import MassStats
+            if not self.url or not self.key:
+                raise ConfigurationError("Mass Stats client not initialized. Make sure API URL and key are set.")
+            self.mass_stats = MassStats(
+                base_url=self.url,
+                api_key=self.key,
+                verify=self.verify,
+                timeout=self.timeout
+            )
+        return self.mass_stats.track_job(ids)
+
+    def get_mass_stats_history(self, limit=100):
+        if not self.mass_stats:
+            from terrakio_core.mass_stats import MassStats
+            if not self.url or not self.key:
+                raise ConfigurationError("Mass Stats client not initialized. Make sure API URL and key are set.")
+            self.mass_stats = MassStats(
+                base_url=self.url,
+                api_key=self.key,
+                verify=self.verify,
+                timeout=self.timeout
+            )
+        return self.mass_stats.get_history(limit)
+
+    def start_mass_stats_post_processing(self, process_name, data_name, output, consumer_path, overwrite=False):
+        if not self.mass_stats:
+            from terrakio_core.mass_stats import MassStats
+            if not self.url or not self.key:
+                raise ConfigurationError("Mass Stats client not initialized. Make sure API URL and key are set.")
+            self.mass_stats = MassStats(
+                base_url=self.url,
+                api_key=self.key,
+                verify=self.verify,
+                timeout=self.timeout
+            )
+        return self.mass_stats.start_post_processing(process_name, data_name, output, consumer_path, overwrite)
+
+    def download_mass_stats_results(self, id=None, force_loc=False, **kwargs):
+        if not self.mass_stats:
+            from terrakio_core.mass_stats import MassStats
+            if not self.url or not self.key:
+                raise ConfigurationError("Mass Stats client not initialized. Make sure API URL and key are set.")
+            self.mass_stats = MassStats(
+                base_url=self.url,
+                api_key=self.key,
+                verify=self.verify,
+                timeout=self.timeout
+            )
+        return self.mass_stats.download_results(id, force_loc, **kwargs)
+
+    def cancel_mass_stats_job(self, id):
+        if not self.mass_stats:
+            from terrakio_core.mass_stats import MassStats
+            if not self.url or not self.key:
+                raise ConfigurationError("Mass Stats client not initialized. Make sure API URL and key are set.")
+            self.mass_stats = MassStats(
+                base_url=self.url,
+                api_key=self.key,
+                verify=self.verify,
+                timeout=self.timeout
+            )
+        return self.mass_stats.cancel_job(id)
+
+    def cancel_all_mass_stats_jobs(self):
+        if not self.mass_stats:
+            from terrakio_core.mass_stats import MassStats
+            if not self.url or not self.key:
+                raise ConfigurationError("Mass Stats client not initialized. Make sure API URL and key are set.")
+            self.mass_stats = MassStats(
+                base_url=self.url,
+                api_key=self.key,
+                verify=self.verify,
+                timeout=self.timeout
+            )
+        return self.mass_stats.cancel_all_jobs()
+
+    def _create_pyramids(self, name, levels, config):
+        if not self.mass_stats:
+            from terrakio_core.mass_stats import MassStats
+            if not self.url or not self.key:
+                raise ConfigurationError("Mass Stats client not initialized. Make sure API URL and key are set.")
+            self.mass_stats = MassStats(
+                base_url=self.url,
+                api_key=self.key,
+                verify=self.verify,
+                timeout=self.timeout
+            )
+        return self.mass_stats.create_pyramids(name, levels, config)
+
+    def random_sample(self, name, **kwargs):
+        if not self.mass_stats:
+            from terrakio_core.mass_stats import MassStats
+            if not self.url or not self.key:
+                raise ConfigurationError("Mass Stats client not initialized. Make sure API URL and key are set.")
+            self.mass_stats = MassStats(
+                base_url=self.url,
+                api_key=self.key,
+                verify=self.verify,
+                timeout=self.timeout
+            )
+        return self.mass_stats.random_sample(name, **kwargs)
+
+    async def zonal_stats_async(self, gdb, expr, conc=20, inplace=False, output="csv"):
+        """
+        Compute zonal statistics for all geometries in a GeoDataFrame using asyncio for concurrency.
+        """
+        import asyncio
+        import pandas as pd
+        import geopandas as gpd
+        from shapely.geometry import mapping
+
+        print(f"Starting zonal_stats_async with {len(gdb)} geometries")
+
+        # Process geometries in batches
+        all_results = []
+        row_indices = []
+
+        async def process_geometry(geom, index):
+            """Process a single geometry"""
+            try:
+                feature = {
+                    "type": "Feature",
+                    "geometry": mapping(geom),
+                    "properties": {"index": index}
+                }
+                print(f"Processing geometry {index}")
+                result = await self.wcs_async(expr=expr, feature=feature, output=output)
+                print(f"Got result for geometry {index}: {type(result)}")
+                # Add original index to track which geometry this result belongs to
+                if isinstance(result, pd.DataFrame):
+                    result['_geometry_index'] = index
+                return result
+            except Exception as e:
+                print(f"Error in process_geometry for index {index}: {str(e)}")
+                raise
+
+        async def process_batch(batch_indices):
+            """Process a batch of geometries concurrently using TaskGroup"""
+            print(f"Processing batch with indices: {list(batch_indices)}")
+            try:
+                async with asyncio.TaskGroup() as tg:
+                    tasks = []
+                    for idx in batch_indices:
+                        geom = gdb.geometry.iloc[idx]
+                        task = tg.create_task(process_geometry(geom, idx))
+                        tasks.append(task)
+
+                # Get results from completed tasks
+                results = []
+                for task in tasks:
+                    try:
+                        result = task.result()
+                        print(f"Task completed successfully: {type(result)}")
+                        results.append(result)
+                    except Exception as e:
+                        print(f"Error getting task result: {str(e)}")
+                        raise
+
+                return results
+            except* Exception as e:
+                print(f"TaskGroup error: {str(e)}")
+                # Get the actual exceptions from the tasks
+                for task in tasks:
+                    if task.done() and task.exception():
+                        print(f"Task exception: {str(task.exception())}")
+                raise
+
+        # Process in batches to control concurrency
+        for i in range(0, len(gdb), conc):
+            batch_indices = range(i, min(i + conc, len(gdb)))
+            try:
+                print(f"Starting batch {i//conc + 1}")
+                batch_results = await process_batch(batch_indices)
+                print(f"Batch {i//conc + 1} completed successfully")
+                all_results.extend(batch_results)
+                row_indices.extend(batch_indices)
+            except Exception as e:
+                print(f"Error processing batch starting at index {i}: {str(e)}")
+                if hasattr(e, 'response'):
+                    print(f"API Response: {e.response.text}")
+                raise
+
+        if not all_results:
+            raise ValueError("No valid results were returned for any geometry")
+
+        # Combine all results
+        combined_df = pd.concat(all_results, ignore_index=True)
+
+        # Check if we have temporal results
+        has_time = 'time' in combined_df.columns
+
+        # Create a result GeoDataFrame
+        if has_time:
+            # For temporal data, we'll create a hierarchical index
+            # First make sure we have the geometry index and time columns
+            if '_geometry_index' not in combined_df.columns:
+                raise ValueError("Missing geometry index in results")
+
+            # Create hierarchical index on geometry_index and time
+            combined_df.set_index(['_geometry_index', 'time'], inplace=True)
+
+            # For each unique geometry index, we need the corresponding geometry
+            geometry_series = gdb.geometry.copy()
+
+            # Get columns that will become new attributes (exclude index/utility columns)
+            result_cols = combined_df.columns
+
+            # Create a new GeoDataFrame with multi-index
+            result_rows = []
+            geometries = []
+
+            # Iterate through the hierarchical index
+            for (geom_idx, time_val), row in combined_df.iterrows():
+                # Create a new row with geometry properties + result columns
+                new_row = {}
+
+                # Add original GeoDataFrame columns (except geometry)
+                for col in gdb.columns:
+                    if col != 'geometry':
+                        new_row[col] = gdb.loc[geom_idx, col]
+
+                # Add result columns
+                for col in result_cols:
+                    new_row[col] = row[col]
+
+                result_rows.append(new_row)
+                geometries.append(gdb.geometry.iloc[geom_idx])
+
+            # Create a new GeoDataFrame with multi-index
+            multi_index = pd.MultiIndex.from_tuples(
+                combined_df.index.tolist(),
+                names=['geometry_index', 'time']
+            )
+
+            result_gdf = gpd.GeoDataFrame(
+                result_rows,
+                geometry=geometries,
+                index=multi_index
+            )
+
+            if inplace:
+                # Can't really do inplace with multi-temporal results as we're changing the structure
+                print("Warning: inplace=True ignored for temporal results, returning new GeoDataFrame")
+                return result_gdf
+            else:
+                return result_gdf
+        else:
+            # Non-temporal data - just add new columns to the existing GeoDataFrame
+            result_gdf = gdb.copy() if not inplace else gdb
+
+            # Get column names from the results (excluding utility columns)
+            result_cols = [col for col in combined_df.columns if col not in ['_geometry_index']]
+
+            # Create a mapping from geometry index to result rows
+            geom_idx_to_row = {}
+            for idx, row in combined_df.iterrows():
+                geom_idx = int(row['_geometry_index'])
+                geom_idx_to_row[geom_idx] = row
+
+            # Add results as new columns to the GeoDataFrame
+            for col in result_cols:
+                # Initialize the column with None or appropriate default
+                if col not in result_gdf.columns:
+                    result_gdf[col] = None
+
+                # Fill in values from results
+                for geom_idx, row in geom_idx_to_row.items():
+                    result_gdf.loc[geom_idx, col] = row[col]
+
+            if inplace:
+                return None
+            else:
+                return result_gdf
+
+    def zonal_stats(self, gdb, expr, conc=20, inplace=False, output="csv"):
+        """
+        Compute zonal statistics for all geometries in a GeoDataFrame.
+
+        Args:
+            gdb (geopandas.GeoDataFrame): GeoDataFrame containing geometries
+            expr (str): Terrakio expression to evaluate, can include spatial aggregations
+            conc (int): Number of concurrent requests to make
+            inplace (bool): Whether to modify the input GeoDataFrame in place
+            output (str): Output format (csv or netcdf)
+
+        Returns:
+            geopandas.GeoDataFrame: GeoDataFrame with added columns for results, or None if inplace=True
+        """
+        import asyncio
+        return asyncio.run(self.zonal_stats_async(gdb, expr, conc, inplace, output))
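As a usage illustration, a minimal sketch of the new zonal_stats entry point. The constructor keywords, input path, and expression are placeholders, not part of this diff:

import geopandas as gpd
from terrakio_core.client import BaseClient

# Assumed constructor keywords; the diff only shows the attributes the client uses.
client = BaseClient(url="https://api.example.com", key="YOUR_API_KEY")

gdf = gpd.read_file("fields.geojson")  # placeholder polygon layer

# zonal_stats wraps zonal_stats_async in asyncio.run(); each batch issues up to
# `conc` concurrent wcs_async requests, and per-geometry results come back as new
# columns (or under a (geometry_index, time) MultiIndex for temporal data).
result = client.zonal_stats(gdf, expr="mean(dataset.band)", conc=10)
print(result.head())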
terrakio_core/mass_stats.py ADDED

@@ -0,0 +1,262 @@
+import requests
+from typing import Optional, Dict, Any
+
+class MassStats:
+    def __init__(self, base_url: str, api_key: str, verify: bool = True, timeout: int = 60):
+        self.base_url = base_url.rstrip('/')
+        self.api_key = api_key
+        self.verify = verify
+        self.timeout = timeout
+        self.session = requests.Session()
+        self.session.headers.update({
+            'x-api-key': self.api_key
+        })
+
+    def upload_request(
+        self,
+        name: str,
+        size: int,
+        bucket: str,
+        output: str,
+        location: Optional[str] = None,
+        force_loc: bool = False,
+        config: Optional[Dict[str, Any]] = None,
+        overwrite: bool = False,
+        server: Optional[str] = None,
+        skip_existing: bool = False
+    ) -> Dict[str, Any]:
+        """
+        Initiate a mass stats upload job.
+
+        Args:
+            name: Name of the job
+            size: Size of the data
+            bucket: Storage bucket
+            output: Output path or identifier
+            location: (Optional) Location for the upload
+            force_loc: Force location usage
+            config: Optional configuration dictionary
+            overwrite: Overwrite existing data
+            server: Optional server
+            skip_existing: Skip existing files
+        """
+        url = f"{self.base_url}/mass_stats/upload"
+        data = {
+            "name": name,
+            "size": size,
+            "bucket": bucket,
+            "output": output,
+            "force_loc": force_loc,
+            "overwrite": overwrite,
+            "skip_existing": skip_existing
+        }
+        if location is not None:
+            data["location"] = location
+        if config is not None:
+            data["config"] = config
+        if server is not None:
+            data["server"] = server
+        response = self.session.post(url, json=data, verify=self.verify, timeout=self.timeout)
+        print("the response is ", response.text)
+        # response.raise_for_status()
+        return response.json()
+
+    def start_job(self, task_id: str) -> Dict[str, Any]:
+        """
+        Start a mass stats job by task ID.
+        """
+        url = f"{self.base_url}/mass_stats/start/{task_id}"
+        print("the self session header is ", self.session.headers)
+        response = self.session.post(url, verify=self.verify, timeout=self.timeout)
+        response.raise_for_status()
+        return response.json()
+
+    def get_task_id(self, name: str, stage: str, uid: Optional[str] = None) -> Dict[str, Any]:
+        """
+        Get the task ID for a mass stats job by name and stage (and optionally user ID).
+        """
+        url = f"{self.base_url}/mass_stats/job_id?name={name}&stage={stage}"
+        if uid is not None:
+            url += f"&uid={uid}"
+        response = self.session.get(url, verify=self.verify, timeout=self.timeout)
+        print("response text is ", response.text)
+        return response.json()
+
+    def track_job(self, ids: Optional[list] = None) -> Dict[str, Any]:
+        """
+        Track the status of one or more mass stats jobs.
+        If ids is None, gets progress for all of the user's jobs.
+        """
+        url = f"{self.base_url}/mass_stats/track"
+        data = {"ids": ids} if ids is not None else {}
+        response = self.session.post(url, json=data, verify=self.verify, timeout=self.timeout)
+        response.raise_for_status()
+        return response.json()
+
+    def get_history(self, limit: int = 100) -> Dict[str, Any]:
+        """
+        Get the history of mass stats jobs.
+        """
+        url = f"{self.base_url}/mass_stats/history"
+        params = {"limit": limit}
+        response = self.session.get(url, params=params, verify=self.verify, timeout=self.timeout)
+        response.raise_for_status()
+        return response.json()
+
+    def start_post_processing(
+        self,
+        process_name: str,
+        data_name: str,
+        output: str,
+        consumer_path: str,
+        overwrite: bool = False
+    ) -> Dict[str, Any]:
+        """
+        Start post processing for a mass stats job.
+        Args:
+            process_name: Folder to store output
+            data_name: Name of job used to create data
+            output: Output type
+            consumer_path: Path to the post processing script (Python file)
+            overwrite: Overwrite existing post processing output in same location
+        Returns:
+            Dict with task_id
+        """
+        url = f"{self.base_url}/mass_stats/post_process"
+        files = {
+            'consumer': (consumer_path, open(consumer_path, 'rb'), 'text/x-python')
+        }
+        data = {
+            'process_name': process_name,
+            'data_name': data_name,
+            'output': output,
+            'overwrite': str(overwrite).lower()
+        }
+        response = self.session.post(url, data=data, files=files, verify=self.verify, timeout=self.timeout)
+        print("the response is ", response.text)
+        # response.raise_for_status()
+        return response.json()
+
+    def download_results(
+        self,
+        id: Optional[str] = None,
+        force_loc: bool = False,
+        bucket: Optional[str] = None,
+        location: Optional[str] = None,
+        output: Optional[str] = None,
+        file_name: Optional[str] = None
+    ) -> bytes:
+        """
+        Download results from a mass stats job or arbitrary results if force_loc is True.
+        Returns the content of the .zip file.
+        """
+        url = f"{self.base_url}/mass_stats/download"
+        data = {}
+        if id is not None:
+            data["id"] = id
+        if force_loc:
+            data["force_loc"] = True
+        if bucket is not None:
+            data["bucket"] = bucket
+        if location is not None:
+            data["location"] = location
+        if output is not None:
+            data["output"] = output
+        if file_name is not None:
+            data["file_name"] = file_name
+        response = self.session.post(url, json=data, verify=self.verify, timeout=self.timeout)
+        print("the response is ", response.text)
+        # response.raise_for_status()
+        print("the response content is ", response.content)
+        return response.content
+
+    def cancel_job(self, id: str) -> Dict[str, Any]:
+        """
+        Cancel a mass stats job by ID.
+        """
+        url = f"{self.base_url}/mass_stats/cancel/{id}"
+        response = self.session.post(url, verify=self.verify, timeout=self.timeout)
+        response.raise_for_status()
+        return response.json()
+
+    def cancel_all_jobs(self) -> Dict[str, Any]:
+        """
+        Cancel all mass stats jobs for the user.
+        """
+        url = f"{self.base_url}/mass_stats/cancel"
+        response = self.session.post(url, verify=self.verify, timeout=self.timeout)
+        response.raise_for_status()
+        return response.json()
+
+    def create_pyramids(self, name: str, levels: int, config: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Create pyramids for a dataset.
+        Args:
+            name: Name for the pyramid job
+            levels: Number of zoom levels to compute
+            config: Dataset config (mapping)
+        Returns:
+            Dict with task_id
+        """
+        url = f"{self.base_url}/pyramids/create"
+        data = {
+            "name": name,
+            "levels": levels,
+            "config": config
+        }
+        response = self.session.post(url, json=data, verify=self.verify, timeout=self.timeout)
+        print("the url is ", url)
+        print("the response is ", response.text)
+        print("the response status code is ", response.status_code)
+        # response.raise_for_status()
+        return response.json()
+
+    def random_sample(
+        self,
+        name: str,
+        config: dict,
+        aoi: dict,
+        samples: int,
+        year_range: list,
+        crs: str,
+        tile_size: int,
+        res: float,
+        output: str,
+        server: str,
+        region: str,
+        bucket: str,
+        overwrite: bool = False
+    ) -> Dict[str, Any]:
+        """
+        Submit a random sample job.
+        """
+        if year_range is None or len(year_range) != 2:
+            raise ValueError("year_range must be a list of two integers")
+        start_year, end_year = year_range
+        if start_year is None or end_year is None:
+            raise ValueError("Both start_year and end_year must be provided for year_range.")
+
+        url = f"{self.base_url}/random_sample"
+        data = {
+            "name": name,
+            "overwrite": overwrite,
+            "config": config,
+            "aoi": aoi,
+            "samples": samples,
+            "year_range": [start_year, end_year],
+            "crs": crs,
+            "tile_size": tile_size,
+            "res": res,
+            "output": output,
+            "server": server,
+            "region": region,
+            "bucket": bucket
+        }
+        print("the data is ", data)
+        print("the url is ", url)
+        response = self.session.post(url, json=data, verify=self.verify, timeout=self.timeout)
+        print("Status code:", response.status_code)
+        print("Response text:", response.text)
+        # response.raise_for_status()
+        return response.json()
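A minimal sketch of the upload → start → track → download workflow using MassStats directly. The endpoint, key, job name, bucket, and the "task_id" response field are assumptions (the upload_request return shape is not documented in this diff):

from terrakio_core.mass_stats import MassStats

# Placeholder endpoint and credentials; substitute real values.
ms = MassStats(base_url="https://api.example.com", api_key="YOUR_API_KEY")

# 1. Register the upload, 2. start it, 3. poll progress, 4. fetch the zip.
job = ms.upload_request(name="demo-job", size=100, bucket="my-bucket", output="csv")
task_id = job["task_id"]  # assumed response field, mirroring the docstrings above
ms.start_job(task_id)
print(ms.track_job([task_id]))
with open("results.zip", "wb") as fh:
    fh.write(ms.download_results(id=task_id))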
{terrakio_core-0.1.9.dist-info → terrakio_core-0.2.0.dist-info}/METADATA CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: terrakio-core
-Version: 0.1.9
+Version: 0.2.0
 Summary: Core components for Terrakio API clients
 Home-page: https://github.com/HaizeaAnalytics/terrakio-python-api
 Author: Yupeng Chao
@@ -18,6 +18,7 @@ Classifier: Development Status :: 4 - Beta
 Requires-Python: >=3.7
 Description-Content-Type: text/markdown
 Requires-Dist: requests>=2.25.0
+Requires-Dist: aiohttp>=3.8.0
 Requires-Dist: pyyaml>=5.1
 Requires-Dist: xarray>=2023.1.0
 Requires-Dist: shapely>=2.0.0
{terrakio_core-0.1.9.dist-info → terrakio_core-0.2.0.dist-info}/RECORD CHANGED

@@ -1,11 +1,12 @@
 terrakio_core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 terrakio_core/auth.py,sha256=Y3X5CcRUO7rAsrv995cSedZFKJAsW6ObDinYCbcQMpc,7605
-terrakio_core/client.py,sha256=
+terrakio_core/client.py,sha256=A8W-Tun8ZeMnQhr7d6i2ekyl4sylWTVq5amVLUhV6kE,36446
 terrakio_core/config.py,sha256=AwJ1VgR5K7N32XCU5k7_Dp1nIv_FYt8MBonq9yKlGzA,2658
 terrakio_core/dataset_management.py,sha256=hhO35fwStS6HYFQdKP9wkr3DxHgjvpctmIU8UWH6w6U,8742
 terrakio_core/exceptions.py,sha256=9S-I20-QiDRj1qgjFyYUwYM7BLic_bxurcDOIm2Fu_0,410
+terrakio_core/mass_stats.py,sha256=AqYJsd6nqo2BDh4vEPUDgsv4T0UR1_TPDoXa3WO3gTU,9284
 terrakio_core/user_management.py,sha256=Sl7wJOg1eUVUpcsgRjeknibYiIleLJk1VgJI7Mdpsss,7345
-terrakio_core-0.
-terrakio_core-0.
-terrakio_core-0.
-terrakio_core-0.
+terrakio_core-0.2.0.dist-info/METADATA,sha256=vvW6ODeKysnCJ0FbE2Uf2XbtYn2Tx9SaBlWcLbb50Ds,1518
+terrakio_core-0.2.0.dist-info/WHEEL,sha256=QZxptf4Y1BKFRCEDxD4h2V0mBFQOVFLFEpvxHmIs52A,91
+terrakio_core-0.2.0.dist-info/top_level.txt,sha256=5cBj6O7rNWyn97ND4YuvvXm0Crv4RxttT4JZvNdOG6Q,14
+terrakio_core-0.2.0.dist-info/RECORD,,

{terrakio_core-0.1.9.dist-info → terrakio_core-0.2.0.dist-info}/top_level.txt
File without changes