tilebox-workflows 0.46.0__tar.gz → 0.48.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/PKG-INFO +1 -1
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/pyproject.toml +7 -1
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/automations/cron.py +3 -4
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/automations/storage_event.py +3 -4
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/cache.py +62 -10
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/client.py +1 -1
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/data.py +3 -3
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/formatting/job.py +1 -2
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/jobs/client.py +10 -5
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/observability/logging.py +2 -2
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/observability/tracing.py +1 -1
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/runner/task_runner.py +12 -7
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/task.py +26 -15
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/timeseries.py +10 -12
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/.gitignore +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/README.md +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/__init__.py +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/automations/__init__.py +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/automations/client.py +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/automations/service.py +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/clusters/__init__.py +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/clusters/client.py +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/clusters/service.py +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/formatting/__init__.py +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/interceptors.py +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/jobs/__init__.py +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/jobs/service.py +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/observability/__init__.py +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/runner/__init__.py +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/runner/task_service.py +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/workflows/v1/automation_pb2.py +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/workflows/v1/automation_pb2.pyi +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/workflows/v1/automation_pb2_grpc.py +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/workflows/v1/core_pb2.py +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/workflows/v1/core_pb2.pyi +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/workflows/v1/core_pb2_grpc.py +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/workflows/v1/diagram_pb2.py +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/workflows/v1/diagram_pb2.pyi +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/workflows/v1/diagram_pb2_grpc.py +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/workflows/v1/job_pb2.py +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/workflows/v1/job_pb2.pyi +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/workflows/v1/job_pb2_grpc.py +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/workflows/v1/task_pb2.py +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/workflows/v1/task_pb2.pyi +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/workflows/v1/task_pb2_grpc.py +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/workflows/v1/workflows_pb2.py +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/workflows/v1/workflows_pb2.pyi +0 -0
- {tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/workflows/v1/workflows_pb2_grpc.py +0 -0

{tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/pyproject.toml RENAMED
@@ -35,7 +35,13 @@ dependencies = [
 ]

 [dependency-groups]
-dev = [
+dev = [
+    "hypothesis>=6.112.1",
+    "pytest-cov>=5.0.0",
+    "pytest>=8.3.2",
+    "moto>=5",
+    "pytest-asyncio>=1.3.0",
+]

 [project.urls]
 Homepage = "https://tilebox.com"

{tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/automations/cron.py RENAMED
@@ -1,9 +1,8 @@
 from dataclasses import replace
 from datetime import datetime, timezone
-from typing import cast

 try:
-    from typing import Self
+    from typing import Self  # ty: ignore[unresolved-import]
 except ImportError:  # Self is only available in Python 3.11+
     from typing_extensions import Self

@@ -35,11 +34,11 @@ class CronTask(Task):
         return message.SerializeToString()

     @classmethod
-    def _deserialize(cls, task_input: bytes, context: RunnerContext | None = None) -> Self:  # noqa: ARG003
+    def _deserialize(cls: "type[CronTask]", task_input: bytes, context: RunnerContext | None = None) -> Self:  # noqa: ARG003
         message = AutomationMessage()
         message.ParseFromString(task_input)

-        task =
+        task = deserialize_task(cls, message.args)

         event_message = TriggeredCronEventMessage()
         event_message.ParseFromString(message.trigger_event)

{tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/automations/storage_event.py RENAMED
@@ -1,9 +1,8 @@
 from dataclasses import replace
-from typing import cast
 from uuid import UUID

 try:
-    from typing import Self
+    from typing import Self  # ty: ignore[unresolved-import]
 except ImportError:  # Self is only available in Python 3.11+
     from typing_extensions import Self

@@ -43,11 +42,11 @@ class StorageEventTask(Task):
         return message.SerializeToString()

     @classmethod
-    def _deserialize(cls, task_input: bytes, context: RunnerContext | None = None) -> Self:
+    def _deserialize(cls: "type[StorageEventTask]", task_input: bytes, context: RunnerContext | None = None) -> Self:
         message = AutomationMessage()
         message.ParseFromString(task_input)

-        task =
+        task = deserialize_task(cls, message.args)

         event_message = TriggeredStorageEventMessage()
         event_message.ParseFromString(message.trigger_event)
{tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/cache.py RENAMED
@@ -1,13 +1,16 @@
+import contextlib
 import warnings
 from abc import ABC, abstractmethod
 from collections.abc import Iterator
 from io import BytesIO
 from pathlib import Path
+from pathlib import PurePosixPath as ObjectPath

 import boto3
 from botocore.exceptions import ClientError
 from google.cloud.exceptions import NotFound
 from google.cloud.storage import Blob, Bucket
+from obstore.store import ObjectStore


 class JobCache(ABC):
@@ -62,6 +65,53 @@ class NoCache(JobCache):
         return self


+class ObstoreCache(JobCache):
+    def __init__(self, store: ObjectStore, prefix: str | ObjectPath = ObjectPath(".")) -> None:
+        """A cache implementation backed by an obstore ObjectStore.
+
+        This cache implementation is the recommended way of working with the cache, as it provides a unified interface
+        for working with different object stores, while also providing a way to transparently work with local files
+        as well.
+
+        Args:
+            store: The object store to use for the cache.
+            prefix: A path prefix to append to all objects stored in the cache. Defaults to no prefix.
+        """
+        self.store = store
+        self.prefix = ObjectPath(prefix)
+
+    def __contains__(self, key: str) -> bool:
+        with contextlib.suppress(OSError):
+            self.store.get(str(self.prefix / key))
+            return True  # if get is successful, we know the key is in the cache
+
+        return False
+
+    def __setitem__(self, key: str, value: bytes) -> None:
+        self.store.put(str(self.prefix / key), value)
+
+    def __delitem__(self, key: str) -> None:
+        try:
+            self.store.delete(str(self.prefix / key))
+        except OSError:
+            raise KeyError(f"{key} is not cached!") from None
+
+    def __getitem__(self, key: str) -> bytes:
+        try:
+            entry = self.store.get(str(self.prefix / key))
+            return bytes(entry.bytes())
+        except OSError:
+            raise KeyError(f"{key} is not cached!") from None
+
+    def __iter__(self) -> Iterator[str]:
+        for obj in self.store.list_with_delimiter(str(self.prefix))["objects"]:
+            path: str = obj["path"]
+            yield path.removeprefix(str(self.prefix) + "/")
+
+    def group(self, key: str) -> "ObstoreCache":
+        return ObstoreCache(self.store, prefix=str(self.prefix / key))
+
+
 class InMemoryCache(JobCache):
     def __init__(self) -> None:
         """A simple in-memory cache implementation.
@@ -153,7 +203,7 @@ class LocalFileSystemCache(JobCache):
         Args:
             root: File system path where the cache will be stored. Defaults to "cache" in the current working directory.
         """
-        self.root =
+        self.root = Path(root)

     def __contains__(self, key: str) -> bool:
         return (self.root / key).exists()
@@ -184,7 +234,7 @@ class LocalFileSystemCache(JobCache):


 class GoogleStorageCache(JobCache):
-    def __init__(self, bucket: Bucket, prefix: str = "jobs") -> None:
+    def __init__(self, bucket: Bucket, prefix: str | ObjectPath = "jobs") -> None:
         """A cache implementation that stores data in Google Cloud Storage.

         Args:
@@ -192,23 +242,25 @@ class GoogleStorageCache(JobCache):
             prefix: A path prefix to append to all objects stored in the cache. Defaults to "jobs".
         """
         self.bucket = bucket
-        self.prefix =
+        self.prefix = ObjectPath(
+            prefix
+        )  # we still use pathlib here, because it's easier to work with when joining paths

     def _blob(self, key: str) -> Blob:
         return self.bucket.blob(str(self.prefix / key))

     def __contains__(self, key: str) -> bool:
         # GCS library has some weird typing issues, so let's ignore them for now
-        return self._blob(key).exists()
+        return self._blob(key).exists()

     def __setitem__(self, key: str, value: bytes) -> None:
         # GCS library has some weird typing issues, so let's ignore them for now
-        self._blob(key).upload_from_file(BytesIO(value))
+        self._blob(key).upload_from_file(BytesIO(value))

     def __getitem__(self, key: str) -> bytes:
         try:
             # GCS library has some weird typing issues, so let's ignore them for now
-            return self._blob(key).download_as_bytes()
+            return self._blob(key).download_as_bytes()
         except NotFound:
             raise KeyError(f"{key} is not cached!") from None

@@ -224,18 +276,18 @@ class GoogleStorageCache(JobCache):
         # in the "folder", and not the ones in subfolders

         # GCS library has some weird typing issues, so let's ignore them for now
-        blobs = self.bucket.list_blobs(prefix=prefix, delimiter="/")
+        blobs = self.bucket.list_blobs(prefix=prefix, delimiter="/")

         # make the names relative to the cache prefix (but including the key in the name)
         for blob in blobs:
-            yield str(
+            yield str(ObjectPath(blob.name).relative_to(self.prefix))

     def group(self, key: str) -> "GoogleStorageCache":
         return GoogleStorageCache(self.bucket, prefix=str(self.prefix / key))


 class AmazonS3Cache(JobCache):
-    def __init__(self, bucket: str, prefix: str = "jobs") -> None:
+    def __init__(self, bucket: str, prefix: str | ObjectPath = "jobs") -> None:
         """A cache implementation that stores data in Amazon S3.

         Args:
@@ -243,7 +295,7 @@ class AmazonS3Cache(JobCache):
             prefix: A path prefix to append to all objects stored in the cache. Defaults to "jobs".
         """
         self.bucket = bucket
-        self.prefix =
+        self.prefix = ObjectPath(prefix)
         with warnings.catch_warnings():
             # https://github.com/boto/boto3/issues/3889
             warnings.filterwarnings("ignore", category=DeprecationWarning, message=".*datetime.utcnow.*")
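
The new ObstoreCache above exposes any obstore ObjectStore through the existing JobCache mapping interface. A minimal sketch of how it could be wired up (not taken from the package docs; it assumes the optional obstore dependency is installed and uses its in-memory MemoryStore purely for illustration):

    from obstore.store import MemoryStore

    from tilebox.workflows.cache import ObstoreCache

    # an in-memory store keeps the example self-contained; other obstore stores work the same way
    cache = ObstoreCache(MemoryStore(), prefix="jobs")
    cache["download/scene-1"] = b"some bytes"   # __setitem__ -> store.put("jobs/download/scene-1", ...)
    assert "download/scene-1" in cache          # __contains__ -> store.get(...) succeeded
    assert cache["download/scene-1"] == b"some bytes"

    sub_cache = cache.group("download")         # nested cache rooted at "jobs/download"
    print(list(sub_cache))                      # keys relative to the group prefix, e.g. ["scene-1"]

Because the prefix handling is identical for every backing store, the same calling code can run against a local directory store during development and an S3 or GCS store in production, which is what the docstring means by a unified interface.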

{tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/client.py RENAMED
@@ -29,7 +29,7 @@ class Client:
         token: The API Key to authenticate with. If not set the `TILEBOX_API_KEY` environment variable will be used.
         """
         token = _token_from_env(url, token)
-        self._auth = {"token": token, "url": url}
+        self._auth: dict[str, str] = {"token": token, "url": url}
         self._channel = open_channel(url, token)

         self._tracer: WorkflowTracer | None = None

{tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/data.py RENAMED
@@ -200,7 +200,7 @@ class JobState(Enum):
 _JOB_STATES = {state.value: state for state in JobState}

 # JobState.QUEUED is deprecated and has been renamed to SUBMITTED, but we keep it around for backwards compatibility
-JobState.QUEUED = JobState.SUBMITTED  #
+JobState.QUEUED = JobState.SUBMITTED  # ty: ignore[unresolved-attribute]


 @dataclass(order=True, frozen=True)
@@ -529,8 +529,8 @@ class StorageLocation:
                     span.set_attribute("bucket", self.location)
                     span.set_attribute("path", path)
                     # GCS library has some weird typing issues, so let's ignore them for now
-                    blob = runner_context.gcs_client(self.location).blob(path)
-                    return blob.download_as_bytes()
+                    blob = runner_context.gcs_client(self.location).blob(path)
+                    return blob.download_as_bytes()
             case StorageType.S3:
                 with runner_context.tracer.start_as_current_span("s3.read") as span:
                     span.set_attribute("bucket", self.location)
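
The QUEUED alias shown above keeps older code that compares against JobState.QUEUED working: it points at the same member as SUBMITTED rather than being a separate enum value. A quick illustration, assuming the import path matches the module shown here:

    from tilebox.workflows.data import JobState

    # the deprecated alias is the same object, not a distinct enum member
    assert JobState.QUEUED is JobState.SUBMITTED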

{tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/formatting/job.py RENAMED
@@ -341,8 +341,7 @@ def _progress_indicator_bar(label: str, done: int, total: int, state: JobState)
         f"<span class='tbx-detail-mono'><span class='tbx-detail-value'>{percentage:.0%}</span> "
         f"<span class='tbx-detail-value-muted'>({done} / {total})</span></span>"
     )
-
-    return HBox([progress, label])
+    return HBox([progress, HTML(label_html)])


 _eye_icon = """

{tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/jobs/client.py RENAMED
@@ -19,7 +19,7 @@ from tilebox.workflows.task import FutureTask, merge_future_tasks_to_submissions
 from tilebox.workflows.task import Task as TaskInstance

 try:
-    from IPython.display import HTML, display
+    from IPython.display import HTML, display
 except ImportError:

     class HTML:
@@ -159,7 +159,7 @@ class JobClient:

     def query(
         self,
-        temporal_extent: TimeIntervalLike | IDIntervalLike,
+        temporal_extent: "TimeIntervalLike | IDIntervalLike",
         automation_ids: UUID | list[UUID] | None = None,
         job_states: JobState | list[JobState] | None = None,
         name: str | None = None,
@@ -192,11 +192,13 @@ class JobClient:
         id_interval: IDInterval | None = None
         match temporal_extent:
             case (str(), str()):
+                # ty doesn't narrow types on match statements yet, once it does we can remove this cast
+                str_temporal_extent: tuple[str, str] = temporal_extent  # ty: ignore[invalid-assignment]
                 # this is either a tuple of datetimes or a tuple of UUIDs
                 try:
-                    id_interval = IDInterval.parse(
+                    id_interval = IDInterval.parse(str_temporal_extent)
                 except ValueError:
-                    dataset_time_interval = TimeInterval.parse(
+                    dataset_time_interval = TimeInterval.parse(str_temporal_extent)
                     time_interval = TimeInterval(
                         start=dataset_time_interval.start,
                         end=dataset_time_interval.end,
@@ -206,7 +208,10 @@ class JobClient:
             case IDInterval(_, _, _, _) | (UUID(), UUID()):
                 id_interval = IDInterval.parse(temporal_extent)
             case _:
-
+                # ty doesn't narrow types on match statements yet, once it does we can remove this cast
+                # because due to the match statement above we know that temporal_extent is a TimeIntervalLike
+                time_interval_like: TimeIntervalLike = temporal_extent  # ty: ignore[invalid-assignment]
+                dataset_time_interval = TimeInterval.parse(time_interval_like)
                 time_interval = TimeInterval(
                     start=dataset_time_interval.start,
                     end=dataset_time_interval.end,
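
The casts above only affect type checking; at runtime query still tries a (str, str) tuple as an ID interval first and falls back to parsing it as a time interval. A hedged usage sketch (the Client construction and the jobs() accessor are assumptions based on the rest of the SDK, not shown in this diff):

    from tilebox.workflows import Client

    client = Client()  # reads TILEBOX_API_KEY from the environment
    job_client = client.jobs()

    # a (str, str) tuple is accepted as a temporal extent and parsed as a time interval
    jobs = job_client.query(("2025-01-01", "2025-02-01"), name="daily-ingest")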

{tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/observability/logging.py RENAMED
@@ -110,7 +110,7 @@ def _otel_log_exporter(
         headers=headers,
     )
     schedule_delay = int(export_interval.total_seconds() * 1000) if export_interval is not None else None
-    return BatchLogRecordProcessor(exporter, schedule_delay_millis=schedule_delay)
+    return BatchLogRecordProcessor(exporter, schedule_delay_millis=schedule_delay)


 def configure_otel_logging(
@@ -324,7 +324,7 @@ def get_logger(name: str | None = None, level: int = logging.NOTSET) -> logging.
         handler.setFormatter(ColorfulConsoleFormatter())
         # we set a special attribute, which allows as to remove this handler again as soon
         # as we configure an actual logging handler
-        handler._is_default = True  #
+        handler._is_default = True  # ty: ignore[unresolved-attribute] # noqa: SLF001
         root_logger.addHandler(handler)

     logger = logging.getLogger(f"{_LOGGING_NAMESPACE}.{name}")

{tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/observability/tracing.py RENAMED
@@ -116,7 +116,7 @@ def _otel_span_exporter(
         headers=headers,
     )
     schedule_delay = int(export_interval.total_seconds() * 1000) if export_interval is not None else None
-    return BatchSpanProcessor(exporter, schedule_delay_millis=schedule_delay)
+    return BatchSpanProcessor(exporter, schedule_delay_millis=schedule_delay)


 class SpanEventLoggingHandler(logging.Handler):

{tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/runner/task_runner.py RENAMED
@@ -19,6 +19,11 @@ from typing import Any, TypeAlias, TypeVar
 from uuid import UUID
 from warnings import warn

+try:
+    from typing import Self  # ty: ignore[unresolved-import]
+except ImportError:  # Self is only available in Python 3.11+
+    from typing_extensions import Self
+
 from loguru import logger
 from opentelemetry.trace.status import StatusCode
 from tenacity import retry, retry_if_exception_type, stop_when_event_set, wait_random_exponential
@@ -77,7 +82,7 @@ def _retry_backoff(func: Callable[..., WrappedFnReturnT], stop: stop_base) -> Ca
     Returns:
         The wrapped function
     """
-    return retry(
+    return retry(
         retry=retry_if_exception_type(InternalServerError),
         stop=stop,
         wait=wait_random_exponential(
@@ -159,8 +164,8 @@ class _LeaseRenewer(SpawnProcess):
         # we don't want to fork the current process, but instead spawn a new one
         # therefore we need to use the spawn context to create the queues
         ctx = get_context("spawn")
-        self._new_leases: Queue[tuple[UUID, TaskLease]] = ctx.Queue()
-        self._done_tasks: Queue[UUID] = ctx.Queue()
+        self._new_leases: Queue[tuple[UUID, TaskLease]] = ctx.Queue()
+        self._done_tasks: Queue[UUID] = ctx.Queue()

     def run(self) -> None:
         lease_renewer(self._url, self._token, self._new_leases, self._done_tasks)
@@ -260,7 +265,7 @@ class _GracefulShutdown:
         """Sleep for a given number of seconds, or until an interrupt signal is received."""
         self._interrupted.wait(seconds)

-    def __enter__(self) ->
+    def __enter__(self) -> Self:
         """Enter a graceful shutdown context. Intercepts SIGTERM and SIGINT signals and delays them by a grace period."""
         self._original_sigterm = signal.signal(signal.SIGTERM, self._external_interrupt_handler)
         self._original_sigint = signal.signal(signal.SIGINT, self._external_interrupt_handler)
@@ -467,7 +472,7 @@ class TaskRunner:
                 span.update_name(f"task/{task_class.__name__}")

                 try:
-                    task_instance = task_class._deserialize(task.input, self._context)  # noqa: SLF001
+                    task_instance = task_class._deserialize(task.input, self._context)  # ty: ignore[possibly-missing-attribute] # noqa: SLF001
                 except json.JSONDecodeError:
                     self.logger.exception(f"Failed to deserialize input for task execution {task.id}")
                     raise ValueError(f"Failed to deserialize input for task execution {task.id}") from None
@@ -559,9 +564,9 @@ class ExecutionContext(ExecutionContextBase):
     def submit_subtasks(
         self,
         tasks: Sequence[TaskInstance],
+        depends_on: FutureTask | list[FutureTask] | None = None,
         cluster: str | None = None,
         max_retries: int = 0,
-        depends_on: FutureTask | list[FutureTask] | None = None,
     ) -> list[FutureTask]:
         return [
             self.submit_subtask(task, cluster=cluster, max_retries=max_retries, depends_on=depends_on) for task in tasks
@@ -575,7 +580,7 @@ class ExecutionContext(ExecutionContextBase):
             DeprecationWarning,
             stacklevel=2,
         )
-        return self.submit_subtasks(tasks, cluster, max_retries)
+        return self.submit_subtasks(tasks, cluster=cluster, max_retries=max_retries)

     def progress(self, label: str | None = None) -> ProgressUpdate:
         if label == "":
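
The reordering above makes depends_on the second parameter of submit_subtasks, mirroring submit_subtask, so dependent batches read naturally with keyword arguments. A small sketch (task names are invented for illustration; the import path follows the package's public API):

    from tilebox.workflows import ExecutionContext, Task


    class ProcessChunk(Task):
        index: int

        def execute(self, context: ExecutionContext) -> None: ...


    class FanOut(Task):
        def execute(self, context: ExecutionContext) -> None:
            first = context.submit_subtask(ProcessChunk(0))
            # the remaining chunks only start once the first one has finished
            context.submit_subtasks(
                [ProcessChunk(i) for i in range(1, 4)],
                depends_on=[first],
                max_retries=2,
            )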

{tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/task.py RENAMED
@@ -50,7 +50,7 @@ class _Taskify(type):
         return task_class

     # Convert the class to a dataclass
-    task_class = dataclass(task_class)
+    task_class = dataclass(task_class)

     # we allow overriding the execute method, but we still want to validate it
     # so we search for the closest base class that has an execute method and use
@@ -118,7 +118,7 @@ class Task(metaclass=_ABCTaskify):

     @classmethod
     def _deserialize(cls, task_input: bytes, context: RunnerContext | None = None) -> "Task":  # noqa: ARG003
-        return
+        return deserialize_task(cls, task_input)


 def _validate_execute_method(
@@ -201,7 +201,7 @@ def _get_task_identifier(task_class: type) -> TaskIdentifier:
     class_name = task_class.__name__
     if hasattr(task_class, "identifier"):  # if the task class has an identifier method, we use that
         try:
-            name, version = task_class.identifier()
+            name, version = task_class.identifier()  # ty: ignore[call-non-callable]
         except TypeError as err:
             raise ValueError(
                 f"Failed to invoke {class_name}.identifier(). Is it a staticmethod or classmethod without parameters?"
@@ -356,7 +356,11 @@ class ExecutionContext(ABC):

     @abstractmethod
     def submit_subtask(
-        self,
+        self,
+        task: Task,
+        depends_on: FutureTask | list[FutureTask] | None = None,
+        cluster: str | None = None,
+        max_retries: int = 0,
     ) -> FutureTask:
         """Submit a subtask of the current task.

@@ -374,7 +378,11 @@ class ExecutionContext(ABC):

     @abstractmethod
     def submit_subtasks(
-        self,
+        self,
+        tasks: Sequence[Task],
+        depends_on: FutureTask | list[FutureTask] | None = None,
+        cluster: str | None = None,
+        max_retries: int = 0,
     ) -> list[FutureTask]:
         """Submit a batch of subtasks of the current task. Similar to `submit_subtask`, but for multiple tasks."""

@@ -414,12 +422,12 @@ def serialize_task(task: Task) -> bytes:
         field = json.dumps(field).encode()
         return field

-    return json.dumps(_serialize_as_dict(task)).encode()
+    return json.dumps(_serialize_as_dict(task)).encode()


 def _serialize_as_dict(task: Task) -> dict[str, Any]:
     as_dict: dict[str, Any] = {}
-    for dataclass_field in fields(task):  #
+    for dataclass_field in fields(task):  # ty: ignore[invalid-argument-type]
         skip = dataclass_field.metadata.get("skip_serialization", False)
         if skip:
             continue
@@ -444,11 +452,14 @@ def _serialize_value(value: Any, base64_encode_protobuf: bool) -> Any:  # noqa:
             return b64encode(value.SerializeToString()).decode("ascii")
         return value.SerializeToString()
     if is_dataclass(value):
-        return _serialize_as_dict(value)
+        return _serialize_as_dict(value)
     return value


-def deserialize_task(task_cls: type, task_input: bytes) -> Task:
+_T = TypeVar("_T", bound=Task)
+
+
+def deserialize_task(task_cls: type[_T], task_input: bytes) -> _T:
     """Deserialize the input of a task from a buffer of bytes.

     The task_cls is expected to be a dataclass, containing an arbitrary number of fields.
@@ -460,22 +471,22 @@ def deserialize_task(task_cls: type, task_input: bytes) -> Task:
         return task_cls()  # empty task
     if len(task_fields) == 1:
         # if there is only one field, we deserialize it directly
-        field_type = _get_deserialization_field_type(task_fields[0].type)  #
+        field_type = _get_deserialization_field_type(task_fields[0].type)  # ty: ignore[invalid-argument-type]
         if hasattr(field_type, "FromString"):  # protobuf message
-            value = field_type.FromString(task_input)  #
+            value = field_type.FromString(task_input)  # ty: ignore[call-non-callable]
         else:
-            value = _deserialize_value(field_type, json.loads(task_input.decode()))
+            value = _deserialize_value(field_type, json.loads(task_input.decode()))

         return task_cls(**{task_fields[0].name: value})

     return _deserialize_dataclass(task_cls, json.loads(task_input.decode()))


-def _deserialize_dataclass(cls: type, params: dict[str, Any]) ->
+def _deserialize_dataclass(cls: type[_T], params: dict[str, Any]) -> _T:
     """Deserialize a dataclass, while allowing recursively nested dataclasses or protobuf messages."""
     for param in list(params):
         # recursively deserialize nested dataclasses
-        field = cls.__dataclass_fields__[param]
+        field = cls.__dataclass_fields__[param]  # ty: ignore[unresolved-attribute]
         params[field.name] = _deserialize_value(field.type, params[field.name])

     return cls(**params)
@@ -487,7 +498,7 @@ def _deserialize_value(field_type: type, value: Any) -> Any:  # noqa: PLR0911

     field_type = _get_deserialization_field_type(field_type)
     if hasattr(field_type, "FromString"):
-        return field_type.FromString(b64decode(value))
+        return field_type.FromString(b64decode(value))  # ty: ignore[call-non-callable]
     if is_dataclass(field_type) and isinstance(value, dict):
         return _deserialize_dataclass(field_type, value)

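
With task_cls now typed as type[_T] bound to Task, deserialize_task returns the concrete subclass instead of a plain Task, which is what lets the automation classes earlier in this diff call it directly on cls. A small round-trip sketch (MyTask is invented; it relies on Task subclasses being turned into dataclasses as shown earlier in this file):

    from tilebox.workflows.task import Task, deserialize_task, serialize_task


    class MyTask(Task):
        name: str
        count: int


    buffer = serialize_task(MyTask("demo", 3))   # JSON-encoded dataclass fields
    restored = deserialize_task(MyTask, buffer)  # typed (and returned) as MyTask, not just Task
    assert restored == MyTask("demo", 3)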

{tilebox_workflows-0.46.0 → tilebox_workflows-0.48.0}/tilebox/workflows/timeseries.py RENAMED
@@ -26,11 +26,11 @@ def _timeseries_dataset_chunk(task: Task, call_next: ForwardExecution, context:
     if not isinstance(task, TimeseriesTask):
         raise TypeError("Task is not a timeseries task. Inherit from TimeseriesTask to mark it as such.")

-    chunk: TimeseriesDatasetChunk = task.timeseries_data
+    chunk: TimeseriesDatasetChunk = task.timeseries_data

     # let's get a collection client
     datasets_client = context.runner_context.datasets_client
-    dataset = datasets_client._dataset_by_id(str(chunk.dataset_id))  #
+    dataset = datasets_client._dataset_by_id(str(chunk.dataset_id))  # ty: ignore[possibly-missing-attribute] # noqa: SLF001
     # we already know the collection id, so we can skip the lookup (we don't know the name, but don't need it)
     collection_info = CollectionInfo(Collection(chunk.collection_id, "unknown"), None, None)
     collection = CollectionClient(dataset, collection_info)
@@ -50,7 +50,7 @@ def _timeseries_dataset_chunk(task: Task, call_next: ForwardExecution, context:

         for i in range(datapoints.sizes["time"]):
             datapoint = datapoints.isel(time=i)
-            call_next(context, datapoint)  #
+            call_next(context, datapoint)  # ty: ignore[too-many-positional-arguments]

         return  # we are done

@@ -90,7 +90,7 @@ def _timeseries_dataset_chunk(task: Task, call_next: ForwardExecution, context:
     for sub_chunk_start, sub_chunk_end in pairwise(chunks):
         sub_chunks.append(replace(chunk, time_interval=TimeInterval(sub_chunk_start, sub_chunk_end)))

-    subtasks = [replace(task, timeseries_data=sub_chunk) for sub_chunk in sub_chunks]
+    subtasks = [replace(task, timeseries_data=sub_chunk) for sub_chunk in sub_chunks]
     if len(subtasks) > 0:
         context.submit_subtasks(subtasks)

@@ -103,7 +103,7 @@ class TimeseriesTask(Task):
     timeseries_data: TimeseriesDatasetChunk

     @override
-    def execute(self, context: ExecutionContext, datapoint: xr.Dataset) -> None:  #
+    def execute(self, context: ExecutionContext, datapoint: xr.Dataset) -> None:  # ty: ignore[invalid-method-override]
         pass

@@ -136,14 +136,14 @@ def _time_interval_chunk(task: Task, call_next: ForwardExecution, context: Execu
     if not isinstance(task, TimeIntervalTask):
         raise TypeError("Task is not a time interval task. Inherit from TimeIntervalTask to mark it as such.")

-    chunk: TimeChunk = task.interval
+    chunk: TimeChunk = task.interval

     start = _make_multiple(chunk.time_interval.start, chunk.chunk_size, before=True)
     end = _make_multiple(chunk.time_interval.end, chunk.chunk_size, before=False)

     n = (end - start) // chunk.chunk_size
     if n <= 1:  # we are already a leaf task
-        return call_next(context, TimeInterval(start, end))  #
+        return call_next(context, TimeInterval(start, end))  # ty: ignore[too-many-positional-arguments]

     chunks: list[datetime] = []
     if n < 4:  # we are a branch task with less than 4 sub chunks, so a further split is not worth it
@@ -158,9 +158,7 @@ def _time_interval_chunk(task: Task, call_next: ForwardExecution, context: Execu
         TimeChunk(TimeInterval(chunk_start, chunk_end), chunk.chunk_size) for chunk_start, chunk_end in pairwise(chunks)
     ]

-    context.submit_subtasks(
-        [replace(task, interval=time_chunk) for time_chunk in time_chunks]  # type: ignore[misc]
-    )
+    context.submit_subtasks([replace(task, interval=time_chunk) for time_chunk in time_chunks])
     return None

@@ -170,12 +168,12 @@ class TimeIntervalTask(Task):
     interval: TimeChunk

     @override
-    def execute(self, context: ExecutionContext, time_interval: TimeInterval) -> None:  #
+    def execute(self, context: ExecutionContext, time_interval: TimeInterval) -> None:  # ty: ignore[invalid-method-override]
         pass


 def batch_process_time_interval(interval: TimeIntervalLike, chunk_size: timedelta) -> TimeChunk:
-    return TimeChunk(time_interval=TimeInterval.parse(interval).to_half_open(), chunk_size=chunk_size)
+    return TimeChunk(time_interval=TimeInterval.parse(interval).to_half_open(), chunk_size=chunk_size)


 def _make_multiple(time: datetime, duration: timedelta, start: datetime = _EPOCH, before: bool = True) -> datetime: