tilebox-workflows 0.46.0__py3-none-any.whl → 0.48.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tilebox/workflows/automations/cron.py CHANGED
@@ -1,9 +1,8 @@
 from dataclasses import replace
 from datetime import datetime, timezone
-from typing import cast

 try:
-    from typing import Self
+    from typing import Self  # ty: ignore[unresolved-import]
 except ImportError:  # Self is only available in Python 3.11+
     from typing_extensions import Self

@@ -35,11 +34,11 @@ class CronTask(Task):
         return message.SerializeToString()

     @classmethod
-    def _deserialize(cls, task_input: bytes, context: RunnerContext | None = None) -> Self:  # noqa: ARG003
+    def _deserialize(cls: "type[CronTask]", task_input: bytes, context: RunnerContext | None = None) -> Self:  # noqa: ARG003
         message = AutomationMessage()
         message.ParseFromString(task_input)

-        task = cast(Self, deserialize_task(cls, message.args))
+        task = deserialize_task(cls, message.args)

         event_message = TriggeredCronEventMessage()
         event_message.ParseFromString(message.trigger_event)
tilebox/workflows/automations/storage_event.py CHANGED
@@ -1,9 +1,8 @@
 from dataclasses import replace
-from typing import cast
 from uuid import UUID

 try:
-    from typing import Self
+    from typing import Self  # ty: ignore[unresolved-import]
 except ImportError:  # Self is only available in Python 3.11+
     from typing_extensions import Self

@@ -43,11 +42,11 @@ class StorageEventTask(Task):
         return message.SerializeToString()

     @classmethod
-    def _deserialize(cls, task_input: bytes, context: RunnerContext | None = None) -> Self:
+    def _deserialize(cls: "type[StorageEventTask]", task_input: bytes, context: RunnerContext | None = None) -> Self:
         message = AutomationMessage()
         message.ParseFromString(task_input)

-        task = cast(cls, deserialize_task(cls, message.args))  # type: ignore[invalid-type-form]
+        task = deserialize_task(cls, message.args)

         event_message = TriggeredStorageEventMessage()
         event_message.ParseFromString(message.trigger_event)
tilebox/workflows/cache.py CHANGED
@@ -1,13 +1,16 @@
+import contextlib
 import warnings
 from abc import ABC, abstractmethod
 from collections.abc import Iterator
 from io import BytesIO
 from pathlib import Path
+from pathlib import PurePosixPath as ObjectPath

 import boto3
 from botocore.exceptions import ClientError
 from google.cloud.exceptions import NotFound
 from google.cloud.storage import Blob, Bucket
+from obstore.store import ObjectStore


 class JobCache(ABC):
@@ -62,6 +65,53 @@ class NoCache(JobCache):
         return self


+class ObstoreCache(JobCache):
+    def __init__(self, store: ObjectStore, prefix: str | ObjectPath = ObjectPath(".")) -> None:
+        """A cache implementation backed by an obstore ObjectStore.
+
+        This cache implementation is the recommended way of working with the cache, as it provides a unified interface
+        for working with different object stores, while also providing a way to transparently work with local files
+        as well.
+
+        Args:
+            store: The object store to use for the cache.
+            prefix: A path prefix to append to all objects stored in the cache. Defaults to no prefix.
+        """
+        self.store = store
+        self.prefix = ObjectPath(prefix)
+
+    def __contains__(self, key: str) -> bool:
+        with contextlib.suppress(OSError):
+            self.store.get(str(self.prefix / key))
+            return True  # if get is successful, we know the key is in the cache
+
+        return False
+
+    def __setitem__(self, key: str, value: bytes) -> None:
+        self.store.put(str(self.prefix / key), value)
+
+    def __delitem__(self, key: str) -> None:
+        try:
+            self.store.delete(str(self.prefix / key))
+        except OSError:
+            raise KeyError(f"{key} is not cached!") from None
+
+    def __getitem__(self, key: str) -> bytes:
+        try:
+            entry = self.store.get(str(self.prefix / key))
+            return bytes(entry.bytes())
+        except OSError:
+            raise KeyError(f"{key} is not cached!") from None
+
+    def __iter__(self) -> Iterator[str]:
+        for obj in self.store.list_with_delimiter(str(self.prefix))["objects"]:
+            path: str = obj["path"]
+            yield path.removeprefix(str(self.prefix) + "/")
+
+    def group(self, key: str) -> "ObstoreCache":
+        return ObstoreCache(self.store, prefix=str(self.prefix / key))
+
+
 class InMemoryCache(JobCache):
     def __init__(self) -> None:
         """A simple in-memory cache implementation.
@@ -153,7 +203,7 @@ class LocalFileSystemCache(JobCache):
         Args:
             root: File system path where the cache will be stored. Defaults to "cache" in the current working directory.
         """
-        self.root = root if isinstance(root, Path) else Path(root)
+        self.root = Path(root)

     def __contains__(self, key: str) -> bool:
         return (self.root / key).exists()
@@ -184,7 +234,7 @@ class LocalFileSystemCache(JobCache):


 class GoogleStorageCache(JobCache):
-    def __init__(self, bucket: Bucket, prefix: str = "jobs") -> None:
+    def __init__(self, bucket: Bucket, prefix: str | ObjectPath = "jobs") -> None:
         """A cache implementation that stores data in Google Cloud Storage.

         Args:
@@ -192,23 +242,25 @@ class GoogleStorageCache(JobCache):
             prefix: A path prefix to append to all objects stored in the cache. Defaults to "jobs".
         """
         self.bucket = bucket
-        self.prefix = Path(prefix)  # we still use pathlib here, because it's easier to work with when joining paths
+        self.prefix = ObjectPath(
+            prefix
+        )  # we still use pathlib here, because it's easier to work with when joining paths

     def _blob(self, key: str) -> Blob:
         return self.bucket.blob(str(self.prefix / key))

     def __contains__(self, key: str) -> bool:
         # GCS library has some weird typing issues, so let's ignore them for now
-        return self._blob(key).exists()  # type: ignore[arg-type]
+        return self._blob(key).exists()

     def __setitem__(self, key: str, value: bytes) -> None:
         # GCS library has some weird typing issues, so let's ignore them for now
-        self._blob(key).upload_from_file(BytesIO(value))  # type: ignore[arg-type]
+        self._blob(key).upload_from_file(BytesIO(value))

     def __getitem__(self, key: str) -> bytes:
         try:
             # GCS library has some weird typing issues, so let's ignore them for now
-            return self._blob(key).download_as_bytes()  # type: ignore[arg-type]
+            return self._blob(key).download_as_bytes()
         except NotFound:
             raise KeyError(f"{key} is not cached!") from None

@@ -224,18 +276,18 @@ class GoogleStorageCache(JobCache):
         # in the "folder", and not the ones in subfolders

         # GCS library has some weird typing issues, so let's ignore them for now
-        blobs = self.bucket.list_blobs(prefix=prefix, delimiter="/")  # type: ignore[arg-type]
+        blobs = self.bucket.list_blobs(prefix=prefix, delimiter="/")

         # make the names relative to the cache prefix (but including the key in the name)
         for blob in blobs:
-            yield str(Path(blob.name).relative_to(self.prefix))
+            yield str(ObjectPath(blob.name).relative_to(self.prefix))

     def group(self, key: str) -> "GoogleStorageCache":
         return GoogleStorageCache(self.bucket, prefix=str(self.prefix / key))


 class AmazonS3Cache(JobCache):
-    def __init__(self, bucket: str, prefix: str = "jobs") -> None:
+    def __init__(self, bucket: str, prefix: str | ObjectPath = "jobs") -> None:
         """A cache implementation that stores data in Amazon S3.

         Args:
@@ -243,7 +295,7 @@ class AmazonS3Cache(JobCache):
             prefix: A path prefix to append to all objects stored in the cache. Defaults to "jobs".
         """
         self.bucket = bucket
-        self.prefix = Path(prefix)
+        self.prefix = ObjectPath(prefix)
         with warnings.catch_warnings():
             # https://github.com/boto/boto3/issues/3889
             warnings.filterwarnings("ignore", category=DeprecationWarning, message=".*datetime.utcnow.*")
tilebox/workflows/client.py CHANGED
@@ -29,7 +29,7 @@ class Client:
             token: The API Key to authenticate with. If not set the `TILEBOX_API_KEY` environment variable will be used.
         """
         token = _token_from_env(url, token)
-        self._auth = {"token": token, "url": url}
+        self._auth: dict[str, str] = {"token": token, "url": url}
         self._channel = open_channel(url, token)

         self._tracer: WorkflowTracer | None = None
tilebox/workflows/data.py CHANGED
@@ -200,7 +200,7 @@ class JobState(Enum):
 _JOB_STATES = {state.value: state for state in JobState}

 # JobState.QUEUED is deprecated and has been renamed to SUBMITTED, but we keep it around for backwards compatibility
-JobState.QUEUED = JobState.SUBMITTED  # type: ignore[assignment]
+JobState.QUEUED = JobState.SUBMITTED  # ty: ignore[unresolved-attribute]


 @dataclass(order=True, frozen=True)
@@ -529,8 +529,8 @@ class StorageLocation:
                     span.set_attribute("bucket", self.location)
                     span.set_attribute("path", path)
                     # GCS library has some weird typing issues, so let's ignore them for now
-                    blob = runner_context.gcs_client(self.location).blob(path)  # type: ignore[arg-type]
-                    return blob.download_as_bytes()  # type: ignore[arg-type]
+                    blob = runner_context.gcs_client(self.location).blob(path)
+                    return blob.download_as_bytes()
             case StorageType.S3:
                 with runner_context.tracer.start_as_current_span("s3.read") as span:
                     span.set_attribute("bucket", self.location)
tilebox/workflows/formatting/job.py CHANGED
@@ -341,8 +341,7 @@ def _progress_indicator_bar(label: str, done: int, total: int, state: JobState)
         f"<span class='tbx-detail-mono'><span class='tbx-detail-value'>{percentage:.0%}</span> "
         f"<span class='tbx-detail-value-muted'>({done} / {total})</span></span>"
     )
-    label = HTML(label_html)
-    return HBox([progress, label])
+    return HBox([progress, HTML(label_html)])


 _eye_icon = """
tilebox/workflows/jobs/client.py CHANGED
@@ -19,7 +19,7 @@ from tilebox.workflows.task import FutureTask, merge_future_tasks_to_submissions
 from tilebox.workflows.task import Task as TaskInstance

 try:
-    from IPython.display import HTML, display  # type: ignore[assignment]
+    from IPython.display import HTML, display
 except ImportError:

     class HTML:
@@ -159,7 +159,7 @@ class JobClient:

     def query(
         self,
-        temporal_extent: TimeIntervalLike | IDIntervalLike,
+        temporal_extent: "TimeIntervalLike | IDIntervalLike",
         automation_ids: UUID | list[UUID] | None = None,
         job_states: JobState | list[JobState] | None = None,
         name: str | None = None,
@@ -192,11 +192,13 @@ class JobClient:
         id_interval: IDInterval | None = None
         match temporal_extent:
             case (str(), str()):
+                # ty doesn't narrow types on match statements yet, once it does we can remove this cast
+                str_temporal_extent: tuple[str, str] = temporal_extent  # ty: ignore[invalid-assignment]
                 # this is either a tuple of datetimes or a tuple of UUIDs
                 try:
-                    id_interval = IDInterval.parse(temporal_extent)
+                    id_interval = IDInterval.parse(str_temporal_extent)
                 except ValueError:
-                    dataset_time_interval = TimeInterval.parse(temporal_extent)
+                    dataset_time_interval = TimeInterval.parse(str_temporal_extent)
                     time_interval = TimeInterval(
                         start=dataset_time_interval.start,
                         end=dataset_time_interval.end,
@@ -206,7 +208,10 @@
             case IDInterval(_, _, _, _) | (UUID(), UUID()):
                 id_interval = IDInterval.parse(temporal_extent)
             case _:
-                dataset_time_interval = TimeInterval.parse(temporal_extent)
+                # ty doesn't narrow types on match statements yet, once it does we can remove this cast
+                # because due to the match statement above we know that temporal_extent is a TimeIntervalLike
+                time_interval_like: TimeIntervalLike = temporal_extent  # ty: ignore[invalid-assignment]
+                dataset_time_interval = TimeInterval.parse(time_interval_like)
                 time_interval = TimeInterval(
                     start=dataset_time_interval.start,
                     end=dataset_time_interval.end,
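
A hedged sketch of what the match statement above accepts as temporal_extent (job_client is assumed to be a JobClient instance; the exact set of accepted shapes is defined by TimeIntervalLike and IDIntervalLike, which this diff does not show):

    from datetime import datetime, timezone
    from uuid import UUID

    # a (str, str) tuple is first tried as an IDInterval and falls back to a time interval
    jobs = job_client.query(("2024-01-01", "2024-02-01"))

    # a (UUID, UUID) tuple or an IDInterval always takes the id-interval branch
    jobs = job_client.query((UUID(int=1), UUID(int=2)))

    # everything else, e.g. a (datetime, datetime) tuple, is parsed as a time interval
    jobs = job_client.query(
        (datetime(2025, 1, 1, tzinfo=timezone.utc), datetime(2025, 2, 1, tzinfo=timezone.utc))
    )
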
tilebox/workflows/observability/logging.py CHANGED
@@ -110,7 +110,7 @@ def _otel_log_exporter(
         headers=headers,
     )
     schedule_delay = int(export_interval.total_seconds() * 1000) if export_interval is not None else None
-    return BatchLogRecordProcessor(exporter, schedule_delay_millis=schedule_delay)  # type: ignore[arg-type]
+    return BatchLogRecordProcessor(exporter, schedule_delay_millis=schedule_delay)


 def configure_otel_logging(
@@ -324,7 +324,7 @@ def get_logger(name: str | None = None, level: int = logging.NOTSET) -> logging.
         handler.setFormatter(ColorfulConsoleFormatter())
         # we set a special attribute, which allows as to remove this handler again as soon
         # as we configure an actual logging handler
-        handler._is_default = True  # type: ignore[attr-defined] # noqa: SLF001
+        handler._is_default = True  # ty: ignore[unresolved-attribute] # noqa: SLF001
         root_logger.addHandler(handler)

     logger = logging.getLogger(f"{_LOGGING_NAMESPACE}.{name}")
tilebox/workflows/observability/tracing.py CHANGED
@@ -116,7 +116,7 @@ def _otel_span_exporter(
         headers=headers,
     )
     schedule_delay = int(export_interval.total_seconds() * 1000) if export_interval is not None else None
-    return BatchSpanProcessor(exporter, schedule_delay_millis=schedule_delay)  # type: ignore[arg-type]
+    return BatchSpanProcessor(exporter, schedule_delay_millis=schedule_delay)


 class SpanEventLoggingHandler(logging.Handler):
tilebox/workflows/runner/task_runner.py CHANGED
@@ -19,6 +19,11 @@ from typing import Any, TypeAlias, TypeVar
 from uuid import UUID
 from warnings import warn

+try:
+    from typing import Self  # ty: ignore[unresolved-import]
+except ImportError:  # Self is only available in Python 3.11+
+    from typing_extensions import Self
+
 from loguru import logger
 from opentelemetry.trace.status import StatusCode
 from tenacity import retry, retry_if_exception_type, stop_when_event_set, wait_random_exponential
@@ -77,7 +82,7 @@ def _retry_backoff(func: Callable[..., WrappedFnReturnT], stop: stop_base) -> Ca
     Returns:
         The wrapped function
     """
-    return retry(  # type: ignore[no-any-return]
+    return retry(
         retry=retry_if_exception_type(InternalServerError),
         stop=stop,
         wait=wait_random_exponential(
@@ -159,8 +164,8 @@ class _LeaseRenewer(SpawnProcess):
         # we don't want to fork the current process, but instead spawn a new one
         # therefore we need to use the spawn context to create the queues
         ctx = get_context("spawn")
-        self._new_leases: Queue[tuple[UUID, TaskLease]] = ctx.Queue()  # type: ignore[assignment]
-        self._done_tasks: Queue[UUID] = ctx.Queue()  # type: ignore[assignment]
+        self._new_leases: Queue[tuple[UUID, TaskLease]] = ctx.Queue()
+        self._done_tasks: Queue[UUID] = ctx.Queue()

     def run(self) -> None:
         lease_renewer(self._url, self._token, self._new_leases, self._done_tasks)
@@ -260,7 +265,7 @@ class _GracefulShutdown:
         """Sleep for a given number of seconds, or until an interrupt signal is received."""
         self._interrupted.wait(seconds)

-    def __enter__(self) -> "_GracefulShutdown":
+    def __enter__(self) -> Self:
         """Enter a graceful shutdown context. Intercepts SIGTERM and SIGINT signals and delays them by a grace period."""
         self._original_sigterm = signal.signal(signal.SIGTERM, self._external_interrupt_handler)
         self._original_sigint = signal.signal(signal.SIGINT, self._external_interrupt_handler)
@@ -467,7 +472,7 @@ class TaskRunner:
             span.update_name(f"task/{task_class.__name__}")

             try:
-                task_instance = task_class._deserialize(task.input, self._context)  # noqa: SLF001
+                task_instance = task_class._deserialize(task.input, self._context)  # ty: ignore[possibly-missing-attribute] # noqa: SLF001
             except json.JSONDecodeError:
                 self.logger.exception(f"Failed to deserialize input for task execution {task.id}")
                 raise ValueError(f"Failed to deserialize input for task execution {task.id}") from None
@@ -559,9 +564,9 @@ class ExecutionContext(ExecutionContextBase):
     def submit_subtasks(
         self,
         tasks: Sequence[TaskInstance],
+        depends_on: FutureTask | list[FutureTask] | None = None,
         cluster: str | None = None,
         max_retries: int = 0,
-        depends_on: FutureTask | list[FutureTask] | None = None,
     ) -> list[FutureTask]:
         return [
             self.submit_subtask(task, cluster=cluster, max_retries=max_retries, depends_on=depends_on) for task in tasks
@@ -575,7 +580,7 @@ class ExecutionContext(ExecutionContextBase):
             DeprecationWarning,
             stacklevel=2,
         )
-        return self.submit_subtasks(tasks, cluster, max_retries)
+        return self.submit_subtasks(tasks, cluster=cluster, max_retries=max_retries)

     def progress(self, label: str | None = None) -> ProgressUpdate:
         if label == "":
tilebox/workflows/task.py CHANGED
@@ -50,7 +50,7 @@ class _Taskify(type):
             return task_class

         # Convert the class to a dataclass
-        task_class = dataclass(task_class)  # type: ignore[arg-type]
+        task_class = dataclass(task_class)

         # we allow overriding the execute method, but we still want to validate it
         # so we search for the closest base class that has an execute method and use
@@ -118,7 +118,7 @@ class Task(metaclass=_ABCTaskify):

     @classmethod
     def _deserialize(cls, task_input: bytes, context: RunnerContext | None = None) -> "Task":  # noqa: ARG003
-        return cast(Task, deserialize_task(cls, task_input))
+        return deserialize_task(cls, task_input)


 def _validate_execute_method(
@@ -201,7 +201,7 @@ def _get_task_identifier(task_class: type) -> TaskIdentifier:
     class_name = task_class.__name__
     if hasattr(task_class, "identifier"):  # if the task class has an identifier method, we use that
         try:
-            name, version = task_class.identifier()
+            name, version = task_class.identifier()  # ty: ignore[call-non-callable]
         except TypeError as err:
             raise ValueError(
                 f"Failed to invoke {class_name}.identifier(). Is it a staticmethod or classmethod without parameters?"
@@ -356,7 +356,11 @@ class ExecutionContext(ABC):

     @abstractmethod
     def submit_subtask(
-        self, task: Task, depends_on: list[FutureTask] | None = None, cluster: str | None = None, max_retries: int = 0
+        self,
+        task: Task,
+        depends_on: FutureTask | list[FutureTask] | None = None,
+        cluster: str | None = None,
+        max_retries: int = 0,
     ) -> FutureTask:
         """Submit a subtask of the current task.

@@ -374,7 +378,11 @@ class ExecutionContext(ABC):

     @abstractmethod
     def submit_subtasks(
-        self, tasks: Sequence[Task], cluster: str | None = None, max_retries: int = 0
+        self,
+        tasks: Sequence[Task],
+        depends_on: FutureTask | list[FutureTask] | None = None,
+        cluster: str | None = None,
+        max_retries: int = 0,
     ) -> list[FutureTask]:
         """Submit a batch of subtasks of the current task. Similar to `submit_subtask`, but for multiple tasks."""

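
With depends_on now part of both abstract signatures, dependencies can be declared when submitting a whole batch. A hedged sketch of how this might look inside a task (FetchPage, MergeResults and ProcessReport are illustrative names, not part of the package; the top-level import assumes the usual tilebox.workflows re-exports):

    from tilebox.workflows import ExecutionContext, Task

    class FetchPage(Task):
        page: int

        def execute(self, context: ExecutionContext) -> None:
            pass

    class MergeResults(Task):
        def execute(self, context: ExecutionContext) -> None:
            pass

    class ProcessReport(Task):
        def execute(self, context: ExecutionContext) -> None:
            # submit a batch of independent subtasks ...
            fetches = context.submit_subtasks([FetchPage(page) for page in range(3)])
            # ... and a follow-up task that only runs once all of them have finished
            context.submit_subtask(MergeResults(), depends_on=fetches)
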
@@ -414,12 +422,12 @@ def serialize_task(task: Task) -> bytes:
             field = json.dumps(field).encode()
         return field

-    return json.dumps(_serialize_as_dict(task)).encode()  # type: ignore[arg-type]
+    return json.dumps(_serialize_as_dict(task)).encode()


 def _serialize_as_dict(task: Task) -> dict[str, Any]:
     as_dict: dict[str, Any] = {}
-    for dataclass_field in fields(task):  # type: ignore[union-attr]
+    for dataclass_field in fields(task):  # ty: ignore[invalid-argument-type]
         skip = dataclass_field.metadata.get("skip_serialization", False)
         if skip:
             continue
@@ -444,11 +452,14 @@ def _serialize_value(value: Any, base64_encode_protobuf: bool) -> Any:  # noqa:
             return b64encode(value.SerializeToString()).decode("ascii")
         return value.SerializeToString()
     if is_dataclass(value):
-        return _serialize_as_dict(value)  # type: ignore[arg-type]
+        return _serialize_as_dict(value)
     return value


-def deserialize_task(task_cls: type, task_input: bytes) -> Task:
+_T = TypeVar("_T", bound=Task)
+
+
+def deserialize_task(task_cls: type[_T], task_input: bytes) -> _T:
     """Deserialize the input of a task from a buffer of bytes.

     The task_cls is expected to be a dataclass, containing an arbitrary number of fields.
@@ -460,22 +471,22 @@ def deserialize_task(task_cls: type, task_input: bytes) -> Task:
         return task_cls()  # empty task
     if len(task_fields) == 1:
         # if there is only one field, we deserialize it directly
-        field_type = _get_deserialization_field_type(task_fields[0].type)  # type: ignore[arg-type]
+        field_type = _get_deserialization_field_type(task_fields[0].type)  # ty: ignore[invalid-argument-type]
         if hasattr(field_type, "FromString"):  # protobuf message
-            value = field_type.FromString(task_input)  # type: ignore[arg-type]
+            value = field_type.FromString(task_input)  # ty: ignore[call-non-callable]
         else:
-            value = _deserialize_value(field_type, json.loads(task_input.decode()))  # type: ignore[arg-type]
+            value = _deserialize_value(field_type, json.loads(task_input.decode()))

         return task_cls(**{task_fields[0].name: value})

     return _deserialize_dataclass(task_cls, json.loads(task_input.decode()))


-def _deserialize_dataclass(cls: type, params: dict[str, Any]) -> Task:
+def _deserialize_dataclass(cls: type[_T], params: dict[str, Any]) -> _T:
     """Deserialize a dataclass, while allowing recursively nested dataclasses or protobuf messages."""
     for param in list(params):
         # recursively deserialize nested dataclasses
-        field = cls.__dataclass_fields__[param]
+        field = cls.__dataclass_fields__[param]  # ty: ignore[unresolved-attribute]
         params[field.name] = _deserialize_value(field.type, params[field.name])

     return cls(**params)
@@ -487,7 +498,7 @@ def _deserialize_value(field_type: type, value: Any) -> Any:  # noqa: PLR0911

     field_type = _get_deserialization_field_type(field_type)
     if hasattr(field_type, "FromString"):
-        return field_type.FromString(b64decode(value))
+        return field_type.FromString(b64decode(value))  # ty: ignore[call-non-callable]
     if is_dataclass(field_type) and isinstance(value, dict):
         return _deserialize_dataclass(field_type, value)

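
Taken together, the task.py changes make serialization round trips type-safe without cast(): deserialize_task is now generic over the task class, and _deserialize_dataclass propagates the same type variable. A hedged round-trip sketch (Greet is an illustrative task, not part of the package; the helpers are imported from tilebox.workflows.task as shown in this diff):

    from tilebox.workflows.task import ExecutionContext, Task, deserialize_task, serialize_task

    class Greet(Task):  # subclassing Task turns this into a dataclass
        name: str
        count: int = 1

        def execute(self, context: ExecutionContext) -> None:
            pass

    payload = serialize_task(Greet("tilebox", 2))  # JSON-encoded dataclass fields
    restored = deserialize_task(Greet, payload)    # inferred as Greet, no cast(...) needed
    assert restored == Greet("tilebox", 2)
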
tilebox/workflows/timeseries.py CHANGED
@@ -26,11 +26,11 @@ def _timeseries_dataset_chunk(task: Task, call_next: ForwardExecution, context:
     if not isinstance(task, TimeseriesTask):
         raise TypeError("Task is not a timeseries task. Inherit from TimeseriesTask to mark it as such.")

-    chunk: TimeseriesDatasetChunk = task.timeseries_data  # type: ignore[attr-defined]
+    chunk: TimeseriesDatasetChunk = task.timeseries_data

     # let's get a collection client
     datasets_client = context.runner_context.datasets_client
-    dataset = datasets_client._dataset_by_id(str(chunk.dataset_id))  # type: ignore[attr-defined] # noqa: SLF001
+    dataset = datasets_client._dataset_by_id(str(chunk.dataset_id))  # ty: ignore[possibly-missing-attribute] # noqa: SLF001
     # we already know the collection id, so we can skip the lookup (we don't know the name, but don't need it)
     collection_info = CollectionInfo(Collection(chunk.collection_id, "unknown"), None, None)
     collection = CollectionClient(dataset, collection_info)
@@ -50,7 +50,7 @@ def _timeseries_dataset_chunk(task: Task, call_next: ForwardExecution, context:

         for i in range(datapoints.sizes["time"]):
             datapoint = datapoints.isel(time=i)
-            call_next(context, datapoint)  # type: ignore[call-arg]
+            call_next(context, datapoint)  # ty: ignore[too-many-positional-arguments]

         return  # we are done

@@ -90,7 +90,7 @@ def _timeseries_dataset_chunk(task: Task, call_next: ForwardExecution, context:
     for sub_chunk_start, sub_chunk_end in pairwise(chunks):
         sub_chunks.append(replace(chunk, time_interval=TimeInterval(sub_chunk_start, sub_chunk_end)))

-    subtasks = [replace(task, timeseries_data=sub_chunk) for sub_chunk in sub_chunks]  # type: ignore[misc]
+    subtasks = [replace(task, timeseries_data=sub_chunk) for sub_chunk in sub_chunks]
     if len(subtasks) > 0:
         context.submit_subtasks(subtasks)

@@ -103,7 +103,7 @@ class TimeseriesTask(Task):
     timeseries_data: TimeseriesDatasetChunk

     @override
-    def execute(self, context: ExecutionContext, datapoint: xr.Dataset) -> None:  # type: ignore[override]
+    def execute(self, context: ExecutionContext, datapoint: xr.Dataset) -> None:  # ty: ignore[invalid-method-override]
         pass


@@ -136,14 +136,14 @@ def _time_interval_chunk(task: Task, call_next: ForwardExecution, context: Execu
     if not isinstance(task, TimeIntervalTask):
         raise TypeError("Task is not a time interval task. Inherit from TimeIntervalTask to mark it as such.")

-    chunk: TimeChunk = task.interval  # type: ignore[attr-defined]
+    chunk: TimeChunk = task.interval

     start = _make_multiple(chunk.time_interval.start, chunk.chunk_size, before=True)
     end = _make_multiple(chunk.time_interval.end, chunk.chunk_size, before=False)

     n = (end - start) // chunk.chunk_size
     if n <= 1:  # we are already a leaf task
-        return call_next(context, TimeInterval(start, end))  # type: ignore[call-arg]
+        return call_next(context, TimeInterval(start, end))  # ty: ignore[too-many-positional-arguments]

     chunks: list[datetime] = []
     if n < 4:  # we are a branch task with less than 4 sub chunks, so a further split is not worth it
@@ -158,9 +158,7 @@
         TimeChunk(TimeInterval(chunk_start, chunk_end), chunk.chunk_size) for chunk_start, chunk_end in pairwise(chunks)
     ]

-    context.submit_subtasks(
-        [replace(task, interval=time_chunk) for time_chunk in time_chunks]  # type: ignore[misc]
-    )
+    context.submit_subtasks([replace(task, interval=time_chunk) for time_chunk in time_chunks])

     return None


@@ -170,12 +168,12 @@ class TimeIntervalTask(Task):
     interval: TimeChunk

     @override
-    def execute(self, context: ExecutionContext, time_interval: TimeInterval) -> None:  # type: ignore[override]
+    def execute(self, context: ExecutionContext, time_interval: TimeInterval) -> None:  # ty: ignore[invalid-method-override]
         pass


 def batch_process_time_interval(interval: TimeIntervalLike, chunk_size: timedelta) -> TimeChunk:
-    return TimeChunk(time_interval=TimeInterval.parse(interval).to_half_open(), chunk_size=chunk_size)  # type: ignore[arg-type]
+    return TimeChunk(time_interval=TimeInterval.parse(interval).to_half_open(), chunk_size=chunk_size)


 def _make_multiple(time: datetime, duration: timedelta, start: datetime = _EPOCH, before: bool = True) -> datetime:
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tilebox-workflows
-Version: 0.46.0
+Version: 0.48.0
 Summary: Workflow client and task runner for Tilebox
 Project-URL: Homepage, https://tilebox.com
 Project-URL: Documentation, https://docs.tilebox.com/workflows/introduction
@@ -1,28 +1,28 @@
 tilebox/workflows/__init__.py,sha256=D6NXvTUjWv0YWN5tYD09p1cFkGs8nGrZ9V7agtjvp8c,636
-tilebox/workflows/cache.py,sha256=EoCzpYIJLmKVbQRox1J48zUjYVpWvruso-h4BRbLXG0,11357
-tilebox/workflows/client.py,sha256=gmcL4PsV_oIOZAC5rYIEel392N8r5kQSjxNK9pu5vCE,5664
-tilebox/workflows/data.py,sha256=pO3UG5kMTeZ3rXeiAROXmX76onbr-XCI_UNIrzkZe0I,29603
+tilebox/workflows/cache.py,sha256=ZgVUm2O4Qr1nPXvUVx82YGtiDg4IjUbHpnm5KTfrKvo,13233
+tilebox/workflows/client.py,sha256=L8MZXZ-yDutu91sD1am24yfANLCaasRXO8ITJHg5UgE,5680
+tilebox/workflows/data.py,sha256=8DBWZJonsLKCMMF02jpD0xfEENbrSUqANFvBYzMb1eY,29559
 tilebox/workflows/interceptors.py,sha256=yfo6pCxUdhb0EC1J506k1ge4S_BAl83TAFYxCcxu8sU,1799
-tilebox/workflows/task.py,sha256=P7vTtKMP0m-VzN3pZTrV0-3TDAk_A2uBJF8QYPzkIAk,21656
-tilebox/workflows/timeseries.py,sha256=Q3kcp4sXKpQ6v1-yBWwdoNOyuK3InNYVDjm4_FVjGOo,9022
+tilebox/workflows/task.py,sha256=Tl08qxSjn1lwOcoBhD1VgPgwCz0_JZMSl39gQN5d0Z4,21842
+tilebox/workflows/timeseries.py,sha256=wjzYaym8Z3ymdnLhfa_7UTQJHm41XLPMyFxFoe3dKaw,8954
 tilebox/workflows/automations/__init__.py,sha256=HDyTj_H0Z-w9m0noWiXAcrvEylXmSpy8pgrVUWWqjbg,226
 tilebox/workflows/automations/client.py,sha256=EJZHcRXfZyU0NAH1PZuA9fGuYQwQKnz03nyz6lJqtrc,5622
-tilebox/workflows/automations/cron.py,sha256=aLBoo6KKHaTUNeVH-4VBw8mlq0ujyv4uJDUPV7eo3dk,2046
+tilebox/workflows/automations/cron.py,sha256=5y2imdEjZ3Swqj3ZIes0Q6FakW6I8Ttnpmbsicsf5bw,2061
 tilebox/workflows/automations/service.py,sha256=Iwh4kcjLh8pJOUBvVHTEqdVmTzEAOj1qjeTLY7CKl4s,2462
-tilebox/workflows/automations/storage_event.py,sha256=4rf3DO8GjCfCxLPYPZyZjK1SsaM__EUIF1QB3YboE5w,2407
+tilebox/workflows/automations/storage_event.py,sha256=6NxF-C94JWVzvISXBuxGyzB1Gou3-vV2v1p_0WOilkk,2396
 tilebox/workflows/clusters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tilebox/workflows/clusters/client.py,sha256=uGKmieEKE8Gy2uIV5c_9kT1lObJIe48J4rYEpcNmLPc,1656
 tilebox/workflows/clusters/service.py,sha256=4ikKwl_69OW8M1yOlsx0QTlJURXfAAbAh-JY-pgjs9w,1634
 tilebox/workflows/formatting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tilebox/workflows/formatting/job.py,sha256=1bOXGGyWTCmgRJbZmTWVDJAZae3I7xKIOysJtZqMQJY,14115
+tilebox/workflows/formatting/job.py,sha256=fKhxHrx6mngfXSwtvqo6vzlpYoS5Kd10kJ45b4jmAEI,14097
 tilebox/workflows/jobs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tilebox/workflows/jobs/client.py,sha256=1hl6Y97bEIddN46utl3knomVug-9WKHJyGJLByKw1TA,11230
+tilebox/workflows/jobs/client.py,sha256=T_dUM7I-lyb9JsQ99-IKs0ezHyLzBaGITPbNcZTDy6g,11743
 tilebox/workflows/jobs/service.py,sha256=HpCg9v2kUM4rYgDSUtezpJ_yU9VhCHqX8EBboLSwKKE,3493
 tilebox/workflows/observability/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tilebox/workflows/observability/logging.py,sha256=rYryCUNQGNtnAcRmQXd0hkZhw1qZINgsylaQqUQNA5k,16450
-tilebox/workflows/observability/tracing.py,sha256=KawdvsQq7JPkLI9eJ8_l0AXWhyhharWmPNLR88CJsRA,11672
+tilebox/workflows/observability/logging.py,sha256=JLXvuC2Xky2HjFfyxEYAlFKYI9kF2jepBRrdHnmKCLs,16430
+tilebox/workflows/observability/tracing.py,sha256=96SqqPrXI0fB_vEIlHLKjErIsnHEeBQNBGCpROdYl0Q,11646
 tilebox/workflows/runner/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tilebox/workflows/runner/task_runner.py,sha256=YXJNPKdJBP0uXq7pOw3LPs6EI4La5o3vKFu7hrE5Aws,28339
+tilebox/workflows/runner/task_runner.py,sha256=F9ePKUyI-7LB_xAlbDtCRNPsKRYJf1YkMasENTnLsRo,28466
 tilebox/workflows/runner/task_service.py,sha256=C1bgoSjXBW4KUKR-_GQSplVa0XojscngOMQfTI5IQ6E,2837
 tilebox/workflows/workflows/v1/automation_pb2.py,sha256=Mf9D84ujlL-vQ0gkHwQh9_yvZKsJ0I7dsIAXL_fczqo,9179
 tilebox/workflows/workflows/v1/automation_pb2.pyi,sha256=LrucnhtVCSDfN88ZfJ2ruHvQq1Kc6CxMRAxKb_Q9kG4,5838
@@ -42,6 +42,6 @@ tilebox/workflows/workflows/v1/task_pb2_grpc.py,sha256=nkQjtsDiql1ofbSxhDXbPkySd
 tilebox/workflows/workflows/v1/workflows_pb2.py,sha256=rGwIydUP4osLD_fG3QmHaqs42mKmRoCqtwihJTLJ314,3990
 tilebox/workflows/workflows/v1/workflows_pb2.pyi,sha256=qiDQUM2Vlu_izQvaSDlK5GqKYGsNJgtm0bo9zW-qNjU,1633
 tilebox/workflows/workflows/v1/workflows_pb2_grpc.py,sha256=36Vp_TIxtS-MRBZHECf84fHFbnrm3-UizCsMOlz7qfo,8529
-tilebox_workflows-0.46.0.dist-info/METADATA,sha256=mttQREdxdDcxYLRNcIQimVQZ7XaCbsvIk0lAZ5cxKf8,3948
-tilebox_workflows-0.46.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-tilebox_workflows-0.46.0.dist-info/RECORD,,
+tilebox_workflows-0.48.0.dist-info/METADATA,sha256=g_7EhTNi4DXYUYheJjhks2sGQU4hwdFcUCFhhwxu6fU,3948
+tilebox_workflows-0.48.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+tilebox_workflows-0.48.0.dist-info/RECORD,,