wandb 0.15.10__py3-none-any.whl → 0.15.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. wandb/__init__.py +2 -1
  2. wandb/apis/public.py +51 -9
  3. wandb/apis/reports/blocks.py +1 -0
  4. wandb/cli/cli.py +14 -9
  5. wandb/env.py +11 -1
  6. wandb/integration/xgboost/xgboost.py +3 -3
  7. wandb/proto/v3/wandb_internal_pb2.py +300 -267
  8. wandb/proto/v3/wandb_settings_pb2.py +2 -2
  9. wandb/proto/v3/wandb_telemetry_pb2.py +16 -16
  10. wandb/proto/v4/wandb_internal_pb2.py +260 -252
  11. wandb/proto/v4/wandb_settings_pb2.py +2 -2
  12. wandb/proto/v4/wandb_telemetry_pb2.py +16 -16
  13. wandb/sdk/artifacts/artifact.py +9 -6
  14. wandb/sdk/artifacts/storage_handlers/s3_handler.py +12 -7
  15. wandb/sdk/data_types/image.py +1 -1
  16. wandb/sdk/internal/file_stream.py +2 -1
  17. wandb/sdk/internal/handler.py +24 -20
  18. wandb/sdk/internal/internal_api.py +9 -1
  19. wandb/sdk/internal/sender.py +4 -1
  20. wandb/sdk/internal/system/system_info.py +2 -2
  21. wandb/sdk/launch/__init__.py +5 -0
  22. wandb/sdk/launch/{launch.py → _launch.py} +53 -54
  23. wandb/sdk/launch/{launch_add.py → _launch_add.py} +34 -31
  24. wandb/sdk/launch/agent/agent.py +36 -18
  25. wandb/sdk/launch/agent/run_queue_item_file_saver.py +6 -4
  26. wandb/sdk/launch/runner/abstract.py +0 -2
  27. wandb/sdk/launch/runner/kubernetes_monitor.py +329 -0
  28. wandb/sdk/launch/runner/kubernetes_runner.py +44 -301
  29. wandb/sdk/launch/runner/local_container.py +5 -2
  30. wandb/sdk/launch/sweeps/scheduler.py +14 -10
  31. wandb/sdk/launch/sweeps/utils.py +5 -3
  32. wandb/sdk/launch/utils.py +3 -1
  33. wandb/sdk/lib/_settings_toposort_generated.py +5 -0
  34. wandb/sdk/lib/gql_request.py +3 -0
  35. wandb/sdk/lib/ipython.py +4 -0
  36. wandb/sdk/service/service.py +19 -6
  37. wandb/sdk/wandb_init.py +7 -2
  38. wandb/sdk/wandb_run.py +2 -5
  39. wandb/sdk/wandb_settings.py +48 -2
  40. wandb/util.py +1 -1
  41. {wandb-0.15.10.dist-info → wandb-0.15.11.dist-info}/METADATA +4 -1
  42. {wandb-0.15.10.dist-info → wandb-0.15.11.dist-info}/RECORD +46 -45
  43. {wandb-0.15.10.dist-info → wandb-0.15.11.dist-info}/LICENSE +0 -0
  44. {wandb-0.15.10.dist-info → wandb-0.15.11.dist-info}/WHEEL +0 -0
  45. {wandb-0.15.10.dist-info → wandb-0.15.11.dist-info}/entry_points.txt +0 -0
  46. {wandb-0.15.10.dist-info → wandb-0.15.11.dist-info}/top_level.txt +0 -0
@@ -2,7 +2,7 @@ import pprint
2
2
  from typing import Any, Dict, List, Optional
3
3
 
4
4
  import wandb
5
- import wandb.apis.public as public
5
+ from wandb.apis import public
6
6
  from wandb.apis.internal import Api
7
7
  from wandb.sdk.launch._project_spec import create_project_from_spec
8
8
  from wandb.sdk.launch.builder.build import build_image_from_project
@@ -49,39 +49,42 @@ def launch_add(
49
49
  """Enqueue a W&B launch experiment. With either a source uri, job or docker_image.
50
50
 
51
51
  Arguments:
52
- uri: URI of experiment to run. A wandb run uri or a Git repository URI.
53
- job: string reference to a wandb.Job eg: wandb/test/my-job:latest
54
- config: A dictionary containing the configuration for the run. May also contain
55
- resource specific arguments under the key "resource_args"
56
- project: Target project to send launched run to
57
- entity: Target entity to send launched run to
58
- queue: the name of the queue to enqueue the run to
59
- resource: Execution backend for the run: W&B provides built-in support for "local-container" backend
60
- entry_point: Entry point to run within the project. Defaults to using the entry point used
61
- in the original run for wandb URIs, or main.py for git repository URIs.
62
- name: Name run under which to launch the run.
63
- version: For Git-based projects, either a commit hash or a branch name.
64
- docker_image: The name of the docker image to use for the run.
65
- resource_args: Resource related arguments for launching runs onto a remote backend.
66
- Will be stored on the constructed launch config under ``resource_args``.
67
- run_id: optional string indicating the id of the launched run
68
- build: optional flag defaulting to false, requires queue to be set
69
- if build, an image is created, creates a job artifact, pushes a reference
70
- to that job artifact to queue
71
- repository: optional string to control the name of the remote repository, used when
72
- pushing images to a registry
73
- project_queue: optional string to control the name of the project for the queue. Primarily used
74
- for back compatibility with project scoped queues
52
+ uri: URI of experiment to run. A wandb run uri or a Git repository URI.
53
+ job: string reference to a wandb.Job eg: wandb/test/my-job:latest
54
+ config: A dictionary containing the configuration for the run. May also contain
55
+ resource specific arguments under the key "resource_args"
56
+ project: Target project to send launched run to
57
+ entity: Target entity to send launched run to
58
+ queue: the name of the queue to enqueue the run to
59
+ resource: Execution backend for the run: W&B provides built-in support for "local-container" backend
60
+ entry_point: Entry point to run within the project. Defaults to using the entry point used
61
+ in the original run for wandb URIs, or main.py for git repository URIs.
62
+ name: Name under which to launch the run.
63
+ version: For Git-based projects, either a commit hash or a branch name.
64
+ docker_image: The name of the docker image to use for the run.
65
+ resource_args: Resource related arguments for launching runs onto a remote backend.
66
+ Will be stored on the constructed launch config under ``resource_args``.
67
+ run_id: optional string indicating the id of the launched run
68
+ build: optional flag defaulting to false, requires queue to be set
69
+ if build, an image is created, creates a job artifact, pushes a reference
70
+ to that job artifact to queue
71
+ repository: optional string to control the name of the remote repository, used when
72
+ pushing images to a registry
73
+ project_queue: optional string to control the name of the project for the queue. Primarily used
74
+ for back compatibility with project scoped queues
75
75
 
76
76
 
77
77
  Example:
78
- import wandb
79
- project_uri = "https://github.com/wandb/examples"
80
- params = {"alpha": 0.5, "l1_ratio": 0.01}
81
- # Run W&B project and create a reproducible docker environment
82
- # on a local host
83
- api = wandb.apis.internal.Api()
84
- wandb.launch_add(uri=project_uri, parameters=params)
78
+ ```python
79
+ from wandb.sdk.launch import launch_add
80
+
81
+ project_uri = "https://github.com/wandb/examples"
82
+ params = {"alpha": 0.5, "l1_ratio": 0.01}
83
+ # Run W&B project and create a reproducible docker environment
84
+ # on a local host
85
+ api = wandb.apis.internal.Api()
86
+ launch_add(uri=project_uri, parameters=params)
87
+ ```
85
88
 
86
89
 
87
90
  Returns:
@@ -11,7 +11,7 @@ from typing import Any, Dict, List, Optional, Union
11
11
  import wandb
12
12
  from wandb.apis.internal import Api
13
13
  from wandb.errors import CommError
14
- from wandb.sdk.launch.launch_add import launch_add
14
+ from wandb.sdk.launch._launch_add import launch_add
15
15
  from wandb.sdk.launch.runner.local_container import LocalSubmittedRun
16
16
  from wandb.sdk.launch.runner.local_process import LocalProcessRunner
17
17
  from wandb.sdk.launch.sweeps.scheduler import Scheduler
@@ -36,6 +36,8 @@ HIDDEN_AGENT_RUN_TYPE = "sweep-controller"
36
36
 
37
37
  MAX_RESUME_COUNT = 5
38
38
 
39
+ RUN_INFO_GRACE_PERIOD = 60
40
+
39
41
  _env_timeout = os.environ.get("WANDB_LAUNCH_START_TIMEOUT")
40
42
  if _env_timeout:
41
43
  try:
@@ -301,27 +303,43 @@ class LaunchAgent:
301
303
  job_and_run_status.err_stage,
302
304
  fnames,
303
305
  )
304
- elif job_and_run_status.completed_status not in ["stopped", "failed"]:
305
- _logger.info(
306
- "Skipping check for completed run status because run was successful"
307
- )
308
306
  elif job_and_run_status.run is not None:
309
307
  run_info = None
310
- # sweep runs exist but have no info before they are started
311
- # so run_info returned will be None
312
- # normal runs just throw a comm error
313
- # TODO: make more clear
314
- try:
315
- run_info = self._api.get_run_info(
316
- self._entity, job_and_run_status.project, job_and_run_status.run_id
317
- )
308
+ # We fetch the run info here to check whether a run was actually
309
+ # created in W&B.
310
+ #
311
+ # We retry for 60 seconds with an exponential backoff in case
312
+ # upsert run is taking a while.
313
+ #
314
+ # Sweep runs exist but have no info before they are started
315
+ # so run_info returned will be None, while normal runs just throw a
316
+ # comm error.
317
+ start_time = time.time()
318
+ interval = 1
319
+ while True:
320
+ try:
321
+ run_info = self._api.get_run_info(
322
+ self._entity,
323
+ job_and_run_status.project,
324
+ job_and_run_status.run_id,
325
+ )
326
+ except CommError:
327
+ pass
328
+ if (
329
+ run_info is not None
330
+ or time.time() - start_time > RUN_INFO_GRACE_PERIOD
331
+ ):
332
+ break
333
+ if run_info is None:
334
+ time.sleep(interval)
335
+ interval *= 2
318
336
 
319
- except CommError:
320
- pass
321
337
  if run_info is None:
322
- _msg = "The submitted run was not successfully started"
323
338
  fnames = None
324
-
339
+ if job_and_run_status.completed_status == "finished":
340
+ _msg = "The submitted job exited successfully but failed to call wandb.init"
341
+ else:
342
+ _msg = "The submitted run was not successfully started"
325
343
  logs = job_and_run_status.run.get_logs()
326
344
  if logs:
327
345
  fnames = job_and_run_status.saver.save_contents(
@@ -331,7 +349,7 @@ class LaunchAgent:
331
349
  job_and_run_status.run_queue_item_id, _msg, "run", fnames
332
350
  )
333
351
  else:
334
- _logger.info("Finish thread id had no exception, ror run")
352
+ _logger.info(f"Finish thread id {thread_id} had no exception and no run")
335
353
  wandb._sentry.exception(
336
354
  "launch agent called finish thread id on thread without run or exception"
337
355
  )
@@ -5,8 +5,6 @@ import sys
5
5
  from typing import List, Optional, Union
6
6
 
7
7
  import wandb
8
- from wandb.sdk.lib import RunDisabled
9
- from wandb.sdk.wandb_run import Run
10
8
 
11
9
  if sys.version_info >= (3, 8):
12
10
  from typing import Literal
@@ -18,7 +16,11 @@ FileSubtypes = Literal["warning", "error"]
18
16
 
19
17
  class RunQueueItemFileSaver:
20
18
  def __init__(
21
- self, agent_run: Optional[Union[Run, RunDisabled]], run_queue_item_id: str
19
+ self,
20
+ agent_run: Optional[
21
+ Union["wandb.sdk.wandb_run.Run", "wandb.sdk.lib.RunDisabled"]
22
+ ],
23
+ run_queue_item_id: str,
22
24
  ):
23
25
  self.run_queue_item_id = run_queue_item_id
24
26
  self.run = agent_run
@@ -26,7 +28,7 @@ class RunQueueItemFileSaver:
26
28
  def save_contents(
27
29
  self, contents: str, fname: str, file_sub_type: FileSubtypes
28
30
  ) -> Optional[List[str]]:
29
- if not isinstance(self.run, Run):
31
+ if not isinstance(self.run, wandb.sdk.wandb_run.Run):
30
32
  wandb.termwarn("Not saving file contents because agent has no run")
31
33
  return None
32
34
  root_dir = self.run._settings.files_dir
@@ -13,7 +13,6 @@ from typing import Any, Dict, List, Optional, Union
13
13
  from dockerpycreds.utils import find_executable # type: ignore
14
14
 
15
15
  import wandb
16
- from wandb import Settings
17
16
  from wandb.apis.internal import Api
18
17
  from wandb.sdk.lib import runid
19
18
 
@@ -136,7 +135,6 @@ class AbstractRunner(ABC):
136
135
  api: Api,
137
136
  backend_config: Dict[str, Any],
138
137
  ) -> None:
139
- self._settings = Settings()
140
138
  self._api = api
141
139
  self.backend_config = backend_config
142
140
  self._cwd = os.getcwd()
@@ -0,0 +1,329 @@
1
+ import logging
2
+ from threading import Lock, Thread
3
+ from typing import Any, Dict, List, Optional
4
+
5
+ import urllib3
6
+ from kubernetes import watch # type: ignore # noqa: F401
7
+ from kubernetes.client import ( # type: ignore # noqa: F401
8
+ ApiException,
9
+ BatchV1Api,
10
+ CoreV1Api,
11
+ CustomObjectsApi,
12
+ V1PodStatus,
13
+ )
14
+
15
+ import wandb
16
+
17
+ from .abstract import State, Status
18
+
19
+ # Dict for mapping possible states of custom objects to the states we want to report
20
+ # to the agent.
21
+ CRD_STATE_DICT: Dict[str, State] = {
22
+ # Starting states.
23
+ "created": "starting",
24
+ "pending": "starting",
25
+ # Running states.
26
+ "running": "running",
27
+ "completing": "running",
28
+ # Finished states.
29
+ "succeeded": "finished",
30
+ "completed": "finished",
31
+ # Failed states.
32
+ "failed": "failed",
33
+ "aborted": "failed",
34
+ "timeout": "failed",
35
+ "terminated": "failed",
36
+ # Stopping states.
37
+ "terminating": "stopping",
38
+ }
39
+
40
+
41
+ _logger = logging.getLogger(__name__)
42
+
43
+
44
+ class SafeWatch:
45
+ """Wrapper for the kubernetes watch class that can recover in more situations."""
46
+
47
+ def __init__(self, watcher: "watch.Watch") -> None:
48
+ """Initialize the SafeWatch."""
49
+ self._watcher = watcher
50
+ self._last_seen_resource_version: Optional[str] = None
51
+ self._stopped = False
52
+
53
+ def stream(self, func: Any, *args: Any, **kwargs: Any) -> Any:
54
+ """Stream the watcher."""
55
+ while True:
56
+ try:
57
+ for event in self._watcher.stream(
58
+ func, *args, **kwargs, timeout_seconds=15
59
+ ):
60
+ if self._stopped:
61
+ break
62
+ # Save the resource version so that we can resume the stream
63
+ # if it breaks.
64
+ object = event.get("object")
65
+ if isinstance(object, dict):
66
+ self._last_seen_resource_version = object.get(
67
+ "metadata", dict()
68
+ ).get("resourceVersion")
69
+ else:
70
+ self._last_seen_resource_version = (
71
+ object.metadata.resource_version
72
+ )
73
+ kwargs["resource_version"] = self._last_seen_resource_version
74
+ yield event
75
+ # If stream ends after stop just break
76
+ if self._stopped:
77
+ break
78
+ except urllib3.exceptions.ProtocolError as e:
79
+ wandb.termwarn(f"Broken event stream: {e}")
80
+ except ApiException as e:
81
+ if e.status == 410:
82
+ # If resource version is too old we need to start over.
83
+ del kwargs["resource_version"]
84
+ self._last_seen_resource_version = None
85
+ except Exception as E:
86
+ wandb.termerror(f"Unknown exception in event stream: {E}")
87
+
88
+ def stop(self) -> None:
89
+ """Stop the watcher."""
90
+ self._watcher.stop()
91
+ self._stopped = True
92
+
93
+
94
+ def _is_preempted(status: "V1PodStatus") -> bool:
95
+ """Check if this pod has been preempted."""
96
+ if hasattr(status, "conditions") and status.conditions is not None:
97
+ for condition in status.conditions:
98
+ if condition.type == "DisruptionTarget" and condition.reason in [
99
+ "EvictionByEvictionAPI",
100
+ "PreemptionByScheduler",
101
+ "TerminationByKubelet",
102
+ ]:
103
+ return True
104
+ return False
105
+
106
+
107
+ def _is_container_creating(status: "V1PodStatus") -> bool:
108
+ """Check if this pod has started creating containers."""
109
+ for container_status in status.container_statuses or []:
110
+ if (
111
+ container_status.state
112
+ and container_status.state.waiting
113
+ and container_status.state.waiting.reason == "ContainerCreating"
114
+ ):
115
+ return True
116
+ return False
117
+
118
+
119
+ def _state_from_conditions(conditions: List[Dict[str, Any]]) -> Optional[str]:
120
+ """Get the state from a custom object's (CRD) status conditions."""
121
+ true_conditions = [
122
+ c.get("type", "").lower() for c in conditions if c.get("status") == "True"
123
+ ]
124
+ detected_states = {
125
+ CRD_STATE_DICT[c] for c in true_conditions if c in CRD_STATE_DICT
126
+ }
127
+ for state in ["finished", "failed", "stopping", "running", "starting"]:
128
+ if state in detected_states:
129
+ return state
130
+ return None
131
+
132
+
133
+ class KubernetesRunMonitor:
134
+ def __init__(
135
+ self,
136
+ job_field_selector: str,
137
+ pod_label_selector: str,
138
+ namespace: str,
139
+ batch_api: "BatchV1Api",
140
+ core_api: "CoreV1Api",
141
+ custom_api: "CustomObjectsApi" = None,
142
+ group: Optional[str] = None,
143
+ version: Optional[str] = None,
144
+ plural: Optional[str] = None,
145
+ ) -> None:
146
+ """Initialize KubernetesRunMonitor.
147
+
148
+ If a custom api is provided, the group, version, and plural arguments must also
149
+ be provided. These are used to query the custom api for a launched custom
150
+ object (CRD). Group, version, and plural in this context refer to the
151
+ Kubernetes API group, version, and plural for the CRD. For more information
152
+ see: https://kubernetes.io/docs/tasks/access-kubernetes-api/custom-resources/custom-resource-definitions/
153
+
154
+ The run monitor starts two threads to watch for pods and jobs/crds matching the
155
+ provided selectors. The status is set to "starting" when the run monitor is
156
+ initialized. The status is set to "running" when a pod matching the pod selector
157
+ is found with a status of "Running" or has a container with a status of
158
+ "ContainerCreating". The status is set to "finished" when a job matching the job
159
+ selector is found with a status of "Succeeded". The status is set to "failed"
160
+ when a job matching the job selector is found with a status of "Failed" or a pod
161
+ matching the pod selector is found with a status of "Failed". The status is set
162
+ to "preempted" when a pod matching the pod selector is found with a condition
163
+ type of "DisruptionTarget" and a reason of "EvictionByEvictionAPI",
164
+ "PreemptionByScheduler", or "TerminationByKubelet".
165
+
166
+ The logic for the CRD is similar to the logic for the job, but we inspect
167
+ both the phase of the CRD and the conditions since some CRDs do not have a
168
+ phase field.
169
+
170
+ Arguments:
171
+ job_field_selector: The field selector for the job or crd.
172
+ pod_label_selector: The label selector for the pods.
173
+ namespace: The namespace to monitor.
174
+ batch_api: The batch api client.
175
+ core_api: The core api client.
176
+ custom_api: The custom api client.
177
+ group: The group of the CRD.
178
+ version: The version of the CRD.
179
+ plural: The plural of the CRD.
180
+
181
+ Returns:
182
+ None.
183
+ """
184
+ self.pod_label_selector = pod_label_selector
185
+ self.job_field_selector = job_field_selector
186
+ self.namespace = namespace
187
+ self.batch_api = batch_api
188
+ self.core_api = core_api
189
+ self.custom_api = custom_api
190
+ self.group = group
191
+ self.version = version
192
+ self.plural = plural
193
+
194
+ self._status_lock = Lock()
195
+ self._status = Status("starting")
196
+
197
+ # Only one of the job or crd watchers will be used.
198
+ self._watch_job_thread = Thread(target=self._watch_job, daemon=True)
199
+ self._watch_crd_thread = Thread(target=self._watch_crd, daemon=True)
200
+
201
+ self._watch_pods_thread = Thread(target=self._watch_pods, daemon=True)
202
+
203
+ self._job_watcher = SafeWatch(watch.Watch())
204
+ self._pod_watcher = SafeWatch(watch.Watch())
205
+
206
+ def start(self) -> None:
207
+ """Start the run monitor."""
208
+ if self.custom_api is None:
209
+ self._watch_job_thread.start()
210
+ else:
211
+ self._watch_crd_thread.start()
212
+ self._watch_pods_thread.start()
213
+
214
+ def stop(self) -> None:
215
+ """Stop the run monitor."""
216
+ self._job_watcher.stop()
217
+ self._pod_watcher.stop()
218
+
219
+ def _set_status(self, status: Status) -> None:
220
+ """Set the run status."""
221
+ with self._status_lock:
222
+ self._status = status
223
+
224
+ def get_status(self) -> Status:
225
+ """Get the run status."""
226
+ with self._status_lock:
227
+ # Each time this is called we verify that our watchers are active.
228
+ if self._status.state in ["running", "starting"]:
229
+ if self.custom_api is None:
230
+ if not self._watch_job_thread.is_alive():
231
+ wandb.termwarn(
232
+ f"Job watcher thread is dead for {self.job_field_selector}"
233
+ )
234
+ self._watch_job_thread = Thread(
235
+ target=self._watch_job, daemon=True
236
+ )
237
+ self._watch_job_thread.start()
238
+ else:
239
+ if not self._watch_crd_thread.is_alive():
240
+ wandb.termwarn(
241
+ f"CRD watcher thread is dead for {self.job_field_selector}"
242
+ )
243
+ self._watch_crd_thread = Thread(
244
+ target=self._watch_crd, daemon=True
245
+ )
246
+ self._watch_crd_thread.start()
247
+ if not self._watch_pods_thread.is_alive():
248
+ wandb.termwarn(
249
+ f"Pod watcher thread is dead for {self.pod_label_selector}"
250
+ )
251
+ self._watch_pods_thread = Thread(
252
+ target=self._watch_pods, daemon=True
253
+ )
254
+ self._watch_pods_thread.start()
255
+ return self._status
256
+
257
+ def _watch_pods(self) -> None:
258
+ """Watch for pods created matching the jobname."""
259
+ # Stream pod status updates until stopped; SafeWatch re-opens the watch each time a 15-second request times out.
260
+ for event in self._pod_watcher.stream(
261
+ self.core_api.list_namespaced_pod,
262
+ namespace=self.namespace,
263
+ label_selector=self.pod_label_selector,
264
+ ):
265
+ object = event.get("object")
266
+ # Sometimes ADDED events carry an object that has no status field.
267
+ if not hasattr(object, "status"):
268
+ continue
269
+ if object.status.phase == "Running":
270
+ self._set_status(Status("running"))
271
+ if _is_preempted(object.status):
272
+ self._set_status(Status("preempted"))
273
+ self.stop()
274
+ break
275
+ if _is_container_creating(object.status):
276
+ self._set_status(Status("running"))
277
+
278
+ def _watch_job(self) -> None:
279
+ """Watch for job matching the jobname."""
280
+ for event in self._job_watcher.stream(
281
+ self.batch_api.list_namespaced_job,
282
+ namespace=self.namespace,
283
+ field_selector=self.job_field_selector,
284
+ ):
285
+ object = event.get("object")
286
+ if object.status.succeeded == 1:
287
+ self._set_status(Status("finished"))
288
+ self.stop()
289
+ break
290
+ elif object.status.failed is not None and object.status.failed >= 1:
291
+ self._set_status(Status("failed"))
292
+ self.stop()
293
+ break
294
+
295
+ def _watch_crd(self) -> None:
296
+ """Watch for CRD matching the jobname."""
297
+ for event in self._job_watcher.stream(
298
+ self.custom_api.list_namespaced_custom_object,
299
+ namespace=self.namespace,
300
+ field_selector=self.job_field_selector,
301
+ group=self.group,
302
+ version=self.version,
303
+ plural=self.plural,
304
+ ):
305
+ object = event.get("object")
306
+ status = object.get("status")
307
+ if status is None:
308
+ continue
309
+ state = status.get("state")
310
+ if isinstance(state, dict):
311
+ raw_state = state.get("phase", "")
312
+ state = CRD_STATE_DICT.get(raw_state)
313
+ else:
314
+ conditions = status.get("conditions")
315
+ if isinstance(conditions, list):
316
+ state = _state_from_conditions(conditions)
317
+ else:
318
+ # This should never happen.
319
+ _logger.warning(
320
+ f"Unexpected conditions type {type(conditions)} "
321
+ f"for CRD {self.job_field_selector}: {conditions}"
322
+ )
323
+ if state is None:
324
+ continue
325
+ status = Status(state)
326
+ self._set_status(status)
327
+ if status.state in ["finished", "failed", "preempted"]:
328
+ self.stop()
329
+ break