wandb 0.15.10__py3-none-any.whl → 0.15.11__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. wandb/__init__.py +2 -1
  2. wandb/apis/public.py +51 -9
  3. wandb/apis/reports/blocks.py +1 -0
  4. wandb/cli/cli.py +14 -9
  5. wandb/env.py +11 -1
  6. wandb/integration/xgboost/xgboost.py +3 -3
  7. wandb/proto/v3/wandb_internal_pb2.py +300 -267
  8. wandb/proto/v3/wandb_settings_pb2.py +2 -2
  9. wandb/proto/v3/wandb_telemetry_pb2.py +16 -16
  10. wandb/proto/v4/wandb_internal_pb2.py +260 -252
  11. wandb/proto/v4/wandb_settings_pb2.py +2 -2
  12. wandb/proto/v4/wandb_telemetry_pb2.py +16 -16
  13. wandb/sdk/artifacts/artifact.py +9 -6
  14. wandb/sdk/artifacts/storage_handlers/s3_handler.py +12 -7
  15. wandb/sdk/data_types/image.py +1 -1
  16. wandb/sdk/internal/file_stream.py +2 -1
  17. wandb/sdk/internal/handler.py +24 -20
  18. wandb/sdk/internal/internal_api.py +9 -1
  19. wandb/sdk/internal/sender.py +4 -1
  20. wandb/sdk/internal/system/system_info.py +2 -2
  21. wandb/sdk/launch/__init__.py +5 -0
  22. wandb/sdk/launch/{launch.py → _launch.py} +53 -54
  23. wandb/sdk/launch/{launch_add.py → _launch_add.py} +34 -31
  24. wandb/sdk/launch/agent/agent.py +36 -18
  25. wandb/sdk/launch/agent/run_queue_item_file_saver.py +6 -4
  26. wandb/sdk/launch/runner/abstract.py +0 -2
  27. wandb/sdk/launch/runner/kubernetes_monitor.py +329 -0
  28. wandb/sdk/launch/runner/kubernetes_runner.py +44 -301
  29. wandb/sdk/launch/runner/local_container.py +5 -2
  30. wandb/sdk/launch/sweeps/scheduler.py +14 -10
  31. wandb/sdk/launch/sweeps/utils.py +5 -3
  32. wandb/sdk/launch/utils.py +3 -1
  33. wandb/sdk/lib/_settings_toposort_generated.py +5 -0
  34. wandb/sdk/lib/gql_request.py +3 -0
  35. wandb/sdk/lib/ipython.py +4 -0
  36. wandb/sdk/service/service.py +19 -6
  37. wandb/sdk/wandb_init.py +7 -2
  38. wandb/sdk/wandb_run.py +2 -5
  39. wandb/sdk/wandb_settings.py +48 -2
  40. wandb/util.py +1 -1
  41. {wandb-0.15.10.dist-info → wandb-0.15.11.dist-info}/METADATA +4 -1
  42. {wandb-0.15.10.dist-info → wandb-0.15.11.dist-info}/RECORD +46 -45
  43. {wandb-0.15.10.dist-info → wandb-0.15.11.dist-info}/LICENSE +0 -0
  44. {wandb-0.15.10.dist-info → wandb-0.15.11.dist-info}/WHEEL +0 -0
  45. {wandb-0.15.10.dist-info → wandb-0.15.11.dist-info}/entry_points.txt +0 -0
  46. {wandb-0.15.10.dist-info → wandb-0.15.11.dist-info}/top_level.txt +0 -0
@@ -2,7 +2,7 @@ import pprint
2
2
  from typing import Any, Dict, List, Optional
3
3
 
4
4
  import wandb
5
- import wandb.apis.public as public
5
+ from wandb.apis import public
6
6
  from wandb.apis.internal import Api
7
7
  from wandb.sdk.launch._project_spec import create_project_from_spec
8
8
  from wandb.sdk.launch.builder.build import build_image_from_project
@@ -49,39 +49,42 @@ def launch_add(
49
49
  """Enqueue a W&B launch experiment. With either a source uri, job or docker_image.
50
50
 
51
51
  Arguments:
52
- uri: URI of experiment to run. A wandb run uri or a Git repository URI.
53
- job: string reference to a wandb.Job eg: wandb/test/my-job:latest
54
- config: A dictionary containing the configuration for the run. May also contain
55
- resource specific arguments under the key "resource_args"
56
- project: Target project to send launched run to
57
- entity: Target entity to send launched run to
58
- queue: the name of the queue to enqueue the run to
59
- resource: Execution backend for the run: W&B provides built-in support for "local-container" backend
60
- entry_point: Entry point to run within the project. Defaults to using the entry point used
61
- in the original run for wandb URIs, or main.py for git repository URIs.
62
- name: Name run under which to launch the run.
63
- version: For Git-based projects, either a commit hash or a branch name.
64
- docker_image: The name of the docker image to use for the run.
65
- resource_args: Resource related arguments for launching runs onto a remote backend.
66
- Will be stored on the constructed launch config under ``resource_args``.
67
- run_id: optional string indicating the id of the launched run
68
- build: optional flag defaulting to false, requires queue to be set
69
- if build, an image is created, creates a job artifact, pushes a reference
70
- to that job artifact to queue
71
- repository: optional string to control the name of the remote repository, used when
72
- pushing images to a registry
73
- project_queue: optional string to control the name of the project for the queue. Primarily used
74
- for back compatibility with project scoped queues
52
+ uri: URI of experiment to run. A wandb run uri or a Git repository URI.
53
+ job: string reference to a wandb.Job eg: wandb/test/my-job:latest
54
+ config: A dictionary containing the configuration for the run. May also contain
55
+ resource specific arguments under the key "resource_args"
56
+ project: Target project to send launched run to
57
+ entity: Target entity to send launched run to
58
+ queue: the name of the queue to enqueue the run to
59
+ resource: Execution backend for the run: W&B provides built-in support for "local-container" backend
60
+ entry_point: Entry point to run within the project. Defaults to using the entry point used
61
+ in the original run for wandb URIs, or main.py for git repository URIs.
62
+ name: Name run under which to launch the run.
63
+ version: For Git-based projects, either a commit hash or a branch name.
64
+ docker_image: The name of the docker image to use for the run.
65
+ resource_args: Resource related arguments for launching runs onto a remote backend.
66
+ Will be stored on the constructed launch config under ``resource_args``.
67
+ run_id: optional string indicating the id of the launched run
68
+ build: optional flag defaulting to false, requires queue to be set
69
+ if build, an image is created, creates a job artifact, pushes a reference
70
+ to that job artifact to queue
71
+ repository: optional string to control the name of the remote repository, used when
72
+ pushing images to a registry
73
+ project_queue: optional string to control the name of the project for the queue. Primarily used
74
+ for back compatibility with project scoped queues
75
75
 
76
76
 
77
77
  Example:
78
- import wandb
79
- project_uri = "https://github.com/wandb/examples"
80
- params = {"alpha": 0.5, "l1_ratio": 0.01}
81
- # Run W&B project and create a reproducible docker environment
82
- # on a local host
83
- api = wandb.apis.internal.Api()
84
- wandb.launch_add(uri=project_uri, parameters=params)
78
+ ```python
79
+ from wandb.sdk.launch import launch_add
80
+
81
+ project_uri = "https://github.com/wandb/examples"
82
+ params = {"alpha": 0.5, "l1_ratio": 0.01}
83
+ # Run W&B project and create a reproducible docker environment
84
+ # on a local host
85
+ api = wandb.apis.internal.Api()
86
+ launch_add(uri=project_uri, parameters=params)
87
+ ```
85
88
 
86
89
 
87
90
  Returns:
@@ -11,7 +11,7 @@ from typing import Any, Dict, List, Optional, Union
11
11
  import wandb
12
12
  from wandb.apis.internal import Api
13
13
  from wandb.errors import CommError
14
- from wandb.sdk.launch.launch_add import launch_add
14
+ from wandb.sdk.launch._launch_add import launch_add
15
15
  from wandb.sdk.launch.runner.local_container import LocalSubmittedRun
16
16
  from wandb.sdk.launch.runner.local_process import LocalProcessRunner
17
17
  from wandb.sdk.launch.sweeps.scheduler import Scheduler
@@ -36,6 +36,8 @@ HIDDEN_AGENT_RUN_TYPE = "sweep-controller"
36
36
 
37
37
  MAX_RESUME_COUNT = 5
38
38
 
39
+ RUN_INFO_GRACE_PERIOD = 60
40
+
39
41
  _env_timeout = os.environ.get("WANDB_LAUNCH_START_TIMEOUT")
40
42
  if _env_timeout:
41
43
  try:
@@ -301,27 +303,43 @@ class LaunchAgent:
301
303
  job_and_run_status.err_stage,
302
304
  fnames,
303
305
  )
304
- elif job_and_run_status.completed_status not in ["stopped", "failed"]:
305
- _logger.info(
306
- "Skipping check for completed run status because run was successful"
307
- )
308
306
  elif job_and_run_status.run is not None:
309
307
  run_info = None
310
- # sweep runs exist but have no info before they are started
311
- # so run_info returned will be None
312
- # normal runs just throw a comm error
313
- # TODO: make more clear
314
- try:
315
- run_info = self._api.get_run_info(
316
- self._entity, job_and_run_status.project, job_and_run_status.run_id
317
- )
308
+ # We do some weird stuff here getting run info to check for a
309
+ # created run in W&B.
310
+ #
311
+ # We retry for 60 seconds with an exponential backoff in case
312
+ # upsert run is taking a while.
313
+ #
314
+ # Sweep runs exist but have no info before they are started
315
+ # so run_info returned will be None, while normal runs just throw a
316
+ # comm error.
317
+ start_time = time.time()
318
+ interval = 1
319
+ while True:
320
+ try:
321
+ run_info = self._api.get_run_info(
322
+ self._entity,
323
+ job_and_run_status.project,
324
+ job_and_run_status.run_id,
325
+ )
326
+ except CommError:
327
+ pass
328
+ if (
329
+ run_info is not None
330
+ or time.time() - start_time > RUN_INFO_GRACE_PERIOD
331
+ ):
332
+ break
333
+ if run_info is None:
334
+ time.sleep(interval)
335
+ interval *= 2
318
336
 
319
- except CommError:
320
- pass
321
337
  if run_info is None:
322
- _msg = "The submitted run was not successfully started"
323
338
  fnames = None
324
-
339
+ if job_and_run_status.completed_status == "finished":
340
+ _msg = "The submitted job exited successfully but failed to call wandb.init"
341
+ else:
342
+ _msg = "The submitted run was not successfully started"
325
343
  logs = job_and_run_status.run.get_logs()
326
344
  if logs:
327
345
  fnames = job_and_run_status.saver.save_contents(
@@ -331,7 +349,7 @@ class LaunchAgent:
331
349
  job_and_run_status.run_queue_item_id, _msg, "run", fnames
332
350
  )
333
351
  else:
334
- _logger.info("Finish thread id had no exception, ror run")
352
+ _logger.info(f"Finish thread id {thread_id} had no exception and no run")
335
353
  wandb._sentry.exception(
336
354
  "launch agent called finish thread id on thread without run or exception"
337
355
  )
@@ -5,8 +5,6 @@ import sys
5
5
  from typing import List, Optional, Union
6
6
 
7
7
  import wandb
8
- from wandb.sdk.lib import RunDisabled
9
- from wandb.sdk.wandb_run import Run
10
8
 
11
9
  if sys.version_info >= (3, 8):
12
10
  from typing import Literal
@@ -18,7 +16,11 @@ FileSubtypes = Literal["warning", "error"]
18
16
 
19
17
  class RunQueueItemFileSaver:
20
18
  def __init__(
21
- self, agent_run: Optional[Union[Run, RunDisabled]], run_queue_item_id: str
19
+ self,
20
+ agent_run: Optional[
21
+ Union["wandb.sdk.wandb_run.Run", "wandb.sdk.lib.RunDisabled"]
22
+ ],
23
+ run_queue_item_id: str,
22
24
  ):
23
25
  self.run_queue_item_id = run_queue_item_id
24
26
  self.run = agent_run
@@ -26,7 +28,7 @@ class RunQueueItemFileSaver:
26
28
  def save_contents(
27
29
  self, contents: str, fname: str, file_sub_type: FileSubtypes
28
30
  ) -> Optional[List[str]]:
29
- if not isinstance(self.run, Run):
31
+ if not isinstance(self.run, wandb.sdk.wandb_run.Run):
30
32
  wandb.termwarn("Not saving file contents because agent has no run")
31
33
  return None
32
34
  root_dir = self.run._settings.files_dir
@@ -13,7 +13,6 @@ from typing import Any, Dict, List, Optional, Union
13
13
  from dockerpycreds.utils import find_executable # type: ignore
14
14
 
15
15
  import wandb
16
- from wandb import Settings
17
16
  from wandb.apis.internal import Api
18
17
  from wandb.sdk.lib import runid
19
18
 
@@ -136,7 +135,6 @@ class AbstractRunner(ABC):
136
135
  api: Api,
137
136
  backend_config: Dict[str, Any],
138
137
  ) -> None:
139
- self._settings = Settings()
140
138
  self._api = api
141
139
  self.backend_config = backend_config
142
140
  self._cwd = os.getcwd()
@@ -0,0 +1,329 @@
1
+ import logging
2
+ from threading import Lock, Thread
3
+ from typing import Any, Dict, List, Optional
4
+
5
+ import urllib3
6
+ from kubernetes import watch # type: ignore # noqa: F401
7
+ from kubernetes.client import ( # type: ignore # noqa: F401
8
+ ApiException,
9
+ BatchV1Api,
10
+ CoreV1Api,
11
+ CustomObjectsApi,
12
+ V1PodStatus,
13
+ )
14
+
15
+ import wandb
16
+
17
+ from .abstract import State, Status
18
+
19
+ # Dict for mapping possible states of custom objects to the states we want to report
20
+ # to the agent.
21
+ CRD_STATE_DICT: Dict[str, State] = {
22
+ # Starting states.
23
+ "created": "starting",
24
+ "pending": "starting",
25
+ # Running states.
26
+ "running": "running",
27
+ "completing": "running",
28
+ # Finished states.
29
+ "succeeded": "finished",
30
+ "completed": "finished",
31
+ # Failed states.
32
+ "failed": "failed",
33
+ "aborted": "failed",
34
+ "timeout": "failed",
35
+ "terminated": "failed",
36
+ # Stopping states.
37
+ "terminating": "stopping",
38
+ }
39
+
40
+
41
+ _logger = logging.getLogger(__name__)
42
+
43
+
44
+ class SafeWatch:
45
+ """Wrapper for the kubernetes watch class that can recover in more situations."""
46
+
47
+ def __init__(self, watcher: "watch.Watch") -> None:
48
+ """Initialize the SafeWatch."""
49
+ self._watcher = watcher
50
+ self._last_seen_resource_version: Optional[str] = None
51
+ self._stopped = False
52
+
53
+ def stream(self, func: Any, *args: Any, **kwargs: Any) -> Any:
54
+ """Stream the watcher."""
55
+ while True:
56
+ try:
57
+ for event in self._watcher.stream(
58
+ func, *args, **kwargs, timeout_seconds=15
59
+ ):
60
+ if self._stopped:
61
+ break
62
+ # Save the resource version so that we can resume the stream
63
+ # if it breaks.
64
+ object = event.get("object")
65
+ if isinstance(object, dict):
66
+ self._last_seen_resource_version = object.get(
67
+ "metadata", dict()
68
+ ).get("resourceVersion")
69
+ else:
70
+ self._last_seen_resource_version = (
71
+ object.metadata.resource_version
72
+ )
73
+ kwargs["resource_version"] = self._last_seen_resource_version
74
+ yield event
75
+ # If stream ends after stop just break
76
+ if self._stopped:
77
+ break
78
+ except urllib3.exceptions.ProtocolError as e:
79
+ wandb.termwarn(f"Broken event stream: {e}")
80
+ except ApiException as e:
81
+ if e.status == 410:
82
+ # If resource version is too old we need to start over.
83
+ del kwargs["resource_version"]
84
+ self._last_seen_resource_version = None
85
+ except Exception as E:
86
+ wandb.termerror(f"Unknown exception in event stream: {E}")
87
+
88
+ def stop(self) -> None:
89
+ """Stop the watcher."""
90
+ self._watcher.stop()
91
+ self._stopped = True
92
+
93
+
94
+ def _is_preempted(status: "V1PodStatus") -> bool:
95
+ """Check if this pod has been preempted."""
96
+ if hasattr(status, "conditions") and status.conditions is not None:
97
+ for condition in status.conditions:
98
+ if condition.type == "DisruptionTarget" and condition.reason in [
99
+ "EvictionByEvictionAPI",
100
+ "PreemptionByScheduler",
101
+ "TerminationByKubelet",
102
+ ]:
103
+ return True
104
+ return False
105
+
106
+
107
+ def _is_container_creating(status: "V1PodStatus") -> bool:
108
+ """Check if this pod has started creating containers."""
109
+ for container_status in status.container_statuses or []:
110
+ if (
111
+ container_status.state
112
+ and container_status.state.waiting
113
+ and container_status.state.waiting.reason == "ContainerCreating"
114
+ ):
115
+ return True
116
+ return False
117
+
118
+
119
+ def _state_from_conditions(conditions: List[Dict[str, Any]]) -> Optional[str]:
120
+ """Get the reported state from a CRD's status conditions."""
121
+ true_conditions = [
122
+ c.get("type", "").lower() for c in conditions if c.get("status") == "True"
123
+ ]
124
+ detected_states = {
125
+ CRD_STATE_DICT[c] for c in true_conditions if c in CRD_STATE_DICT
126
+ }
127
+ for state in ["finished", "failed", "stopping", "running", "starting"]:
128
+ if state in detected_states:
129
+ return state
130
+ return None
131
+
132
+
133
+ class KubernetesRunMonitor:
134
+ def __init__(
135
+ self,
136
+ job_field_selector: str,
137
+ pod_label_selector: str,
138
+ namespace: str,
139
+ batch_api: "BatchV1Api",
140
+ core_api: "CoreV1Api",
141
+ custom_api: "CustomObjectsApi" = None,
142
+ group: Optional[str] = None,
143
+ version: Optional[str] = None,
144
+ plural: Optional[str] = None,
145
+ ) -> None:
146
+ """Initialize KubernetesRunMonitor.
147
+
148
+ If a custom api is provided, the group, version, and plural arguments must also
149
+ be provided. These are used to query the custom api for a launched custom
150
+ object (CRD). Group, version, and plural in this context refer to the
151
+ Kubernetes API group, version, and plural for the CRD. For more information
152
+ see: https://kubernetes.io/docs/tasks/access-kubernetes-api/custom-resources/custom-resource-definitions/
153
+
154
+ The run monitor starts two threads to watch for pods and jobs/crds matching the
155
+ provided selectors. The status is set to "starting" when the run monitor is
156
+ initialized. The status is set to "running" when a pod matching the pod selector
157
+ is found with a status of "Running" or has a container with a status of
158
+ "ContainerCreating". The status is set to "finished" when a job matching the job
159
+ selector is found with a status of "Succeeded". The status is set to "failed"
160
+ when a job matching the job selector is found with a status of "Failed" or a pod
161
+ matching the pod selector is found with a status of "Failed". The status is set
162
+ to "preempted" when a pod matching the pod selector is found with a condition
163
+ type of "DisruptionTarget" and a reason of "EvictionByEvictionAPI",
164
+ "PreemptionByScheduler", or "TerminationByKubelet".
165
+
166
+ The logic for the CRD is similar to the logic for the job, but we inspect
167
+ both the phase of the CRD and the conditions since some CRDs do not have a
168
+ phase field.
169
+
170
+ Arguments:
171
+ job_field_selector: The field selector for the job or crd.
172
+ pod_label_selector: The label selector for the pods.
173
+ namespace: The namespace to monitor.
174
+ batch_api: The batch api client.
175
+ core_api: The core api client.
176
+ custom_api: The custom api client.
177
+ group: The group of the CRD.
178
+ version: The version of the CRD.
179
+ plural: The plural of the CRD.
180
+
181
+ Returns:
182
+ None.
183
+ """
184
+ self.pod_label_selector = pod_label_selector
185
+ self.job_field_selector = job_field_selector
186
+ self.namespace = namespace
187
+ self.batch_api = batch_api
188
+ self.core_api = core_api
189
+ self.custom_api = custom_api
190
+ self.group = group
191
+ self.version = version
192
+ self.plural = plural
193
+
194
+ self._status_lock = Lock()
195
+ self._status = Status("starting")
196
+
197
+ # Only one of the job or crd watchers will be used.
198
+ self._watch_job_thread = Thread(target=self._watch_job, daemon=True)
199
+ self._watch_crd_thread = Thread(target=self._watch_crd, daemon=True)
200
+
201
+ self._watch_pods_thread = Thread(target=self._watch_pods, daemon=True)
202
+
203
+ self._job_watcher = SafeWatch(watch.Watch())
204
+ self._pod_watcher = SafeWatch(watch.Watch())
205
+
206
+ def start(self) -> None:
207
+ """Start the run monitor."""
208
+ if self.custom_api is None:
209
+ self._watch_job_thread.start()
210
+ else:
211
+ self._watch_crd_thread.start()
212
+ self._watch_pods_thread.start()
213
+
214
+ def stop(self) -> None:
215
+ """Stop the run monitor."""
216
+ self._job_watcher.stop()
217
+ self._pod_watcher.stop()
218
+
219
+ def _set_status(self, status: Status) -> None:
220
+ """Set the run status."""
221
+ with self._status_lock:
222
+ self._status = status
223
+
224
+ def get_status(self) -> Status:
225
+ """Get the run status."""
226
+ with self._status_lock:
227
+ # Each time this is called we verify that our watchers are active.
228
+ if self._status.state in ["running", "starting"]:
229
+ if self.custom_api is None:
230
+ if not self._watch_job_thread.is_alive():
231
+ wandb.termwarn(
232
+ f"Job watcher thread is dead for {self.job_field_selector}"
233
+ )
234
+ self._watch_job_thread = Thread(
235
+ target=self._watch_job, daemon=True
236
+ )
237
+ self._watch_job_thread.start()
238
+ else:
239
+ if not self._watch_crd_thread.is_alive():
240
+ wandb.termwarn(
241
+ f"CRD watcher thread is dead for {self.job_field_selector}"
242
+ )
243
+ self._watch_crd_thread = Thread(
244
+ target=self._watch_crd, daemon=True
245
+ )
246
+ self._watch_crd_thread.start()
247
+ if not self._watch_pods_thread.is_alive():
248
+ wandb.termwarn(
249
+ f"Pod watcher thread is dead for {self.pod_label_selector}"
250
+ )
251
+ self._watch_pods_thread = Thread(
252
+ target=self._watch_pods, daemon=True
253
+ )
254
+ self._watch_pods_thread.start()
255
+ return self._status
256
+
257
+ def _watch_pods(self) -> None:
258
+ """Watch for pods created matching the jobname."""
259
+ # Stream pod status updates; SafeWatch re-opens the watch whenever it times out
260
+ for event in self._pod_watcher.stream(
261
+ self.core_api.list_namespaced_pod,
262
+ namespace=self.namespace,
263
+ label_selector=self.pod_label_selector,
264
+ ):
265
+ object = event.get("object")
266
+ # Sometimes ADDED events will be missing the status field.
267
+ if not hasattr(object, "status"):
268
+ continue
269
+ if object.status.phase == "Running":
270
+ self._set_status(Status("running"))
271
+ if _is_preempted(object.status):
272
+ self._set_status(Status("preempted"))
273
+ self.stop()
274
+ break
275
+ if _is_container_creating(object.status):
276
+ self._set_status(Status("running"))
277
+
278
+ def _watch_job(self) -> None:
279
+ """Watch for job matching the jobname."""
280
+ for event in self._job_watcher.stream(
281
+ self.batch_api.list_namespaced_job,
282
+ namespace=self.namespace,
283
+ field_selector=self.job_field_selector,
284
+ ):
285
+ object = event.get("object")
286
+ if object.status.succeeded == 1:
287
+ self._set_status(Status("finished"))
288
+ self.stop()
289
+ break
290
+ elif object.status.failed is not None and object.status.failed >= 1:
291
+ self._set_status(Status("failed"))
292
+ self.stop()
293
+ break
294
+
295
+ def _watch_crd(self) -> None:
296
+ """Watch for CRD matching the jobname."""
297
+ for event in self._job_watcher.stream(
298
+ self.custom_api.list_namespaced_custom_object,
299
+ namespace=self.namespace,
300
+ field_selector=self.job_field_selector,
301
+ group=self.group,
302
+ version=self.version,
303
+ plural=self.plural,
304
+ ):
305
+ object = event.get("object")
306
+ status = object.get("status")
307
+ if status is None:
308
+ continue
309
+ state = status.get("state")
310
+ if isinstance(state, dict):
311
+ raw_state = state.get("phase", "")
312
+ state = CRD_STATE_DICT.get(raw_state)
313
+ else:
314
+ conditions = status.get("conditions")
315
+ if isinstance(conditions, list):
316
+ state = _state_from_conditions(conditions)
317
+ else:
318
+ # This should never happen.
319
+ _logger.warning(
320
+ f"Unexpected conditions type {type(conditions)} "
321
+ f"for CRD {self.job_field_selector}: {conditions}"
322
+ )
323
+ if state is None:
324
+ continue
325
+ status = Status(state)
326
+ self._set_status(status)
327
+ if status.state in ["finished", "failed", "preempted"]:
328
+ self.stop()
329
+ break