wandb 0.16.5__py3-none-any.whl → 0.16.6__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
@@ -1,5 +1,6 @@
 import asyncio
 import base64
+import copy
 import json
 import logging
 import os
@@ -8,7 +9,7 @@ import tarfile
 import tempfile
 import time
 import traceback
-from typing import Optional
+from typing import Any, Dict, Optional

 import wandb
 from wandb.sdk.launch.agent.job_status_tracker import JobAndRunStatusTracker
@@ -105,6 +106,7 @@ class KanikoBuilder(AbstractBuilder):
         secret_name: str = "",
         secret_key: str = "",
         image: str = "gcr.io/kaniko-project/executor:v1.11.0",
+        config: Optional[dict] = None,
     ):
         """Initialize a KanikoBuilder.

@@ -125,6 +127,7 @@ class KanikoBuilder(AbstractBuilder):
         self.secret_name = secret_name
         self.secret_key = secret_key
         self.image = image
+        self.kaniko_config = config or {}

     @classmethod
     def from_config(
@@ -170,6 +173,7 @@ class KanikoBuilder(AbstractBuilder):
         image_uri = config.get("destination")
         if image_uri is not None:
             registry = registry_from_uri(image_uri)
+        kaniko_config = config.get("kaniko-config", {})

         return cls(
             environment,
@@ -179,6 +183,7 @@ class KanikoBuilder(AbstractBuilder):
             secret_name=secret_name,
             secret_key=secret_key,
             image=kaniko_image,
+            config=kaniko_config,
         )

     async def verify(self) -> None:
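Note on the hunks above: `KanikoBuilder` now accepts an optional `config` dict (stored as `self.kaniko_config`), which `from_config` populates from a `kaniko-config` key on the builder config. A minimal sketch of a value this key might carry, assuming the standard Kubernetes Job schema that `_create_kaniko_job` reads back out below (the label and resource values are hypothetical, not from the diff):

```python
# Hypothetical `kaniko-config` value; keys mirror a Kubernetes Job manifest.
kaniko_config = {
    "metadata": {"labels": {"team": "ml-infra"}},  # example label, not from the diff
    "spec": {
        "template": {
            "spec": {
                "containers": [
                    # At most one container config; more than one raises LaunchError.
                    {"resources": {"limits": {"memory": "2Gi"}}}
                ]
            }
        }
    },
}
```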
@@ -289,7 +294,7 @@

         build_context = await self._upload_build_context(run_id, context_path)
         build_job = await self._create_kaniko_job(
-            build_job_name, repo_uri, image_uri, build_context, core_v1
+            build_job_name, repo_uri, image_uri, build_context, core_v1, api_client
         )
         wandb.termlog(f"{LOG_PREFIX}Created kaniko job {build_job_name}")

@@ -324,7 +329,9 @@
            ):
                if job_tracker:
                    job_tracker.set_err_stage("build")
-                raise Exception(f"Failed to build image in kaniko for job {run_id}")
+                raise Exception(
+                    f"Failed to build image in kaniko for job {run_id}. View logs with `kubectl logs -n {NAMESPACE} {build_job_name}`."
+                )
            try:
                pods_from_job = await core_v1.list_namespaced_pod(
                    namespace=NAMESPACE, label_selector=f"job-name={build_job_name}"
@@ -371,23 +378,32 @@
         image_tag: str,
         build_context_path: str,
         core_client: client.CoreV1Api,
-    ) -> "client.V1Job":
-        env = []
-        volume_mounts = []
-        volumes = []
+        api_client,
+    ) -> Dict[str, Any]:
+        job = copy.deepcopy(self.kaniko_config)
+        job_metadata = job.get("metadata", {})
+        job_labels = job_metadata.get("labels", {})
+        job_spec = job.get("spec", {})
+        pod_template = job_spec.get("template", {})
+        pod_metadata = pod_template.get("metadata", {})
+        pod_labels = pod_metadata.get("labels", {})
+        pod_spec = pod_template.get("spec", {})
+        volumes = pod_spec.get("volumes", [])
+        containers = pod_spec.get("containers") or [{}]
+        if len(containers) > 1:
+            raise LaunchError(
+                "Multiple container configs not supported for kaniko builder."
+            )
+        container = containers[0]
+        volume_mounts = container.get("volumeMounts", [])
+        env = container.get("env", [])
+        custom_args = container.get("args", [])

         if PVC_MOUNT_PATH:
             volumes.append(
-                client.V1Volume(
-                    name="kaniko-pvc",
-                    persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
-                        claim_name=PVC_NAME
-                    ),
-                )
-            )
-            volume_mounts.append(
-                client.V1VolumeMount(name="kaniko-pvc", mount_path="/context")
+                {"name": "kaniko-pvc", "persistentVolumeClaim": {"claimName": PVC_NAME}}
             )
+            volume_mounts.append({"name": "kaniko-pvc", "mountPath": "/context"})

         if bool(self.secret_name) != bool(self.secret_key):
             raise LaunchError(
@@ -395,13 +411,13 @@
                 "for kaniko build. You provided only one of them."
             )
         if isinstance(self.registry, ElasticContainerRegistry):
-            env += [
-                client.V1EnvVar(
-                    name="AWS_REGION",
-                    value=self.registry.region,
-                )
-            ]
-        # TODO: Refactor all of this environment/registry
+            env.append(
+                {
+                    "name": "AWS_REGION",
+                    "value": self.registry.region,
+                }
+            )
+        # TODO(ben): Refactor all of this environment/registry
         # specific stuff into methods of those classes.
         if isinstance(self.environment, AzureEnvironment):
             # Use the core api to check if the secret exists
@@ -416,52 +432,46 @@
                     "namespace wandb. Please create it with the key password "
                     "set to your azure storage access key."
                 ) from e
-            env += [
-                client.V1EnvVar(
-                    name="AZURE_STORAGE_ACCESS_KEY",
-                    value_from=client.V1EnvVarSource(
-                        secret_key_ref=client.V1SecretKeySelector(
-                            name="azure-storage-access-key",
-                            key="password",
-                        )
-                    ),
-                )
-            ]
+            env.append(
+                {
+                    "name": "AZURE_STORAGE_ACCESS_KEY",
+                    "valueFrom": {
+                        "secretKeyRef": {
+                            "name": "azure-storage-access-key",
+                            "key": "password",
+                        }
+                    },
+                }
+            )
         if DOCKER_CONFIG_SECRET:
             volumes.append(
-                client.V1Volume(
-                    name="kaniko-docker-config",
-                    secret=client.V1SecretVolumeSource(
-                        secret_name=DOCKER_CONFIG_SECRET,
-                        items=[
-                            client.V1KeyToPath(
-                                key=".dockerconfigjson", path="config.json"
-                            )
+                {
+                    "name": "kaniko-docker-config",
+                    "secret": {
+                        "secretName": DOCKER_CONFIG_SECRET,
+                        "items": [
+                            {
+                                "key": ".dockerconfigjson",
+                                "path": "config.json",
+                            }
                         ],
-                    ),
-                )
+                    },
+                }
             )
             volume_mounts.append(
-                client.V1VolumeMount(
-                    name="kaniko-docker-config",
-                    mount_path="/kaniko/.docker",
-                )
+                {"name": "kaniko-docker-config", "mountPath": "/kaniko/.docker"}
             )
         elif self.secret_name and self.secret_key:
-            volumes += [
-                client.V1Volume(
-                    name="docker-config",
-                    config_map=client.V1ConfigMapVolumeSource(
-                        name=f"docker-config-{job_name}",
-                    ),
-                ),
-            ]
-            volume_mounts += [
-                client.V1VolumeMount(
-                    name="docker-config", mount_path="/kaniko/.docker/"
-                ),
-            ]
-            # TODO: I don't like conditioning on the registry type here. As a
+            volumes.append(
+                {
+                    "name": "docker-config",
+                    "configMap": {"name": f"docker-config-{job_name}"},
+                }
+            )
+            volume_mounts.append(
+                {"name": "docker-config", "mountPath": "/kaniko/.docker"}
+            )
+            # TODO(ben): I don't like conditioning on the registry type here. As a
             # future change I want the registry and environment classes to provide
             # a list of environment variables and volume mounts that need to be
             # added to the job. The environment class provides credentials for
@@ -475,90 +485,95 @@
             elif isinstance(self.registry, GoogleArtifactRegistry):
                 mount_path = "/kaniko/.config/gcloud"
                 key = "config.json"
-                env += [
-                    client.V1EnvVar(
-                        name="GOOGLE_APPLICATION_CREDENTIALS",
-                        value="/kaniko/.config/gcloud/config.json",
-                    )
-                ]
+                env.append(
+                    {
+                        "name": "GOOGLE_APPLICATION_CREDENTIALS",
+                        "value": "/kaniko/.config/gcloud/config.json",
+                    }
+                )
             else:
                 raise LaunchError(
                     f"Registry type {type(self.registry)} not supported by kaniko"
                 )
-            volume_mounts += [
-                client.V1VolumeMount(
-                    name=self.secret_name,
-                    mount_path=mount_path,
-                    read_only=True,
-                )
-            ]
-            volumes += [
-                client.V1Volume(
-                    name=self.secret_name,
-                    secret=client.V1SecretVolumeSource(
-                        secret_name=self.secret_name,
-                        items=[client.V1KeyToPath(key=self.secret_key, path=key)],
-                    ),
-                )
-            ]
+            volumes.append(
+                {
+                    "name": self.secret_name,
+                    "secret": {
+                        "secretName": self.secret_name,
+                        "items": [{"key": self.secret_key, "path": key}],
+                    },
+                }
+            )
+            volume_mounts.append(
+                {
+                    "name": self.secret_name,
+                    "mountPath": mount_path,
+                    "readOnly": True,
+                }
+            )
         if isinstance(self.registry, AzureContainerRegistry):
-            # ADd the docker config map
-            volume_mounts += [
-                client.V1VolumeMount(
-                    name="docker-config", mount_path="/kaniko/.docker/"
-                ),
-            ]
-            volumes += [
-                client.V1Volume(
-                    name="docker-config",
-                    config_map=client.V1ConfigMapVolumeSource(
-                        name=f"docker-config-{job_name}",
-                    ),
-                ),
-            ]
+            # Add the docker config map
+            volumes.append(
+                {
+                    "name": "docker-config",
+                    "configMap": {"name": f"docker-config-{job_name}"},
+                }
+            )
+            volume_mounts.append(
+                {"name": "docker-config", "mountPath": "/kaniko/.docker/"}
+            )
         # Kaniko doesn't want https:// at the begining of the image tag.
         destination = image_tag
         if destination.startswith("https://"):
             destination = destination.replace("https://", "")
-        args = [
-            f"--context={build_context_path}",
-            f"--dockerfile={_WANDB_DOCKERFILE_NAME}",
-            f"--destination={destination}",
-            "--cache=true",
-            f"--cache-repo={repository.replace('https://', '')}",
-            "--snapshotMode=redo",
-            "--compressed-caching=false",
+        args = {
+            "--context": build_context_path,
+            "--dockerfile": _WANDB_DOCKERFILE_NAME,
+            "--destination": destination,
+            "--cache": "true",
+            "--cache-repo": repository.replace("https://", ""),
+            "--snapshot-mode": "redo",
+            "--compressed-caching": "false",
+        }
+        for custom_arg in custom_args:
+            arg_name, arg_value = custom_arg.split("=", 1)
+            args[arg_name] = arg_value
+        parsed_args = [
+            f"{arg_name}={arg_value}" for arg_name, arg_value in args.items()
         ]
-        container = client.V1Container(
-            name="wandb-container-build",
-            image=self.image,
-            args=args,
-            volume_mounts=volume_mounts,
-            env=env if env else None,
-        )
-        # Create and configure a spec section
-        labels = {"wandb": "launch"}
+        container["args"] = parsed_args
+
+        # Apply the rest of our defaults
+        pod_labels["wandb"] = "launch"
         # This annotation is required to enable azure workload identity.
         if isinstance(self.registry, AzureContainerRegistry):
-            labels["azure.workload.identity/use"] = "true"
-        template = client.V1PodTemplateSpec(
-            metadata=client.V1ObjectMeta(labels=labels),
-            spec=client.V1PodSpec(
-                restart_policy="Never",
-                active_deadline_seconds=_DEFAULT_BUILD_TIMEOUT_SECS,
-                containers=[container],
-                volumes=volumes,
-                service_account_name=SERVICE_ACCOUNT_NAME,
-            ),
+            pod_labels["azure.workload.identity/use"] = "true"
+        pod_spec["restartPolicy"] = pod_spec.get("restartPolicy", "Never")
+        pod_spec["activeDeadlineSeconds"] = pod_spec.get(
+            "activeDeadlineSeconds", _DEFAULT_BUILD_TIMEOUT_SECS
         )
-        # Create the specification of job
-        spec = client.V1JobSpec(template=template, backoff_limit=0)
-        job = client.V1Job(
-            api_version="batch/v1",
-            kind="Job",
-            metadata=client.V1ObjectMeta(
-                name=job_name, namespace=NAMESPACE, labels={"wandb": "launch"}
-            ),
-            spec=spec,
+        pod_spec["serviceAccountName"] = pod_spec.get(
+            "serviceAccountName", SERVICE_ACCOUNT_NAME
         )
+        job_spec["backoffLimit"] = job_spec.get("backoffLimit", 0)
+        job_labels["wandb"] = "launch"
+        job_metadata["namespace"] = job_metadata.get("namespace", NAMESPACE)
+        job_metadata["name"] = job_metadata.get("name", job_name)
+        job["apiVersion"] = "batch/v1"
+        job["kind"] = "Job"
+
+        # Apply all nested configs from the bottom up
+        pod_metadata["labels"] = pod_labels
+        pod_template["metadata"] = pod_metadata
+        container["name"] = container.get("name", "wandb-container-build")
+        container["image"] = container.get("image", self.image)
+        container["volumeMounts"] = volume_mounts
+        container["env"] = env
+        pod_spec["containers"] = [container]
+        pod_spec["volumes"] = volumes
+        pod_template["spec"] = pod_spec
+        job_spec["template"] = pod_template
+        job_metadata["labels"] = job_labels
+        job["metadata"] = job_metadata
+        job["spec"] = job_spec
         return job
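The rewritten `_create_kaniko_job` returns a plain dict manifest instead of a `client.V1Job`, using a setdefault-style pattern so values supplied via `kaniko-config` survive and builder defaults only fill the gaps; custom container args likewise override the default kaniko flags by name. A small sketch of both behaviors, with illustrative values:

```python
# Defaulting: a user-supplied value wins, the default fills the gap.
pod_spec = {"restartPolicy": "OnFailure"}  # pretend kaniko-config set this
pod_spec["restartPolicy"] = pod_spec.get("restartPolicy", "Never")
assert pod_spec["restartPolicy"] == "OnFailure"

# Arg merging: custom args replace defaults that share a flag name.
args = {"--cache": "true"}  # builder default
for custom_arg in ["--cache=false"]:  # e.g. containers[0].args from kaniko-config
    arg_name, arg_value = custom_arg.split("=", 1)
    args[arg_name] = arg_value
parsed_args = [f"{name}={value}" for name, value in args.items()]
assert parsed_args == ["--cache=false"]
```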
@@ -11,7 +11,7 @@ from wandb.sdk.artifacts.artifact import Artifact
 from wandb.sdk.internal.job_builder import JobBuilder
 from wandb.sdk.launch.builder.build import get_current_python_version
 from wandb.sdk.launch.git_reference import GitReference
-from wandb.sdk.launch.utils import _is_git_uri
+from wandb.sdk.launch.utils import _is_git_uri, get_entrypoint_file
 from wandb.sdk.lib import filesystem
 from wandb.util import make_artifact_name_safe

@@ -145,6 +145,7 @@ def _create_job(

     job_builder = _configure_job_builder_for_partial(tempdir.name, job_source=job_type)
     if job_type == "code":
+        assert entrypoint is not None
         job_name = _make_code_artifact(
             api=api,
             job_builder=job_builder,
@@ -233,7 +234,6 @@ def _make_metadata_for_partial_job(
         return metadata, None

     if job_type == "code":
-        path, entrypoint = _handle_artifact_entrypoint(path, entrypoint)
         if not entrypoint:
             wandb.termerror(
                 "Artifact jobs must have an entrypoint, either included in the path or specified with -E"
@@ -304,15 +304,22 @@ def _create_repo_metadata(
         with open(os.path.join(local_dir, ".python-version")) as f:
             python_version = f.read().strip().splitlines()[0]
     else:
-        major, minor = get_current_python_version()
-        python_version = f"{major}.{minor}"
+        _, python_version = get_current_python_version()

     python_version = _clean_python_version(python_version)

     # check if entrypoint is valid
     assert entrypoint is not None
-    if not os.path.exists(os.path.join(local_dir, entrypoint)):
-        wandb.termerror(f"Entrypoint {entrypoint} not found in git repo")
+    entrypoint_list = entrypoint.split(" ")
+    entrypoint_file = get_entrypoint_file(entrypoint_list)
+    if not entrypoint_file:
+        wandb.termerror(
+            f"Entrypoint {entrypoint} is invalid. An entrypoint should include both an executable and a file, for example 'python train.py'"
+        )
+        return None
+
+    if not os.path.exists(os.path.join(local_dir, entrypoint_file)):
+        wandb.termerror(f"Entrypoint file {entrypoint_file} not found in git repo")
         return None

     metadata = {
@@ -320,9 +327,9 @@
             "commit": commit,
             "remote": ref.url,
         },
-        "codePathLocal": entrypoint,  # not in git context, optionally also set local
-        "codePath": entrypoint,
-        "entrypoint": [f"python{python_version}", entrypoint],
+        "codePathLocal": entrypoint_file,  # not in git context, optionally also set local
+        "codePath": entrypoint_file,
+        "entrypoint": entrypoint_list,
         "python": python_version,  # used to build container
         "notebook": False,  # partial jobs from notebooks not supported
     }
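The metadata change above preserves the user's command verbatim: previously `entrypoint` held a bare file name and the stored command was rebuilt as `[f"python{python_version}", entrypoint]`; now the raw command string is split into `entrypoint_list` and stored as-is, with `get_entrypoint_file` (added in `wandb/sdk/launch/utils.py` below) extracting the file for `codePath`. Sketched for a hypothetical command:

```python
entrypoint = "python train.py --epochs 10"            # hypothetical user command
entrypoint_list = entrypoint.split(" ")               # ["python", "train.py", "--epochs", "10"]
entrypoint_file = get_entrypoint_file(entrypoint_list)  # "train.py"
# new: "entrypoint": entrypoint_list, "codePath": "train.py"
# old: "entrypoint": [f"python{python_version}", "train.py"]
```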
@@ -332,10 +339,17 @@

 def _create_artifact_metadata(
     path: str, entrypoint: str, runtime: Optional[str] = None
-) -> Tuple[Dict[str, Any], List[str]]:
+) -> Tuple[Optional[Dict[str, Any]], Optional[List[str]]]:
     if not os.path.isdir(path):
         wandb.termerror("Path must be a valid file or directory")
         return {}, []
+    entrypoint_list = entrypoint.split(" ")
+    entrypoint_file = get_entrypoint_file(entrypoint_list)
+    if not entrypoint_file:
+        wandb.termerror(
+            f"Entrypoint {entrypoint} is invalid. An entrypoint should include both an executable and a file, for example 'python train.py'"
+        )
+        return None, None

     # read local requirements.txt and dump to temp dir for builder
     requirements = []
@@ -347,41 +361,17 @@
     if runtime:
         python_version = _clean_python_version(runtime)
     else:
-        python_version = ".".join(get_current_python_version())
+        python_version, _ = get_current_python_version()
+        python_version = _clean_python_version(python_version)

-    metadata = {"python": python_version, "codePath": entrypoint}
+    metadata = {
+        "python": python_version,
+        "codePath": entrypoint_file,
+        "entrypoint": entrypoint_list,
+    }
     return metadata, requirements


-def _handle_artifact_entrypoint(
-    path: str, entrypoint: Optional[str] = None
-) -> Tuple[str, Optional[str]]:
-    if os.path.isfile(path):
-        if entrypoint and path.endswith(entrypoint):
-            path = path.replace(entrypoint, "")
-            wandb.termwarn(
-                f"Both entrypoint provided and path contains file. Using provided entrypoint: {entrypoint}, path is now: {path}"
-            )
-        elif entrypoint:
-            wandb.termwarn(
-                f"Ignoring passed in entrypoint as it does not match file path found in 'path'. Path entrypoint: {path.split('/')[-1]}"
-            )
-        entrypoint = path.split("/")[-1]
-        path = "/".join(path.split("/")[:-1])
-    elif not entrypoint:
-        wandb.termerror("Entrypoint not valid")
-        return "", None
-    path = path or "."  # when path is just an entrypoint, use cdw
-
-    if not os.path.exists(os.path.join(path, entrypoint)):
-        wandb.termerror(
-            f"Could not find execution point: {os.path.join(path, entrypoint)}"
-        )
-        return "", None
-
-    return path, entrypoint
-
-
 def _configure_job_builder_for_partial(tmpdir: str, job_source: str) -> JobBuilder:
     """Configure job builder with temp dir and job source."""
     # adjust git source to repo
@@ -411,7 +401,7 @@ def _make_code_artifact(
     job_builder: JobBuilder,
     run: "wandb.sdk.wandb_run.Run",
     path: str,
-    entrypoint: Optional[str],
+    entrypoint: str,
     entity: Optional[str],
     project: Optional[str],
     name: Optional[str],
@@ -420,17 +410,22 @@

     Returns the name of the eventual job.
     """
-    artifact_name = _make_code_artifact_name(os.path.join(path, entrypoint or ""), name)
+    assert entrypoint is not None
+    entrypoint_list = entrypoint.split(" ")
+    entrypoint_file = get_entrypoint_file(entrypoint_list)
+    if not entrypoint_file:
+        wandb.termerror(
+            f"Entrypoint {entrypoint} is invalid. An entrypoint should include both an executable and a file, for example 'python train.py'"
+        )
+        return None
+
+    artifact_name = _make_code_artifact_name(os.path.join(path, entrypoint_file), name)
     code_artifact = wandb.Artifact(
         name=artifact_name,
         type="code",
         description="Code artifact for job",
     )

-    # Update path and entrypoint vars to match metadata
-    # TODO(gst): consolidate into one place
-    path, entrypoint = _handle_artifact_entrypoint(path, entrypoint)
-
     try:
         code_artifact.add_dir(path)
     except Exception as e:
@@ -451,7 +446,7 @@
         project_name=project,
         run_name=run.id,  # run will be deleted after creation
         description="Code artifact for job",
-        metadata={"codePath": path, "entrypoint": entrypoint},
+        metadata={"codePath": path, "entrypoint": entrypoint_file},
         is_user_created=True,
         aliases=[
             {"artifactCollectionName": artifact_name, "alias": a} for a in ["latest"]
@@ -433,6 +433,8 @@ class SafeWatch:
                del kwargs["resource_version"]
                self._last_seen_resource_version = None
            except Exception as E:
+                exc_type = type(E).__name__
+                stack_trace = traceback.format_exc()
                wandb.termerror(
-                    f"Unknown exception in event stream: {E}, attempting to recover"
+                    f"Unknown exception in event stream of type {exc_type}: {E}, attempting to recover. Stack trace: {stack_trace}"
                )
@@ -157,7 +157,9 @@ class Scheduler(ABC):
         self._runs: Dict[str, SweepRun] = {}
         # Threading lock to ensure thread-safe access to the runs dictionary
         self._threading_lock: threading.Lock = threading.Lock()
-        self._polling_sleep = polling_sleep or DEFAULT_POLLING_SLEEP
+        self._polling_sleep = (
+            polling_sleep if polling_sleep is not None else DEFAULT_POLLING_SLEEP
+        )
         self._project_queue = project_queue
         # Optionally run multiple workers in (pseudo-)parallel. Workers do not
         # actually run training workloads, they simply send heartbeat messages
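This is a truthiness fix, not a cosmetic one: with `or`, an explicit `polling_sleep=0` is falsy and silently became the default, while the `is not None` form keeps it. A sketch (the default value is assumed for illustration only):

```python
DEFAULT_POLLING_SLEEP = 2.0  # assumed value, for illustration
polling_sleep = 0.0          # caller explicitly requests no sleep

old = polling_sleep or DEFAULT_POLLING_SLEEP  # 2.0: the 0 is overridden
new = polling_sleep if polling_sleep is not None else DEFAULT_POLLING_SLEEP  # 0.0: kept
assert (old, new) == (2.0, 0.0)
```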
wandb/sdk/launch/utils.py CHANGED
@@ -846,3 +846,21 @@ def fetch_and_validate_template_variables(
            raise LaunchError(f"Value for {key} must be of type {field_type}.")
        template_variables[key] = val
    return template_variables
+
+
+def get_entrypoint_file(entrypoint: List[str]) -> Optional[str]:
+    """Get the entrypoint file from the given command.
+
+    Args:
+        entrypoint (List[str]): List of command and arguments.
+
+    Returns:
+        Optional[str]: The entrypoint file if found, otherwise None.
+    """
+    if not entrypoint:
+        return None
+    if entrypoint[0].endswith(".py") or entrypoint[0].endswith(".sh"):
+        return entrypoint[0]
+    if len(entrypoint) < 2:
+        return None
+    return entrypoint[1]
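From the definition above, the helper treats the first token as the file when it looks like a script, and otherwise takes the second token. Expected behavior, sketched:

```python
assert get_entrypoint_file(["python", "train.py"]) == "train.py"
assert get_entrypoint_file(["train.py"]) == "train.py"   # a bare script is accepted
assert get_entrypoint_file(["./run.sh"]) == "./run.sh"
assert get_entrypoint_file(["python"]) is None           # an executable alone is not
assert get_entrypoint_file([]) is None
# Note: the second token is taken verbatim, so a flag like "-m" would be returned.
```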
@@ -22,6 +22,7 @@ _Setting = Literal[
     "_disable_service",
     "_disable_setproctitle",
     "_disable_stats",
+    "_disable_update_check",
     "_disable_viewer",
     "_disable_machine_info",
     "_except_exit",
@@ -1,7 +1,13 @@
+import sys
 from dataclasses import dataclass
-from typing import Literal, Union, cast
+from typing import Union, cast
 from urllib import parse

+if sys.version_info >= (3, 8):
+    from typing import Literal
+else:
+    from typing_extensions import Literal
+
 _STEP = Literal["_step"]


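Context for the hunk above: `typing.Literal` entered the standard library in Python 3.8, so the import is now guarded and older interpreters fall back to `typing_extensions`.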
wandb/sdk/wandb_init.py CHANGED
@@ -195,12 +195,6 @@ class _WandbInit:
         # Start with settings from wandb library singleton
         settings: Settings = self._wl.settings.copy()

-        # when using launch, we don't want to reuse the same run id from the singleton
-        # since users might launch multiple runs in the same process
-        # TODO(kdg): allow users to control this via launch settings
-        if settings.launch and singleton is not None:
-            settings.update({"run_id": None}, source=Source.INIT)
-
         settings_param = kwargs.pop("settings", None)
         if settings_param is not None and isinstance(settings_param, (Settings, dict)):
             settings.update(settings_param, source=Source.INIT)
@@ -1124,10 +1118,10 @@ def init(
             for saving hyperparameters to compare across runs. The ID cannot
             contain the following special characters: `/\#?%:`.
             See [our guide to resuming runs](https://docs.wandb.com/guides/runs/resuming).
-        fork_from: (str, optional) A string with the format {run_id}?_step={step} describing
+        fork_from: (str, optional) A string with the format {run_id}?_step={step} describing
             a moment in a previous run to fork a new run from. Creates a new run that picks up
             logging history from the specified run at the specified moment. The target run must
-            be in the current project.
+            be in the current project. Example: `fork_from="my-run-id?_step=1234"`.

     Examples:
     ### Set where the run is logged