wandb 0.16.5__py3-none-any.whl → 0.17.0rc1__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (141) hide show
  1. package_readme.md +95 -0
  2. wandb/__init__.py +2 -2
  3. wandb/agents/pyagent.py +0 -1
  4. wandb/analytics/sentry.py +2 -1
  5. wandb/apis/importers/internals/protocols.py +30 -56
  6. wandb/apis/importers/mlflow.py +13 -26
  7. wandb/apis/importers/wandb.py +8 -14
  8. wandb/apis/public/api.py +1 -0
  9. wandb/apis/public/artifacts.py +1 -0
  10. wandb/apis/public/files.py +1 -0
  11. wandb/apis/public/history.py +1 -0
  12. wandb/apis/public/jobs.py +1 -0
  13. wandb/apis/public/projects.py +1 -0
  14. wandb/apis/public/reports.py +1 -0
  15. wandb/apis/public/runs.py +1 -0
  16. wandb/apis/public/sweeps.py +1 -0
  17. wandb/apis/public/teams.py +1 -0
  18. wandb/apis/public/users.py +1 -0
  19. wandb/apis/reports/v1/_blocks.py +2 -6
  20. wandb/apis/reports/v2/gql.py +1 -0
  21. wandb/apis/reports/v2/interface.py +3 -4
  22. wandb/apis/reports/v2/internal.py +5 -8
  23. wandb/cli/cli.py +7 -4
  24. wandb/data_types.py +3 -3
  25. wandb/env.py +35 -5
  26. wandb/errors/__init__.py +5 -0
  27. wandb/integration/catboost/catboost.py +1 -1
  28. wandb/integration/fastai/__init__.py +1 -0
  29. wandb/integration/keras/__init__.py +1 -0
  30. wandb/integration/keras/keras.py +6 -6
  31. wandb/integration/langchain/wandb_tracer.py +1 -0
  32. wandb/integration/lightning/fabric/logger.py +1 -3
  33. wandb/integration/metaflow/metaflow.py +41 -6
  34. wandb/integration/openai/fine_tuning.py +77 -40
  35. wandb/keras/__init__.py +1 -0
  36. wandb/proto/v3/wandb_internal_pb2.py +364 -332
  37. wandb/proto/v3/wandb_settings_pb2.py +2 -2
  38. wandb/proto/v4/wandb_internal_pb2.py +322 -316
  39. wandb/proto/v4/wandb_settings_pb2.py +2 -2
  40. wandb/proto/wandb_internal_codegen.py +0 -25
  41. wandb/sdk/artifacts/artifact.py +41 -13
  42. wandb/sdk/artifacts/artifact_download_logger.py +1 -0
  43. wandb/sdk/artifacts/artifact_file_cache.py +18 -4
  44. wandb/sdk/artifacts/artifact_instance_cache.py +1 -0
  45. wandb/sdk/artifacts/artifact_manifest.py +1 -0
  46. wandb/sdk/artifacts/artifact_manifest_entry.py +1 -0
  47. wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +1 -0
  48. wandb/sdk/artifacts/artifact_saver.py +21 -21
  49. wandb/sdk/artifacts/artifact_state.py +1 -0
  50. wandb/sdk/artifacts/artifact_ttl.py +1 -0
  51. wandb/sdk/artifacts/exceptions.py +1 -0
  52. wandb/sdk/artifacts/storage_handlers/azure_handler.py +1 -0
  53. wandb/sdk/artifacts/storage_handlers/gcs_handler.py +13 -18
  54. wandb/sdk/artifacts/storage_handlers/http_handler.py +1 -0
  55. wandb/sdk/artifacts/storage_handlers/local_file_handler.py +1 -0
  56. wandb/sdk/artifacts/storage_handlers/multi_handler.py +1 -0
  57. wandb/sdk/artifacts/storage_handlers/s3_handler.py +5 -3
  58. wandb/sdk/artifacts/storage_handlers/tracking_handler.py +1 -0
  59. wandb/sdk/artifacts/storage_handlers/wb_artifact_handler.py +1 -0
  60. wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +1 -0
  61. wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +1 -0
  62. wandb/sdk/artifacts/storage_policy.py +1 -0
  63. wandb/sdk/data_types/base_types/media.py +3 -6
  64. wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +3 -1
  65. wandb/sdk/integration_utils/auto_logging.py +5 -6
  66. wandb/sdk/integration_utils/data_logging.py +5 -1
  67. wandb/sdk/interface/interface.py +72 -37
  68. wandb/sdk/interface/interface_shared.py +7 -13
  69. wandb/sdk/internal/datastore.py +1 -1
  70. wandb/sdk/internal/handler.py +18 -2
  71. wandb/sdk/internal/internal.py +0 -1
  72. wandb/sdk/internal/internal_util.py +0 -1
  73. wandb/sdk/internal/job_builder.py +4 -3
  74. wandb/sdk/internal/profiler.py +1 -0
  75. wandb/sdk/internal/run.py +1 -0
  76. wandb/sdk/internal/sender.py +1 -1
  77. wandb/sdk/internal/system/assets/gpu_amd.py +44 -44
  78. wandb/sdk/internal/system/assets/gpu_apple.py +56 -11
  79. wandb/sdk/internal/system/assets/interfaces.py +6 -8
  80. wandb/sdk/internal/system/assets/open_metrics.py +2 -2
  81. wandb/sdk/internal/system/assets/trainium.py +1 -3
  82. wandb/sdk/launch/_launch.py +5 -0
  83. wandb/sdk/launch/_project_spec.py +10 -23
  84. wandb/sdk/launch/agent/agent.py +81 -37
  85. wandb/sdk/launch/agent/config.py +80 -11
  86. wandb/sdk/launch/builder/abstract.py +1 -0
  87. wandb/sdk/launch/builder/build.py +28 -1
  88. wandb/sdk/launch/builder/docker_builder.py +1 -0
  89. wandb/sdk/launch/builder/kaniko_builder.py +149 -134
  90. wandb/sdk/launch/builder/noop.py +1 -0
  91. wandb/sdk/launch/create_job.py +61 -48
  92. wandb/sdk/launch/environment/abstract.py +1 -0
  93. wandb/sdk/launch/environment/gcp_environment.py +1 -0
  94. wandb/sdk/launch/environment/local_environment.py +1 -0
  95. wandb/sdk/launch/loader.py +1 -0
  96. wandb/sdk/launch/registry/abstract.py +1 -0
  97. wandb/sdk/launch/registry/azure_container_registry.py +1 -0
  98. wandb/sdk/launch/registry/elastic_container_registry.py +1 -0
  99. wandb/sdk/launch/registry/google_artifact_registry.py +1 -0
  100. wandb/sdk/launch/registry/local_registry.py +1 -0
  101. wandb/sdk/launch/runner/abstract.py +1 -0
  102. wandb/sdk/launch/runner/kubernetes_monitor.py +4 -1
  103. wandb/sdk/launch/runner/kubernetes_runner.py +4 -3
  104. wandb/sdk/launch/runner/sagemaker_runner.py +11 -10
  105. wandb/sdk/launch/sweeps/scheduler.py +4 -1
  106. wandb/sdk/launch/sweeps/scheduler_sweep.py +1 -0
  107. wandb/sdk/launch/sweeps/utils.py +1 -1
  108. wandb/sdk/launch/utils.py +21 -3
  109. wandb/sdk/lib/_settings_toposort_generated.py +1 -0
  110. wandb/sdk/lib/fsm.py +8 -12
  111. wandb/sdk/lib/gitlib.py +4 -4
  112. wandb/sdk/lib/lazyloader.py +0 -1
  113. wandb/sdk/lib/proto_util.py +1 -1
  114. wandb/sdk/lib/retry.py +3 -2
  115. wandb/sdk/lib/run_moment.py +7 -1
  116. wandb/sdk/service/service.py +17 -15
  117. wandb/sdk/verify/verify.py +2 -1
  118. wandb/sdk/wandb_init.py +2 -8
  119. wandb/sdk/wandb_manager.py +2 -2
  120. wandb/sdk/wandb_require.py +5 -0
  121. wandb/sdk/wandb_run.py +64 -46
  122. wandb/sdk/wandb_settings.py +2 -1
  123. wandb/sklearn/__init__.py +1 -0
  124. wandb/sklearn/plot/__init__.py +1 -0
  125. wandb/sklearn/plot/classifier.py +1 -0
  126. wandb/sklearn/plot/clusterer.py +1 -0
  127. wandb/sklearn/plot/regressor.py +1 -0
  128. wandb/sklearn/plot/shared.py +1 -0
  129. wandb/sklearn/utils.py +1 -0
  130. wandb/testing/relay.py +4 -4
  131. wandb/trigger.py +1 -0
  132. wandb/util.py +40 -17
  133. wandb/wandb_controller.py +0 -1
  134. wandb/wandb_torch.py +1 -2
  135. {wandb-0.16.5.dist-info → wandb-0.17.0rc1.dist-info}/METADATA +68 -69
  136. {wandb-0.16.5.dist-info → wandb-0.17.0rc1.dist-info}/RECORD +139 -140
  137. {wandb-0.16.5.dist-info → wandb-0.17.0rc1.dist-info}/WHEEL +1 -2
  138. wandb/bin/apple_gpu_stats +0 -0
  139. wandb-0.16.5.dist-info/top_level.txt +0 -1
  140. {wandb-0.16.5.dist-info → wandb-0.17.0rc1.dist-info}/entry_points.txt +0 -0
  141. {wandb-0.16.5.dist-info → wandb-0.17.0rc1.dist-info/licenses}/LICENSE +0 -0
@@ -1,5 +1,6 @@
1
1
  import asyncio
2
2
  import base64
3
+ import copy
3
4
  import json
4
5
  import logging
5
6
  import os
@@ -8,7 +9,7 @@ import tarfile
8
9
  import tempfile
9
10
  import time
10
11
  import traceback
11
- from typing import Optional
12
+ from typing import Any, Dict, Optional
12
13
 
13
14
  import wandb
14
15
  from wandb.sdk.launch.agent.job_status_tracker import JobAndRunStatusTracker
@@ -105,6 +106,7 @@ class KanikoBuilder(AbstractBuilder):
105
106
  secret_name: str = "",
106
107
  secret_key: str = "",
107
108
  image: str = "gcr.io/kaniko-project/executor:v1.11.0",
109
+ config: Optional[dict] = None,
108
110
  ):
109
111
  """Initialize a KanikoBuilder.
110
112
 
@@ -125,6 +127,7 @@ class KanikoBuilder(AbstractBuilder):
125
127
  self.secret_name = secret_name
126
128
  self.secret_key = secret_key
127
129
  self.image = image
130
+ self.kaniko_config = config or {}
128
131
 
129
132
  @classmethod
130
133
  def from_config(
@@ -170,6 +173,7 @@ class KanikoBuilder(AbstractBuilder):
170
173
  image_uri = config.get("destination")
171
174
  if image_uri is not None:
172
175
  registry = registry_from_uri(image_uri)
176
+ kaniko_config = config.get("kaniko-config", {})
173
177
 
174
178
  return cls(
175
179
  environment,
@@ -179,6 +183,7 @@ class KanikoBuilder(AbstractBuilder):
179
183
  secret_name=secret_name,
180
184
  secret_key=secret_key,
181
185
  image=kaniko_image,
186
+ config=kaniko_config,
182
187
  )
183
188
 
184
189
  async def verify(self) -> None:
@@ -289,7 +294,7 @@ class KanikoBuilder(AbstractBuilder):
289
294
 
290
295
  build_context = await self._upload_build_context(run_id, context_path)
291
296
  build_job = await self._create_kaniko_job(
292
- build_job_name, repo_uri, image_uri, build_context, core_v1
297
+ build_job_name, repo_uri, image_uri, build_context, core_v1, api_client
293
298
  )
294
299
  wandb.termlog(f"{LOG_PREFIX}Created kaniko job {build_job_name}")
295
300
 
@@ -324,7 +329,9 @@ class KanikoBuilder(AbstractBuilder):
324
329
  ):
325
330
  if job_tracker:
326
331
  job_tracker.set_err_stage("build")
327
- raise Exception(f"Failed to build image in kaniko for job {run_id}")
332
+ raise Exception(
333
+ f"Failed to build image in kaniko for job {run_id}. View logs with `kubectl logs -n {NAMESPACE} {build_job_name}`."
334
+ )
328
335
  try:
329
336
  pods_from_job = await core_v1.list_namespaced_pod(
330
337
  namespace=NAMESPACE, label_selector=f"job-name={build_job_name}"
@@ -371,23 +378,32 @@ class KanikoBuilder(AbstractBuilder):
371
378
  image_tag: str,
372
379
  build_context_path: str,
373
380
  core_client: client.CoreV1Api,
374
- ) -> "client.V1Job":
375
- env = []
376
- volume_mounts = []
377
- volumes = []
381
+ api_client,
382
+ ) -> Dict[str, Any]:
383
+ job = copy.deepcopy(self.kaniko_config)
384
+ job_metadata = job.get("metadata", {})
385
+ job_labels = job_metadata.get("labels", {})
386
+ job_spec = job.get("spec", {})
387
+ pod_template = job_spec.get("template", {})
388
+ pod_metadata = pod_template.get("metadata", {})
389
+ pod_labels = pod_metadata.get("labels", {})
390
+ pod_spec = pod_template.get("spec", {})
391
+ volumes = pod_spec.get("volumes", [])
392
+ containers = pod_spec.get("containers") or [{}]
393
+ if len(containers) > 1:
394
+ raise LaunchError(
395
+ "Multiple container configs not supported for kaniko builder."
396
+ )
397
+ container = containers[0]
398
+ volume_mounts = container.get("volumeMounts", [])
399
+ env = container.get("env", [])
400
+ custom_args = container.get("args", [])
378
401
 
379
402
  if PVC_MOUNT_PATH:
380
403
  volumes.append(
381
- client.V1Volume(
382
- name="kaniko-pvc",
383
- persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
384
- claim_name=PVC_NAME
385
- ),
386
- )
387
- )
388
- volume_mounts.append(
389
- client.V1VolumeMount(name="kaniko-pvc", mount_path="/context")
404
+ {"name": "kaniko-pvc", "persistentVolumeClaim": {"claimName": PVC_NAME}}
390
405
  )
406
+ volume_mounts.append({"name": "kaniko-pvc", "mountPath": "/context"})
391
407
 
392
408
  if bool(self.secret_name) != bool(self.secret_key):
393
409
  raise LaunchError(
@@ -395,13 +411,13 @@ class KanikoBuilder(AbstractBuilder):
395
411
  "for kaniko build. You provided only one of them."
396
412
  )
397
413
  if isinstance(self.registry, ElasticContainerRegistry):
398
- env += [
399
- client.V1EnvVar(
400
- name="AWS_REGION",
401
- value=self.registry.region,
402
- )
403
- ]
404
- # TODO: Refactor all of this environment/registry
414
+ env.append(
415
+ {
416
+ "name": "AWS_REGION",
417
+ "value": self.registry.region,
418
+ }
419
+ )
420
+ # TODO(ben): Refactor all of this environment/registry
405
421
  # specific stuff into methods of those classes.
406
422
  if isinstance(self.environment, AzureEnvironment):
407
423
  # Use the core api to check if the secret exists
@@ -416,52 +432,46 @@ class KanikoBuilder(AbstractBuilder):
416
432
  "namespace wandb. Please create it with the key password "
417
433
  "set to your azure storage access key."
418
434
  ) from e
419
- env += [
420
- client.V1EnvVar(
421
- name="AZURE_STORAGE_ACCESS_KEY",
422
- value_from=client.V1EnvVarSource(
423
- secret_key_ref=client.V1SecretKeySelector(
424
- name="azure-storage-access-key",
425
- key="password",
426
- )
427
- ),
428
- )
429
- ]
435
+ env.append(
436
+ {
437
+ "name": "AZURE_STORAGE_ACCESS_KEY",
438
+ "valueFrom": {
439
+ "secretKeyRef": {
440
+ "name": "azure-storage-access-key",
441
+ "key": "password",
442
+ }
443
+ },
444
+ }
445
+ )
430
446
  if DOCKER_CONFIG_SECRET:
431
447
  volumes.append(
432
- client.V1Volume(
433
- name="kaniko-docker-config",
434
- secret=client.V1SecretVolumeSource(
435
- secret_name=DOCKER_CONFIG_SECRET,
436
- items=[
437
- client.V1KeyToPath(
438
- key=".dockerconfigjson", path="config.json"
439
- )
448
+ {
449
+ "name": "kaniko-docker-config",
450
+ "secret": {
451
+ "secretName": DOCKER_CONFIG_SECRET,
452
+ "items": [
453
+ {
454
+ "key": ".dockerconfigjson",
455
+ "path": "config.json",
456
+ }
440
457
  ],
441
- ),
442
- )
458
+ },
459
+ }
443
460
  )
444
461
  volume_mounts.append(
445
- client.V1VolumeMount(
446
- name="kaniko-docker-config",
447
- mount_path="/kaniko/.docker",
448
- )
462
+ {"name": "kaniko-docker-config", "mountPath": "/kaniko/.docker"}
449
463
  )
450
464
  elif self.secret_name and self.secret_key:
451
- volumes += [
452
- client.V1Volume(
453
- name="docker-config",
454
- config_map=client.V1ConfigMapVolumeSource(
455
- name=f"docker-config-{job_name}",
456
- ),
457
- ),
458
- ]
459
- volume_mounts += [
460
- client.V1VolumeMount(
461
- name="docker-config", mount_path="/kaniko/.docker/"
462
- ),
463
- ]
464
- # TODO: I don't like conditioning on the registry type here. As a
465
+ volumes.append(
466
+ {
467
+ "name": "docker-config",
468
+ "configMap": {"name": f"docker-config-{job_name}"},
469
+ }
470
+ )
471
+ volume_mounts.append(
472
+ {"name": "docker-config", "mountPath": "/kaniko/.docker"}
473
+ )
474
+ # TODO(ben): I don't like conditioning on the registry type here. As a
465
475
  # future change I want the registry and environment classes to provide
466
476
  # a list of environment variables and volume mounts that need to be
467
477
  # added to the job. The environment class provides credentials for
@@ -475,90 +485,95 @@ class KanikoBuilder(AbstractBuilder):
475
485
  elif isinstance(self.registry, GoogleArtifactRegistry):
476
486
  mount_path = "/kaniko/.config/gcloud"
477
487
  key = "config.json"
478
- env += [
479
- client.V1EnvVar(
480
- name="GOOGLE_APPLICATION_CREDENTIALS",
481
- value="/kaniko/.config/gcloud/config.json",
482
- )
483
- ]
488
+ env.append(
489
+ {
490
+ "name": "GOOGLE_APPLICATION_CREDENTIALS",
491
+ "value": "/kaniko/.config/gcloud/config.json",
492
+ }
493
+ )
484
494
  else:
485
495
  raise LaunchError(
486
496
  f"Registry type {type(self.registry)} not supported by kaniko"
487
497
  )
488
- volume_mounts += [
489
- client.V1VolumeMount(
490
- name=self.secret_name,
491
- mount_path=mount_path,
492
- read_only=True,
493
- )
494
- ]
495
- volumes += [
496
- client.V1Volume(
497
- name=self.secret_name,
498
- secret=client.V1SecretVolumeSource(
499
- secret_name=self.secret_name,
500
- items=[client.V1KeyToPath(key=self.secret_key, path=key)],
501
- ),
502
- )
503
- ]
498
+ volumes.append(
499
+ {
500
+ "name": self.secret_name,
501
+ "secret": {
502
+ "secretName": self.secret_name,
503
+ "items": [{"key": self.secret_key, "path": key}],
504
+ },
505
+ }
506
+ )
507
+ volume_mounts.append(
508
+ {
509
+ "name": self.secret_name,
510
+ "mountPath": mount_path,
511
+ "readOnly": True,
512
+ }
513
+ )
504
514
  if isinstance(self.registry, AzureContainerRegistry):
505
- # ADd the docker config map
506
- volume_mounts += [
507
- client.V1VolumeMount(
508
- name="docker-config", mount_path="/kaniko/.docker/"
509
- ),
510
- ]
511
- volumes += [
512
- client.V1Volume(
513
- name="docker-config",
514
- config_map=client.V1ConfigMapVolumeSource(
515
- name=f"docker-config-{job_name}",
516
- ),
517
- ),
518
- ]
515
+ # Add the docker config map
516
+ volumes.append(
517
+ {
518
+ "name": "docker-config",
519
+ "configMap": {"name": f"docker-config-{job_name}"},
520
+ }
521
+ )
522
+ volume_mounts.append(
523
+ {"name": "docker-config", "mountPath": "/kaniko/.docker/"}
524
+ )
519
525
  # Kaniko doesn't want https:// at the begining of the image tag.
520
526
  destination = image_tag
521
527
  if destination.startswith("https://"):
522
528
  destination = destination.replace("https://", "")
523
- args = [
524
- f"--context={build_context_path}",
525
- f"--dockerfile={_WANDB_DOCKERFILE_NAME}",
526
- f"--destination={destination}",
527
- "--cache=true",
528
- f"--cache-repo={repository.replace('https://', '')}",
529
- "--snapshotMode=redo",
530
- "--compressed-caching=false",
529
+ args = {
530
+ "--context": build_context_path,
531
+ "--dockerfile": _WANDB_DOCKERFILE_NAME,
532
+ "--destination": destination,
533
+ "--cache": "true",
534
+ "--cache-repo": repository.replace("https://", ""),
535
+ "--snapshot-mode": "redo",
536
+ "--compressed-caching": "false",
537
+ }
538
+ for custom_arg in custom_args:
539
+ arg_name, arg_value = custom_arg.split("=", 1)
540
+ args[arg_name] = arg_value
541
+ parsed_args = [
542
+ f"{arg_name}={arg_value}" for arg_name, arg_value in args.items()
531
543
  ]
532
- container = client.V1Container(
533
- name="wandb-container-build",
534
- image=self.image,
535
- args=args,
536
- volume_mounts=volume_mounts,
537
- env=env if env else None,
538
- )
539
- # Create and configure a spec section
540
- labels = {"wandb": "launch"}
544
+ container["args"] = parsed_args
545
+
546
+ # Apply the rest of our defaults
547
+ pod_labels["wandb"] = "launch"
541
548
  # This annotation is required to enable azure workload identity.
542
549
  if isinstance(self.registry, AzureContainerRegistry):
543
- labels["azure.workload.identity/use"] = "true"
544
- template = client.V1PodTemplateSpec(
545
- metadata=client.V1ObjectMeta(labels=labels),
546
- spec=client.V1PodSpec(
547
- restart_policy="Never",
548
- active_deadline_seconds=_DEFAULT_BUILD_TIMEOUT_SECS,
549
- containers=[container],
550
- volumes=volumes,
551
- service_account_name=SERVICE_ACCOUNT_NAME,
552
- ),
550
+ pod_labels["azure.workload.identity/use"] = "true"
551
+ pod_spec["restartPolicy"] = pod_spec.get("restartPolicy", "Never")
552
+ pod_spec["activeDeadlineSeconds"] = pod_spec.get(
553
+ "activeDeadlineSeconds", _DEFAULT_BUILD_TIMEOUT_SECS
553
554
  )
554
- # Create the specification of job
555
- spec = client.V1JobSpec(template=template, backoff_limit=0)
556
- job = client.V1Job(
557
- api_version="batch/v1",
558
- kind="Job",
559
- metadata=client.V1ObjectMeta(
560
- name=job_name, namespace=NAMESPACE, labels={"wandb": "launch"}
561
- ),
562
- spec=spec,
555
+ pod_spec["serviceAccountName"] = pod_spec.get(
556
+ "serviceAccountName", SERVICE_ACCOUNT_NAME
563
557
  )
558
+ job_spec["backoffLimit"] = job_spec.get("backoffLimit", 0)
559
+ job_labels["wandb"] = "launch"
560
+ job_metadata["namespace"] = job_metadata.get("namespace", NAMESPACE)
561
+ job_metadata["name"] = job_metadata.get("name", job_name)
562
+ job["apiVersion"] = "batch/v1"
563
+ job["kind"] = "Job"
564
+
565
+ # Apply all nested configs from the bottom up
566
+ pod_metadata["labels"] = pod_labels
567
+ pod_template["metadata"] = pod_metadata
568
+ container["name"] = container.get("name", "wandb-container-build")
569
+ container["image"] = container.get("image", self.image)
570
+ container["volumeMounts"] = volume_mounts
571
+ container["env"] = env
572
+ pod_spec["containers"] = [container]
573
+ pod_spec["volumes"] = volumes
574
+ pod_template["spec"] = pod_spec
575
+ job_spec["template"] = pod_template
576
+ job_metadata["labels"] = job_labels
577
+ job["metadata"] = job_metadata
578
+ job["spec"] = job_spec
564
579
  return job
@@ -1,4 +1,5 @@
1
1
  """NoOp builder implementation."""
2
+
2
3
  from typing import Any, Dict, Optional
3
4
 
4
5
  from wandb.sdk.launch.builder.abstract import AbstractBuilder
@@ -1,6 +1,7 @@
1
1
  import json
2
2
  import logging
3
3
  import os
4
+ import re
4
5
  import sys
5
6
  import tempfile
6
7
  from typing import Any, Dict, List, Optional, Tuple
@@ -11,7 +12,7 @@ from wandb.sdk.artifacts.artifact import Artifact
11
12
  from wandb.sdk.internal.job_builder import JobBuilder
12
13
  from wandb.sdk.launch.builder.build import get_current_python_version
13
14
  from wandb.sdk.launch.git_reference import GitReference
14
- from wandb.sdk.launch.utils import _is_git_uri
15
+ from wandb.sdk.launch.utils import _is_git_uri, get_entrypoint_file
15
16
  from wandb.sdk.lib import filesystem
16
17
  from wandb.util import make_artifact_name_safe
17
18
 
@@ -19,6 +20,9 @@ logging.basicConfig(stream=sys.stdout, level=logging.INFO)
19
20
  _logger = logging.getLogger("wandb")
20
21
 
21
22
 
23
+ CODE_ARTIFACT_EXCLUDE_PATHS = ["wandb", ".git"]
24
+
25
+
22
26
  def create_job(
23
27
  path: str,
24
28
  job_type: str,
@@ -107,6 +111,13 @@ def _create_job(
107
111
  )
108
112
  return None, "", []
109
113
 
114
+ if runtime is not None:
115
+ if not re.match(r"^3\.\d+$", runtime):
116
+ wandb.termerror(
117
+ f"Runtime (-r, --runtime) must be a minor version of Python 3, "
118
+ f"e.g. 3.9 or 3.10, received {runtime}"
119
+ )
120
+ return None, "", []
110
121
  aliases = aliases or []
111
122
  tempdir = tempfile.TemporaryDirectory()
112
123
  try:
@@ -145,6 +156,7 @@ def _create_job(
145
156
 
146
157
  job_builder = _configure_job_builder_for_partial(tempdir.name, job_source=job_type)
147
158
  if job_type == "code":
159
+ assert entrypoint is not None
148
160
  job_name = _make_code_artifact(
149
161
  api=api,
150
162
  job_builder=job_builder,
@@ -233,7 +245,6 @@ def _make_metadata_for_partial_job(
233
245
  return metadata, None
234
246
 
235
247
  if job_type == "code":
236
- path, entrypoint = _handle_artifact_entrypoint(path, entrypoint)
237
248
  if not entrypoint:
238
249
  wandb.termerror(
239
250
  "Artifact jobs must have an entrypoint, either included in the path or specified with -E"
@@ -304,15 +315,22 @@ def _create_repo_metadata(
304
315
  with open(os.path.join(local_dir, ".python-version")) as f:
305
316
  python_version = f.read().strip().splitlines()[0]
306
317
  else:
307
- major, minor = get_current_python_version()
308
- python_version = f"{major}.{minor}"
318
+ _, python_version = get_current_python_version()
309
319
 
310
320
  python_version = _clean_python_version(python_version)
311
321
 
312
322
  # check if entrypoint is valid
313
323
  assert entrypoint is not None
314
- if not os.path.exists(os.path.join(local_dir, entrypoint)):
315
- wandb.termerror(f"Entrypoint {entrypoint} not found in git repo")
324
+ entrypoint_list = entrypoint.split(" ")
325
+ entrypoint_file = get_entrypoint_file(entrypoint_list)
326
+ if not entrypoint_file:
327
+ wandb.termerror(
328
+ f"Entrypoint {entrypoint} is invalid. An entrypoint should include both an executable and a file, for example 'python train.py'"
329
+ )
330
+ return None
331
+
332
+ if not os.path.exists(os.path.join(local_dir, entrypoint_file)):
333
+ wandb.termerror(f"Entrypoint file {entrypoint_file} not found in git repo")
316
334
  return None
317
335
 
318
336
  metadata = {
@@ -320,9 +338,9 @@ def _create_repo_metadata(
320
338
  "commit": commit,
321
339
  "remote": ref.url,
322
340
  },
323
- "codePathLocal": entrypoint, # not in git context, optionally also set local
324
- "codePath": entrypoint,
325
- "entrypoint": [f"python{python_version}", entrypoint],
341
+ "codePathLocal": entrypoint_file, # not in git context, optionally also set local
342
+ "codePath": entrypoint_file,
343
+ "entrypoint": entrypoint_list,
326
344
  "python": python_version, # used to build container
327
345
  "notebook": False, # partial jobs from notebooks not supported
328
346
  }
@@ -332,10 +350,17 @@ def _create_repo_metadata(
332
350
 
333
351
  def _create_artifact_metadata(
334
352
  path: str, entrypoint: str, runtime: Optional[str] = None
335
- ) -> Tuple[Dict[str, Any], List[str]]:
353
+ ) -> Tuple[Optional[Dict[str, Any]], Optional[List[str]]]:
336
354
  if not os.path.isdir(path):
337
355
  wandb.termerror("Path must be a valid file or directory")
338
356
  return {}, []
357
+ entrypoint_list = entrypoint.split(" ")
358
+ entrypoint_file = get_entrypoint_file(entrypoint_list)
359
+ if not entrypoint_file:
360
+ wandb.termerror(
361
+ f"Entrypoint {entrypoint} is invalid. An entrypoint should include both an executable and a file, for example 'python train.py'"
362
+ )
363
+ return None, None
339
364
 
340
365
  # read local requirements.txt and dump to temp dir for builder
341
366
  requirements = []
@@ -347,41 +372,17 @@ def _create_artifact_metadata(
347
372
  if runtime:
348
373
  python_version = _clean_python_version(runtime)
349
374
  else:
350
- python_version = ".".join(get_current_python_version())
375
+ python_version, _ = get_current_python_version()
376
+ python_version = _clean_python_version(python_version)
351
377
 
352
- metadata = {"python": python_version, "codePath": entrypoint}
378
+ metadata = {
379
+ "python": python_version,
380
+ "codePath": entrypoint_file,
381
+ "entrypoint": entrypoint_list,
382
+ }
353
383
  return metadata, requirements
354
384
 
355
385
 
356
- def _handle_artifact_entrypoint(
357
- path: str, entrypoint: Optional[str] = None
358
- ) -> Tuple[str, Optional[str]]:
359
- if os.path.isfile(path):
360
- if entrypoint and path.endswith(entrypoint):
361
- path = path.replace(entrypoint, "")
362
- wandb.termwarn(
363
- f"Both entrypoint provided and path contains file. Using provided entrypoint: {entrypoint}, path is now: {path}"
364
- )
365
- elif entrypoint:
366
- wandb.termwarn(
367
- f"Ignoring passed in entrypoint as it does not match file path found in 'path'. Path entrypoint: {path.split('/')[-1]}"
368
- )
369
- entrypoint = path.split("/")[-1]
370
- path = "/".join(path.split("/")[:-1])
371
- elif not entrypoint:
372
- wandb.termerror("Entrypoint not valid")
373
- return "", None
374
- path = path or "." # when path is just an entrypoint, use cdw
375
-
376
- if not os.path.exists(os.path.join(path, entrypoint)):
377
- wandb.termerror(
378
- f"Could not find execution point: {os.path.join(path, entrypoint)}"
379
- )
380
- return "", None
381
-
382
- return path, entrypoint
383
-
384
-
385
386
  def _configure_job_builder_for_partial(tmpdir: str, job_source: str) -> JobBuilder:
386
387
  """Configure job builder with temp dir and job source."""
387
388
  # adjust git source to repo
@@ -411,7 +412,7 @@ def _make_code_artifact(
411
412
  job_builder: JobBuilder,
412
413
  run: "wandb.sdk.wandb_run.Run",
413
414
  path: str,
414
- entrypoint: Optional[str],
415
+ entrypoint: str,
415
416
  entity: Optional[str],
416
417
  project: Optional[str],
417
418
  name: Optional[str],
@@ -420,17 +421,22 @@ def _make_code_artifact(
420
421
 
421
422
  Returns the name of the eventual job.
422
423
  """
423
- artifact_name = _make_code_artifact_name(os.path.join(path, entrypoint or ""), name)
424
+ assert entrypoint is not None
425
+ entrypoint_list = entrypoint.split(" ")
426
+ entrypoint_file = get_entrypoint_file(entrypoint_list)
427
+ if not entrypoint_file:
428
+ wandb.termerror(
429
+ f"Entrypoint {entrypoint} is invalid. An entrypoint should include both an executable and a file, for example 'python train.py'"
430
+ )
431
+ return None
432
+
433
+ artifact_name = _make_code_artifact_name(os.path.join(path, entrypoint_file), name)
424
434
  code_artifact = wandb.Artifact(
425
435
  name=artifact_name,
426
436
  type="code",
427
437
  description="Code artifact for job",
428
438
  )
429
439
 
430
- # Update path and entrypoint vars to match metadata
431
- # TODO(gst): consolidate into one place
432
- path, entrypoint = _handle_artifact_entrypoint(path, entrypoint)
433
-
434
440
  try:
435
441
  code_artifact.add_dir(path)
436
442
  except Exception as e:
@@ -441,6 +447,13 @@ def _make_code_artifact(
441
447
  wandb.termerror(f"Error adding to code artifact: {e}")
442
448
  return None
443
449
 
450
+ # Remove paths we don't want to include, if present
451
+ for item in CODE_ARTIFACT_EXCLUDE_PATHS:
452
+ try:
453
+ code_artifact.remove(item)
454
+ except FileNotFoundError:
455
+ pass
456
+
444
457
  res, _ = api.create_artifact(
445
458
  artifact_type_name="code",
446
459
  artifact_collection_name=artifact_name,
@@ -451,7 +464,7 @@ def _make_code_artifact(
451
464
  project_name=project,
452
465
  run_name=run.id, # run will be deleted after creation
453
466
  description="Code artifact for job",
454
- metadata={"codePath": path, "entrypoint": entrypoint},
467
+ metadata={"codePath": path, "entrypoint": entrypoint_file},
455
468
  is_user_created=True,
456
469
  aliases=[
457
470
  {"artifactCollectionName": artifact_name, "alias": a} for a in ["latest"]
@@ -1,4 +1,5 @@
1
1
  """Abstract base class for environments."""
2
+
2
3
  from abc import ABC, abstractmethod
3
4
 
4
5
 
@@ -1,4 +1,5 @@
1
1
  """Implementation of the GCP environment for wandb launch."""
2
+
2
3
  import logging
3
4
  import os
4
5
  import subprocess
@@ -1,4 +1,5 @@
1
1
  """Dummy local environment implementation. This is the default environment."""
2
+
2
3
  from typing import Any, Dict, Union
3
4
 
4
5
  from wandb.sdk.launch.errors import LaunchError
@@ -1,4 +1,5 @@
1
1
  """Utilities for the agent."""
2
+
2
3
  from typing import Any, Dict, Optional
3
4
 
4
5
  import wandb
@@ -1,4 +1,5 @@
1
1
  """Abstract base class for registries."""
2
+
2
3
  from abc import ABC, abstractmethod
3
4
  from typing import Tuple
4
5
 
@@ -1,4 +1,5 @@
1
1
  """Implementation of AzureContainerRegistry class."""
2
+
2
3
  import re
3
4
  from typing import TYPE_CHECKING, Optional, Tuple
4
5
 
@@ -1,4 +1,5 @@
1
1
  """Implementation of Elastic Container Registry class for wandb launch."""
2
+
2
3
  import base64
3
4
  import logging
4
5
  from typing import Dict, Optional, Tuple
@@ -1,4 +1,5 @@
1
1
  """Implementation of Google Artifact Registry for wandb launch."""
2
+
2
3
  import logging
3
4
  from typing import Optional, Tuple
4
5