wandb 0.15.9__py3-none-any.whl → 0.15.11__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114)
  1. wandb/__init__.py +5 -1
  2. wandb/apis/public.py +137 -17
  3. wandb/apis/reports/_panels.py +1 -1
  4. wandb/apis/reports/blocks.py +1 -0
  5. wandb/apis/reports/report.py +27 -5
  6. wandb/cli/cli.py +52 -41
  7. wandb/docker/__init__.py +17 -0
  8. wandb/docker/auth.py +1 -1
  9. wandb/env.py +24 -4
  10. wandb/filesync/step_checksum.py +3 -3
  11. wandb/integration/openai/openai.py +3 -0
  12. wandb/integration/ultralytics/__init__.py +9 -0
  13. wandb/integration/ultralytics/bbox_utils.py +196 -0
  14. wandb/integration/ultralytics/callback.py +458 -0
  15. wandb/integration/ultralytics/classification_utils.py +66 -0
  16. wandb/integration/ultralytics/mask_utils.py +141 -0
  17. wandb/integration/ultralytics/pose_utils.py +92 -0
  18. wandb/integration/xgboost/xgboost.py +3 -3
  19. wandb/integration/yolov8/__init__.py +0 -7
  20. wandb/integration/yolov8/yolov8.py +22 -3
  21. wandb/old/settings.py +7 -0
  22. wandb/plot/line_series.py +0 -1
  23. wandb/proto/v3/wandb_internal_pb2.py +353 -300
  24. wandb/proto/v3/wandb_server_pb2.py +37 -41
  25. wandb/proto/v3/wandb_settings_pb2.py +2 -2
  26. wandb/proto/v3/wandb_telemetry_pb2.py +16 -16
  27. wandb/proto/v4/wandb_internal_pb2.py +272 -260
  28. wandb/proto/v4/wandb_server_pb2.py +37 -40
  29. wandb/proto/v4/wandb_settings_pb2.py +2 -2
  30. wandb/proto/v4/wandb_telemetry_pb2.py +16 -16
  31. wandb/proto/wandb_internal_codegen.py +7 -31
  32. wandb/sdk/artifacts/artifact.py +321 -189
  33. wandb/sdk/artifacts/artifact_cache.py +14 -0
  34. wandb/sdk/artifacts/artifact_manifest.py +5 -4
  35. wandb/sdk/artifacts/artifact_manifest_entry.py +37 -9
  36. wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +1 -9
  37. wandb/sdk/artifacts/artifact_saver.py +13 -50
  38. wandb/sdk/artifacts/artifact_ttl.py +6 -0
  39. wandb/sdk/artifacts/artifacts_cache.py +119 -93
  40. wandb/sdk/artifacts/staging.py +25 -0
  41. wandb/sdk/artifacts/storage_handlers/s3_handler.py +12 -7
  42. wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +2 -3
  43. wandb/sdk/artifacts/storage_policies/__init__.py +4 -0
  44. wandb/sdk/artifacts/storage_policies/register.py +1 -0
  45. wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +4 -3
  46. wandb/sdk/artifacts/storage_policy.py +4 -2
  47. wandb/sdk/backend/backend.py +0 -16
  48. wandb/sdk/data_types/image.py +3 -1
  49. wandb/sdk/integration_utils/auto_logging.py +38 -13
  50. wandb/sdk/interface/interface.py +16 -135
  51. wandb/sdk/interface/interface_shared.py +9 -147
  52. wandb/sdk/interface/interface_sock.py +0 -26
  53. wandb/sdk/internal/file_pusher.py +20 -3
  54. wandb/sdk/internal/file_stream.py +3 -1
  55. wandb/sdk/internal/handler.py +53 -70
  56. wandb/sdk/internal/internal_api.py +220 -130
  57. wandb/sdk/internal/job_builder.py +41 -37
  58. wandb/sdk/internal/sender.py +7 -25
  59. wandb/sdk/internal/system/assets/disk.py +144 -11
  60. wandb/sdk/internal/system/system_info.py +6 -2
  61. wandb/sdk/launch/__init__.py +5 -0
  62. wandb/sdk/launch/{launch.py → _launch.py} +53 -54
  63. wandb/sdk/launch/{launch_add.py → _launch_add.py} +34 -31
  64. wandb/sdk/launch/_project_spec.py +13 -2
  65. wandb/sdk/launch/agent/agent.py +103 -59
  66. wandb/sdk/launch/agent/run_queue_item_file_saver.py +6 -4
  67. wandb/sdk/launch/builder/build.py +19 -1
  68. wandb/sdk/launch/builder/docker_builder.py +5 -1
  69. wandb/sdk/launch/builder/kaniko_builder.py +5 -1
  70. wandb/sdk/launch/create_job.py +20 -5
  71. wandb/sdk/launch/loader.py +14 -5
  72. wandb/sdk/launch/runner/abstract.py +0 -2
  73. wandb/sdk/launch/runner/kubernetes_monitor.py +329 -0
  74. wandb/sdk/launch/runner/kubernetes_runner.py +66 -209
  75. wandb/sdk/launch/runner/local_container.py +5 -2
  76. wandb/sdk/launch/runner/local_process.py +4 -1
  77. wandb/sdk/launch/sweeps/scheduler.py +43 -25
  78. wandb/sdk/launch/sweeps/utils.py +5 -3
  79. wandb/sdk/launch/utils.py +3 -1
  80. wandb/sdk/lib/_settings_toposort_generate.py +3 -9
  81. wandb/sdk/lib/_settings_toposort_generated.py +27 -3
  82. wandb/sdk/lib/_wburls_generated.py +1 -0
  83. wandb/sdk/lib/filenames.py +27 -6
  84. wandb/sdk/lib/filesystem.py +181 -7
  85. wandb/sdk/lib/fsm.py +5 -3
  86. wandb/sdk/lib/gql_request.py +3 -0
  87. wandb/sdk/lib/ipython.py +7 -0
  88. wandb/sdk/lib/wburls.py +1 -0
  89. wandb/sdk/service/port_file.py +2 -15
  90. wandb/sdk/service/server.py +7 -55
  91. wandb/sdk/service/service.py +56 -26
  92. wandb/sdk/service/service_base.py +1 -1
  93. wandb/sdk/service/streams.py +11 -5
  94. wandb/sdk/verify/verify.py +2 -2
  95. wandb/sdk/wandb_init.py +8 -2
  96. wandb/sdk/wandb_manager.py +4 -14
  97. wandb/sdk/wandb_run.py +143 -53
  98. wandb/sdk/wandb_settings.py +148 -35
  99. wandb/testing/relay.py +85 -38
  100. wandb/util.py +87 -4
  101. wandb/wandb_torch.py +24 -38
  102. {wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/METADATA +48 -23
  103. {wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/RECORD +107 -103
  104. {wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/WHEEL +1 -1
  105. wandb/proto/v3/wandb_server_pb2_grpc.py +0 -1422
  106. wandb/proto/v4/wandb_server_pb2_grpc.py +0 -1422
  107. wandb/proto/wandb_server_pb2_grpc.py +0 -8
  108. wandb/sdk/artifacts/storage_policies/s3_bucket_policy.py +0 -61
  109. wandb/sdk/interface/interface_grpc.py +0 -460
  110. wandb/sdk/service/server_grpc.py +0 -444
  111. wandb/sdk/service/service_grpc.py +0 -73
  112. {wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/LICENSE +0 -0
  113. {wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/entry_points.txt +0 -0
  114. {wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/top_level.txt +0 -0
wandb/__init__.py CHANGED
@@ -11,7 +11,8 @@ For scripts and interactive notebooks, see https://github.com/wandb/examples.
11
11
 
12
12
  For reference documentation, see https://docs.wandb.com/ref/python.
13
13
  """
14
- __version__ = "0.15.9"
14
+ __version__ = "0.15.11"
15
+ _minimum_nexus_version = "0.16.0b1"
15
16
 
16
17
  # Used with pypi checks and other messages related to pip
17
18
  _wandb_module = "wandb"
@@ -84,6 +85,8 @@ from wandb import plots # deprecating this
84
85
  from wandb.integration.sagemaker import sagemaker_auth
85
86
  from wandb.sdk.internal import profiler
86
87
 
88
+ # Artifact import types
89
+ from wandb.sdk.artifacts.artifact_ttl import ArtifactTTL
87
90
 
88
91
  # Used to make sure we don't use some code in the incorrect process context
89
92
  _IS_INTERNAL_PROCESS = False
@@ -223,4 +226,5 @@ __all__ = (
223
226
  "Object3D",
224
227
  "Molecule",
225
228
  "Histogram",
229
+ "ArtifactTTL",
226
230
  )
wandb/apis/public.py CHANGED
@@ -55,7 +55,7 @@ from wandb.sdk.launch.utils import (
55
55
  apply_patch,
56
56
  convert_jupyter_notebook_to_script,
57
57
  )
58
- from wandb.sdk.lib import ipython, retry, runid
58
+ from wandb.sdk.lib import ipython, json_util, retry, runid
59
59
  from wandb.sdk.lib.gql_request import GraphQLSession
60
60
  from wandb.sdk.lib.paths import LogicalPath
61
61
 
@@ -63,6 +63,8 @@ if TYPE_CHECKING:
63
63
  import wandb.apis.reports
64
64
  import wandb.apis.reports.util
65
65
 
66
+ from wandb.sdk.artifacts.artifact_state import ArtifactState
67
+
66
68
  logger = logging.getLogger(__name__)
67
69
 
68
70
  # Only retry requests for 20 seconds in the public api
@@ -253,7 +255,7 @@ class Api:
253
255
  You can also set defaults for `entity`, `project`, and `run`.
254
256
  """
255
257
 
256
- _HTTP_TIMEOUT = env.get_http_timeout(9)
258
+ _HTTP_TIMEOUT = env.get_http_timeout(19)
257
259
  VIEWER_QUERY = gql(
258
260
  """
259
261
  query Viewer{
@@ -393,6 +395,9 @@ class Api:
393
395
  auth = None
394
396
  if not _thread_local_api_settings.cookies:
395
397
  auth = ("api", self.api_key)
398
+ proxies = self.settings.get("_proxies") or json.loads(
399
+ os.environ.get("WANDB__PROXIES", "{}")
400
+ )
396
401
  self._base_client = Client(
397
402
  transport=GraphQLSession(
398
403
  headers={
@@ -407,6 +412,7 @@ class Api:
407
412
  auth=auth,
408
413
  url="%s/graphql" % self.settings["base_url"],
409
414
  cookies=_thread_local_api_settings.cookies,
415
+ proxies=proxies,
410
416
  )
411
417
  )
412
418
  self._client = RetryingClient(self._base_client)
@@ -507,7 +513,7 @@ class Api:
507
513
  # 2. create default resource config, receive config id
508
514
  config_json = json.dumps({"resource_args": {type: config}})
509
515
  create_config_result = api.create_default_resource_config(
510
- entity, LAUNCH_DEFAULT_PROJECT, type, config_json
516
+ entity, type, config_json
511
517
  )
512
518
  if not create_config_result["success"]:
513
519
  raise wandb.Error("failed to create default resource config")
@@ -1835,6 +1841,7 @@ class Run(Attrs):
1835
1841
  read_only (boolean): Whether the run is editable
1836
1842
  history_keys (str): Keys of the history metrics that have been logged
1837
1843
  with `wandb.log({key: value})`
1844
+ metadata (str): Metadata about the run from wandb-metadata.json
1838
1845
  """
1839
1846
 
1840
1847
  def __init__(
@@ -1867,6 +1874,7 @@ class Run(Attrs):
1867
1874
  except OSError:
1868
1875
  pass
1869
1876
  self._summary = None
1877
+ self._metadata: Optional[Dict[str, Any]] = None
1870
1878
  self._state = _attrs.get("state", "not found")
1871
1879
 
1872
1880
  self.load(force=not _attrs)
@@ -1988,11 +1996,18 @@ class Run(Attrs):
1988
1996
  withRuns=False,
1989
1997
  )
1990
1998
 
1991
- self._attrs["summaryMetrics"] = (
1992
- json.loads(self._attrs["summaryMetrics"])
1993
- if self._attrs.get("summaryMetrics")
1994
- else {}
1995
- )
1999
+ try:
2000
+ self._attrs["summaryMetrics"] = (
2001
+ json.loads(self._attrs["summaryMetrics"])
2002
+ if self._attrs.get("summaryMetrics")
2003
+ else {}
2004
+ )
2005
+ except json.decoder.JSONDecodeError:
2006
+ # ignore invalid utf-8 or control characters
2007
+ self._attrs["summaryMetrics"] = json.loads(
2008
+ self._attrs["summaryMetrics"],
2009
+ strict=False,
2010
+ )
1996
2011
  self._attrs["systemMetrics"] = (
1997
2012
  json.loads(self._attrs["systemMetrics"])
1998
2013
  if self._attrs.get("systemMetrics")
@@ -2385,6 +2400,18 @@ class Run(Attrs):
2385
2400
  path.insert(2, "runs")
2386
2401
  return self.client.app_url + "/".join(path)
2387
2402
 
2403
+ @property
2404
+ def metadata(self):
2405
+ if self._metadata is None:
2406
+ try:
2407
+ f = self.file("wandb-metadata.json")
2408
+ contents = util.download_file_into_memory(f.url, Api().api_key)
2409
+ self._metadata = json_util.loads(contents)
2410
+ except: # noqa: E722
2411
+ # file doesn't exist, or can't be downloaded, or can't be parsed
2412
+ pass
2413
+ return self._metadata
2414
+
2388
2415
  @property
2389
2416
  def lastHistoryStep(self): # noqa: N802
2390
2417
  query = gql(
@@ -2603,8 +2630,6 @@ class QueuedRun:
2603
2630
  def wait_until_running(self):
2604
2631
  if self._run is not None:
2605
2632
  return self._run
2606
- if self.container_job:
2607
- raise LaunchError("Container jobs cannot be waited on")
2608
2633
 
2609
2634
  while True:
2610
2635
  # sleep here to hide an ugly warning
@@ -2656,6 +2681,7 @@ class RunQueue:
2656
2681
  self._default_resource_config = _default_resource_config
2657
2682
  self._type = None
2658
2683
  self._items = None
2684
+ self._id = None
2659
2685
 
2660
2686
  @property
2661
2687
  def name(self):
@@ -2687,6 +2713,12 @@ class RunQueue:
2687
2713
  self._get_default_resource_config()
2688
2714
  return self._default_resource_config
2689
2715
 
2716
+ @property
2717
+ def id(self) -> str:
2718
+ if self._id is None:
2719
+ self._get_metadata()
2720
+ return self._id
2721
+
2690
2722
  @property
2691
2723
  def items(self) -> List[QueuedRun]:
2692
2724
  """Up to the first 100 queued runs. Modifying this list will not modify the queue or any enqueued items!"""
@@ -2695,6 +2727,30 @@ class RunQueue:
2695
2727
  self._get_items()
2696
2728
  return self._items
2697
2729
 
2730
+ @normalize_exceptions
2731
+ def delete(self):
2732
+ """Delete the run queue from the wandb backend."""
2733
+ query = gql(
2734
+ """
2735
+ mutation DeleteRunQueue($id: ID!) {
2736
+ deleteRunQueues(input: {queueIDs: [$id]}) {
2737
+ success
2738
+ clientMutationId
2739
+ }
2740
+ }
2741
+ """
2742
+ )
2743
+ variable_values = {"id": self.id}
2744
+ res = self._client.execute(query, variable_values)
2745
+ if res["deleteRunQueues"]["success"]:
2746
+ self._id = None
2747
+ self._access = None
2748
+ self._default_resource_config_id = None
2749
+ self._default_resource_config = None
2750
+ self._items = None
2751
+ else:
2752
+ raise CommError(f"Failed to delete run queue {self.name}")
2753
+
2698
2754
  def __repr__(self):
2699
2755
  return f"<RunQueue {self._entity}/{self._name}>"
2700
2756
 
@@ -2705,6 +2761,7 @@ class RunQueue:
2705
2761
  query GetRunQueueMetadata($projectName: String!, $entityName: String!, $runQueue: String!) {
2706
2762
  project(name: $projectName, entityName: $entityName) {
2707
2763
  runQueue(name: $runQueue) {
2764
+ id
2708
2765
  access
2709
2766
  defaultResourceConfigID
2710
2767
  }
@@ -2718,6 +2775,7 @@ class RunQueue:
2718
2775
  "runQueue": self._name,
2719
2776
  }
2720
2777
  res = self._client.execute(query, variable_values)
2778
+ self._id = res["project"]["runQueue"]["id"]
2721
2779
  self._access = res["project"]["runQueue"]["access"]
2722
2780
  self._default_resource_config_id = res["project"]["runQueue"][
2723
2781
  "defaultResourceConfigID"
@@ -4158,9 +4216,8 @@ class RunArtifacts(Paginator):
4158
4216
  }
4159
4217
  }
4160
4218
  }
4161
- %s
4162
4219
  """
4163
- % wandb.Artifact._GQL_FRAGMENT
4220
+ + wandb.Artifact._get_gql_artifact_fragment()
4164
4221
  )
4165
4222
 
4166
4223
  input_query = gql(
@@ -4186,9 +4243,8 @@ class RunArtifacts(Paginator):
4186
4243
  }
4187
4244
  }
4188
4245
  }
4189
- %s
4190
4246
  """
4191
- % wandb.Artifact._GQL_FRAGMENT
4247
+ + wandb.Artifact._get_gql_artifact_fragment()
4192
4248
  )
4193
4249
 
4194
4250
  self.run = run
@@ -4425,6 +4481,66 @@ class ArtifactCollection:
4425
4481
  self._attrs = response["project"]["artifactType"]["artifactCollection"]
4426
4482
  return self._attrs
4427
4483
 
4484
+ @normalize_exceptions
4485
+ def is_sequence(self) -> bool:
4486
+ """Return True if this is a sequence."""
4487
+ query = gql(
4488
+ """
4489
+ query FindSequence($entity: String!, $project: String!, $collection: String!, $type: String!) {
4490
+ project(name: $project, entityName: $entity) {
4491
+ artifactType(name: $type) {
4492
+ __typename
4493
+ artifactSequence(name: $collection) {
4494
+ __typename
4495
+ }
4496
+ }
4497
+ }
4498
+ }
4499
+ """
4500
+ )
4501
+ variables = {
4502
+ "entity": self.entity,
4503
+ "project": self.project,
4504
+ "collection": self.name,
4505
+ "type": self.type,
4506
+ }
4507
+ res = self.client.execute(query, variable_values=variables)
4508
+ sequence = res["project"]["artifactType"]["artifactSequence"]
4509
+ return sequence is not None and sequence["__typename"] == "ArtifactSequence"
4510
+
4511
+ @normalize_exceptions
4512
+ def delete(self):
4513
+ """Delete the entire artifact collection."""
4514
+ if self.is_sequence():
4515
+ mutation = gql(
4516
+ """
4517
+ mutation deleteArtifactSequence($id: ID!) {
4518
+ deleteArtifactSequence(input: {
4519
+ artifactSequenceID: $id
4520
+ }) {
4521
+ artifactCollection {
4522
+ state
4523
+ }
4524
+ }
4525
+ }
4526
+ """
4527
+ )
4528
+ else:
4529
+ mutation = gql(
4530
+ """
4531
+ mutation deleteArtifactPortfolio($id: ID!) {
4532
+ deleteArtifactPortfolio(input: {
4533
+ artifactPortfolioID: $id
4534
+ }) {
4535
+ artifactCollection {
4536
+ state
4537
+ }
4538
+ }
4539
+ }
4540
+ """
4541
+ )
4542
+ self.client.execute(mutation, variable_values={"id": self.id})
4543
+
4428
4544
  def __repr__(self):
4429
4545
  return f"<ArtifactCollection {self.name} ({self.type})>"
4430
4546
 
@@ -4490,7 +4606,7 @@ class ArtifactVersions(Paginator):
4490
4606
  artifact_collection_edge_name(
4491
4607
  server_supports_artifact_collections_gql_edges(client)
4492
4608
  ),
4493
- wandb.Artifact._GQL_FRAGMENT,
4609
+ wandb.Artifact._get_gql_artifact_fragment(),
4494
4610
  )
4495
4611
  )
4496
4612
  super().__init__(client, variables, per_page)
@@ -4687,6 +4803,10 @@ class Job:
4687
4803
  code_artifact = self._api.artifact(name=artifact_string, type="code")
4688
4804
  if code_artifact is None:
4689
4805
  raise LaunchError("No code artifact found")
4806
+ if code_artifact.state == ArtifactState.DELETED:
4807
+ raise LaunchError(
4808
+ f"Job {self.name} references deleted code artifact {code_artifact.name}"
4809
+ )
4690
4810
  return code_artifact
4691
4811
 
4692
4812
  def _configure_launch_project_notebook(self, launch_project):
@@ -4752,7 +4872,7 @@ class Job:
4752
4872
  resource_args=None,
4753
4873
  project_queue=None,
4754
4874
  ):
4755
- from wandb.sdk.launch import launch_add
4875
+ from wandb.sdk.launch import _launch_add
4756
4876
 
4757
4877
  run_config = {}
4758
4878
  for key, item in config.items():
@@ -4772,7 +4892,7 @@ class Job:
4772
4892
  if isinstance(assigned_config_type, InvalidType):
4773
4893
  raise TypeError(self._input_types.explain(run_config))
4774
4894
 
4775
- queued_run = launch_add.launch_add(
4895
+ queued_run = _launch_add.launch_add(
4776
4896
  job=self._name,
4777
4897
  config={"overrides": {"run_config": run_config}},
4778
4898
  project=project or self._project,
@@ -650,7 +650,7 @@ class RunComparer(Panel):
650
650
 
651
651
  class MediaBrowser(Panel):
652
652
  num_columns: Optional[int] = Attr(json_path="spec.config.columnCount")
653
- media_keys: Optional[str] = Attr(json_path="spec.config.media_keys")
653
+ media_keys: Optional[str] = Attr(json_path="spec.config.mediaKeys")
654
654
  # Attr(json_path="spec.config.chartTitle")
655
655
  # Attr(json_path="spec.config.stepIndex")
656
656
  # Attr(json_path="spec.config.mediaIndex")
@@ -23,4 +23,5 @@ from ._blocks import (
23
23
  WeaveBlockArtifact,
24
24
  WeaveBlockArtifactVersionedFile,
25
25
  WeaveBlockSummaryTable,
26
+ Twitter,
26
27
  )
@@ -1,3 +1,4 @@
1
+ import base64
1
2
  import inspect
2
3
  import json
3
4
  import re
@@ -67,13 +68,34 @@ class Report(Base):
67
68
 
68
69
  @staticmethod
69
70
  def _url_to_report_id(url):
71
+ path_msg = "Path must be `entity/project/reports/report_title--report_id`"
70
72
  try:
71
- report, *_ = url.split("?")
72
- # If the report title ends in trailing space
73
- report = report.replace("---", "--")
74
- *_, report_id = report.split("--")
73
+ report_path, *_ = url.split("?")
74
+ report_path = report_path.replace("---", "--")
75
+
76
+ if "--" not in report_path:
77
+ raise ValueError(path_msg)
78
+
79
+ *_, report_id = report_path.split("--")
80
+ if len(report_id) == 0:
81
+ raise ValueError("Invalid report id")
82
+
83
+ report_id = report_id.strip()
84
+
85
+ """
86
+ Server does not generate IDs with correct padding, so decode with default validate=False.
87
+ Then re-encode it with correct padding.
88
+ https://stackoverflow.com/questions/2941995/python-ignore-incorrect-padding-error-when-base64-decoding
89
+
90
+ Corresponding core app logic that strips the padding in url
91
+ https://github.com/wandb/core/blob/b563437c1f3237ec35b1fb388ac14abbab7b4279/frontends/app/src/util/url/shared.ts#L33-L78
92
+ """
93
+ report_id = base64.b64encode(base64.b64decode(report_id + "==")).decode(
94
+ "utf-8"
95
+ )
96
+
75
97
  except ValueError as e:
76
- raise ValueError("Path must be `entity/project/reports/report_id`") from e
98
+ raise ValueError(path_msg) from e
77
99
  else:
78
100
  return report_id
79
101
 
wandb/cli/cli.py CHANGED
@@ -34,8 +34,8 @@ from wandb.apis import InternalApi, PublicApi
34
34
  from wandb.integration.magic import magic_install
35
35
  from wandb.sdk.artifacts.artifacts_cache import get_artifacts_cache
36
36
  from wandb.sdk.launch import utils as launch_utils
37
+ from wandb.sdk.launch._launch_add import _launch_add
37
38
  from wandb.sdk.launch.errors import ExecutionError, LaunchError
38
- from wandb.sdk.launch.launch_add import _launch_add
39
39
  from wandb.sdk.launch.sweeps import utils as sweep_utils
40
40
  from wandb.sdk.launch.sweeps.scheduler import Scheduler
41
41
  from wandb.sdk.lib import filesystem
@@ -249,9 +249,6 @@ def login(key, host, cloud, relogin, anonymously, no_offline=False):
249
249
  @cli.command(
250
250
  context_settings=CONTEXT, help="Run a wandb service", name="service", hidden=True
251
251
  )
252
- @click.option(
253
- "--grpc-port", default=None, type=int, help="The host port to bind grpc service."
254
- )
255
252
  @click.option(
256
253
  "--sock-port", default=None, type=int, help="The host port to bind socket service."
257
254
  )
@@ -260,29 +257,24 @@ def login(key, host, cloud, relogin, anonymously, no_offline=False):
260
257
  @click.option("--pid", default=None, type=int, help="The parent process id to monitor.")
261
258
  @click.option("--debug", is_flag=True, help="log debug info")
262
259
  @click.option("--serve-sock", is_flag=True, help="use socket mode")
263
- @click.option("--serve-grpc", is_flag=True, help="use grpc mode")
264
260
  @display_error
265
261
  def service(
266
- grpc_port=None,
267
262
  sock_port=None,
268
263
  port_filename=None,
269
264
  address=None,
270
265
  pid=None,
271
266
  debug=False,
272
267
  serve_sock=False,
273
- serve_grpc=False,
274
268
  ):
275
269
  from wandb.sdk.service.server import WandbServer
276
270
 
277
271
  server = WandbServer(
278
- grpc_port=grpc_port,
279
272
  sock_port=sock_port,
280
273
  port_fname=port_filename,
281
274
  address=address,
282
275
  pid=pid,
283
276
  debug=debug,
284
277
  serve_sock=serve_sock,
285
- serve_grpc=serve_grpc,
286
278
  )
287
279
  server.serve()
288
280
 
@@ -1250,6 +1242,12 @@ def launch_sweep(
1250
1242
  hidden=True,
1251
1243
  help="Name of the project containing the queue to push to. If none, defaults to entity level queues.",
1252
1244
  )
1245
+ @click.option(
1246
+ "--dockerfile",
1247
+ "-D",
1248
+ default=None,
1249
+ help="Path to the Dockerfile used to build the job, relative to the job's root",
1250
+ )
1253
1251
  @display_error
1254
1252
  def launch(
1255
1253
  uri,
@@ -1268,6 +1266,7 @@ def launch(
1268
1266
  build,
1269
1267
  repository,
1270
1268
  project_queue,
1269
+ dockerfile,
1271
1270
  ):
1272
1271
  """Start a W&B run from the given URI.
1273
1272
 
@@ -1282,7 +1281,7 @@ def launch(
1282
1281
  logger.info(
1283
1282
  f"=== Launch called with kwargs {locals()} CLI Version: {wandb.__version__}==="
1284
1283
  )
1285
- from wandb.sdk.launch import launch as wandb_launch
1284
+ from wandb.sdk.launch._launch import _launch
1286
1285
 
1287
1286
  api = _get_cling_api()
1288
1287
  wandb._sentry.configure_scope(process_context="launch_cli")
@@ -1292,6 +1291,11 @@ def launch(
1292
1291
  "Cannot use both --async and --queue with wandb launch, see help for details."
1293
1292
  )
1294
1293
 
1294
+ if queue and docker_image and not project:
1295
+ raise LaunchError(
1296
+ "Cannot use --queue and --docker together without a project. Please specify a project with --project or -p."
1297
+ )
1298
+
1295
1299
  if resource_args is not None:
1296
1300
  resource_args = util.load_json_yaml_dict(resource_args)
1297
1301
  if resource_args is None:
@@ -1321,26 +1325,35 @@ def launch(
1321
1325
 
1322
1326
  run_id = config.get("run_id")
1323
1327
 
1328
+ if dockerfile:
1329
+ if "overrides" in config:
1330
+ config["overrides"]["dockerfile"] = dockerfile
1331
+ else:
1332
+ config["overrides"] = {"dockerfile": dockerfile}
1333
+
1324
1334
  if queue is None:
1325
1335
  # direct launch
1326
1336
  try:
1327
- wandb_launch.run(
1337
+ run = _launch(
1328
1338
  api,
1329
1339
  uri,
1330
1340
  job,
1331
- entry_point,
1332
- git_version,
1333
1341
  project=project,
1334
1342
  entity=entity,
1335
1343
  docker_image=docker_image,
1336
1344
  name=name,
1345
+ entry_point=entry_point,
1346
+ version=git_version,
1337
1347
  resource=resource,
1338
1348
  resource_args=resource_args,
1339
- config=config,
1349
+ launch_config=config,
1340
1350
  synchronous=(not run_async),
1341
1351
  run_id=run_id,
1342
1352
  repository=repository,
1343
1353
  )
1354
+ if run.get_status().state in ["failed", "stopped", "preempted"]:
1355
+ wandb.termerror("Launched run exited with non-zero status")
1356
+ sys.exit(1)
1344
1357
  except LaunchError as e:
1345
1358
  logger.error("=== %s ===", e)
1346
1359
  wandb._sentry.exception(e)
@@ -1436,11 +1449,11 @@ def launch_agent(
1436
1449
  "--url is not supported in this version, upgrade with: pip install -u wandb"
1437
1450
  )
1438
1451
 
1439
- from wandb.sdk.launch import launch as wandb_launch
1452
+ import wandb.sdk.launch._launch as _launch
1440
1453
 
1441
1454
  api = _get_cling_api()
1442
1455
  wandb._sentry.configure_scope(process_context="launch_agent")
1443
- agent_config, api = wandb_launch.resolve_agent_config(
1456
+ agent_config, api = _launch.resolve_agent_config(
1444
1457
  entity, project, max_jobs, queues, config
1445
1458
  )
1446
1459
  if agent_config.get("project") is None:
@@ -1457,7 +1470,7 @@ def launch_agent(
1457
1470
 
1458
1471
  wandb.termlog("Starting launch agent ✨")
1459
1472
  try:
1460
- wandb_launch.create_and_run_agent(api, agent_config)
1473
+ _launch.create_and_run_agent(api, agent_config)
1461
1474
  except Exception as e:
1462
1475
  wandb._sentry.exception(e)
1463
1476
  raise e
@@ -1497,11 +1510,11 @@ def scheduler(
1497
1510
  ctx,
1498
1511
  sweep_id,
1499
1512
  ):
1500
- api = _get_cling_api()
1513
+ api = InternalApi()
1501
1514
  if api.api_key is None:
1502
1515
  wandb.termlog("Login to W&B to use the sweep scheduler feature")
1503
1516
  ctx.invoke(login, no_offline=True)
1504
- api = _get_cling_api(reset=True)
1517
+ api = InternalApi(reset=True)
1505
1518
 
1506
1519
  wandb._sentry.configure_scope(process_context="sweep_scheduler")
1507
1520
  wandb.termlog("Starting a Launch Scheduler 🚀")
@@ -1531,12 +1544,12 @@ def scheduler(
1531
1544
  raise e
1532
1545
 
1533
1546
 
1534
- @cli.group("job")
1547
+ @cli.group(help="Commands for managing and viewing W&B jobs")
1535
1548
  def job() -> None:
1536
1549
  pass
1537
1550
 
1538
1551
 
1539
- @job.command("list")
1552
+ @job.command("list", help="List jobs in a project")
1540
1553
  @click.option(
1541
1554
  "--project",
1542
1555
  "-p",
@@ -1578,7 +1591,9 @@ def _list(project, entity):
1578
1591
  wandb.termlog(f"{name} -- versions ({len(aliases)}): {aliases_str}")
1579
1592
 
1580
1593
 
1581
- @job.command()
1594
+ @job.command(
1595
+ help="Describe a launch job. Provide the launch job in the form of: entity/project/job-name:alias-or-version"
1596
+ )
1582
1597
  @click.argument("job")
1583
1598
  def describe(job):
1584
1599
  public_api = PublicApi()
@@ -2058,8 +2073,15 @@ def artifact():
2058
2073
  multiple=True,
2059
2074
  help="An alias to apply to this artifact",
2060
2075
  )
2076
+ @click.option("--id", "run_id", help="The run you want to upload to.")
2077
+ @click.option(
2078
+ "--resume",
2079
+ is_flag=True,
2080
+ default=None,
2081
+ help="Resume the last run from your current directory.",
2082
+ )
2061
2083
  @display_error
2062
- def put(path, name, description, type, alias):
2084
+ def put(path, name, description, type, alias, run_id, resume):
2063
2085
  if name is None:
2064
2086
  name = os.path.basename(path)
2065
2087
  public_api = PublicApi()
@@ -2087,32 +2109,21 @@ def put(path, name, description, type, alias):
2087
2109
  raise ClickException("Path argument must be a file or directory")
2088
2110
 
2089
2111
  run = wandb.init(
2090
- entity=entity, project=project, config={"path": path}, job_type="cli_put"
2091
- )
2092
- # We create the artifact manually to get the current version
2093
- res, _ = api.create_artifact(
2094
- type,
2095
- artifact_name,
2096
- artifact.digest,
2097
- client_id=artifact._client_id,
2098
- sequence_client_id=artifact._sequence_client_id,
2099
- entity_name=entity,
2100
- project_name=project,
2101
- run_name=run.id,
2102
- description=description,
2103
- aliases=[{"artifactCollectionName": artifact_name, "alias": a} for a in alias],
2112
+ entity=entity,
2113
+ project=project,
2114
+ config={"path": path},
2115
+ job_type="cli_put",
2116
+ id=run_id,
2117
+ resume=resume,
2104
2118
  )
2105
- artifact_path = artifact_path.split(":")[0] + ":" + res.get("version", "latest")
2106
- # Re-create the artifact and actually upload any files needed
2107
2119
  run.log_artifact(artifact, aliases=alias)
2108
2120
  artifact.wait()
2109
2121
 
2110
2122
  wandb.termlog(
2111
2123
  "Artifact uploaded, use this artifact in a run by adding:\n", prefix=False
2112
2124
  )
2113
-
2114
2125
  wandb.termlog(
2115
- f' artifact = run.use_artifact("{artifact_path}")\n',
2126
+ f' artifact = run.use_artifact("{artifact.source_qualified_name}")\n',
2116
2127
  prefix=False,
2117
2128
  )
2118
2129
 
wandb/docker/__init__.py CHANGED
@@ -82,6 +82,22 @@ def is_buildx_installed() -> bool:
82
82
  return _buildx_installed
83
83
 
84
84
 
85
+ def is_docker_installed() -> bool:
86
+ """Return `True` if docker is installed and working, else `False`."""
87
+ try:
88
+ # Run the docker --version command
89
+ result = subprocess.run(
90
+ ["docker", "--version"], stdout=subprocess.PIPE, stderr=subprocess.PIPE
91
+ )
92
+ if result.returncode == 0:
93
+ return True
94
+ else:
95
+ return False
96
+ except FileNotFoundError:
97
+ # If docker command is not found
98
+ return False
99
+
100
+
85
101
  def build(
86
102
  tags: List[str], file: str, context_path: str, platform: Optional[str] = None
87
103
  ) -> str:
@@ -304,6 +320,7 @@ __all__ = [
304
320
  "run",
305
321
  "image_id",
306
322
  "image_id_from_registry",
323
+ "is_docker_installed",
307
324
  "auth_token",
308
325
  "parse",
309
326
  "parse_repository_tag",
wandb/docker/auth.py CHANGED
@@ -106,7 +106,7 @@ def resolve_repository_name(repo_name: str) -> Tuple[str, str]:
106
106
  index_name, remote_name = split_repo_name(repo_name)
107
107
  if index_name[0] == "-" or index_name[-1] == "-":
108
108
  raise InvalidRepositoryError(
109
- f"Invalid index name ({index_name}). Cannot begin or end with a" " hyphen."
109
+ f"Invalid index name ({index_name}). Cannot begin or end with a hyphen."
110
110
  )
111
111
  return resolve_index_name(index_name), remote_name
112
112