wandb 0.16.5__py3-none-any.whl → 0.17.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (194) hide show
  1. package_readme.md +95 -0
  2. wandb/__init__.py +2 -3
  3. wandb/agents/pyagent.py +0 -1
  4. wandb/analytics/sentry.py +2 -1
  5. wandb/apis/importers/internals/internal.py +0 -1
  6. wandb/apis/importers/internals/protocols.py +30 -56
  7. wandb/apis/importers/mlflow.py +13 -26
  8. wandb/apis/importers/wandb.py +8 -14
  9. wandb/apis/internal.py +0 -3
  10. wandb/apis/public/api.py +55 -3
  11. wandb/apis/public/artifacts.py +1 -0
  12. wandb/apis/public/files.py +1 -0
  13. wandb/apis/public/history.py +1 -0
  14. wandb/apis/public/jobs.py +17 -4
  15. wandb/apis/public/projects.py +1 -0
  16. wandb/apis/public/reports.py +1 -0
  17. wandb/apis/public/runs.py +15 -17
  18. wandb/apis/public/sweeps.py +1 -0
  19. wandb/apis/public/teams.py +1 -0
  20. wandb/apis/public/users.py +1 -0
  21. wandb/apis/reports/v1/_blocks.py +3 -7
  22. wandb/apis/reports/v2/gql.py +1 -0
  23. wandb/apis/reports/v2/interface.py +3 -4
  24. wandb/apis/reports/v2/internal.py +5 -8
  25. wandb/cli/cli.py +95 -22
  26. wandb/data_types.py +9 -6
  27. wandb/docker/__init__.py +1 -1
  28. wandb/env.py +38 -8
  29. wandb/errors/__init__.py +5 -0
  30. wandb/errors/term.py +10 -2
  31. wandb/filesync/step_checksum.py +1 -4
  32. wandb/filesync/step_prepare.py +4 -24
  33. wandb/filesync/step_upload.py +4 -106
  34. wandb/filesync/upload_job.py +0 -76
  35. wandb/integration/catboost/catboost.py +1 -1
  36. wandb/integration/fastai/__init__.py +1 -0
  37. wandb/integration/huggingface/resolver.py +2 -2
  38. wandb/integration/keras/__init__.py +1 -0
  39. wandb/integration/keras/callbacks/metrics_logger.py +1 -1
  40. wandb/integration/keras/keras.py +7 -7
  41. wandb/integration/langchain/wandb_tracer.py +1 -0
  42. wandb/integration/lightning/fabric/logger.py +1 -3
  43. wandb/integration/metaflow/metaflow.py +41 -6
  44. wandb/integration/openai/fine_tuning.py +77 -40
  45. wandb/integration/prodigy/prodigy.py +1 -1
  46. wandb/old/summary.py +1 -1
  47. wandb/plot/confusion_matrix.py +1 -1
  48. wandb/plot/pr_curve.py +2 -1
  49. wandb/plot/roc_curve.py +2 -1
  50. wandb/{plots → plot}/utils.py +13 -25
  51. wandb/proto/v3/wandb_internal_pb2.py +364 -332
  52. wandb/proto/v3/wandb_settings_pb2.py +2 -2
  53. wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
  54. wandb/proto/v4/wandb_internal_pb2.py +322 -316
  55. wandb/proto/v4/wandb_settings_pb2.py +2 -2
  56. wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
  57. wandb/proto/wandb_deprecated.py +7 -1
  58. wandb/proto/wandb_internal_codegen.py +3 -29
  59. wandb/sdk/artifacts/artifact.py +51 -20
  60. wandb/sdk/artifacts/artifact_download_logger.py +1 -0
  61. wandb/sdk/artifacts/artifact_file_cache.py +18 -4
  62. wandb/sdk/artifacts/artifact_instance_cache.py +1 -0
  63. wandb/sdk/artifacts/artifact_manifest.py +1 -0
  64. wandb/sdk/artifacts/artifact_manifest_entry.py +7 -3
  65. wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +1 -0
  66. wandb/sdk/artifacts/artifact_saver.py +18 -27
  67. wandb/sdk/artifacts/artifact_state.py +1 -0
  68. wandb/sdk/artifacts/artifact_ttl.py +1 -0
  69. wandb/sdk/artifacts/exceptions.py +1 -0
  70. wandb/sdk/artifacts/storage_handlers/azure_handler.py +1 -0
  71. wandb/sdk/artifacts/storage_handlers/gcs_handler.py +13 -18
  72. wandb/sdk/artifacts/storage_handlers/http_handler.py +1 -0
  73. wandb/sdk/artifacts/storage_handlers/local_file_handler.py +1 -0
  74. wandb/sdk/artifacts/storage_handlers/multi_handler.py +1 -0
  75. wandb/sdk/artifacts/storage_handlers/s3_handler.py +5 -3
  76. wandb/sdk/artifacts/storage_handlers/tracking_handler.py +1 -0
  77. wandb/sdk/artifacts/storage_handlers/wb_artifact_handler.py +1 -0
  78. wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +1 -0
  79. wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +3 -42
  80. wandb/sdk/artifacts/storage_policy.py +2 -12
  81. wandb/sdk/data_types/_dtypes.py +8 -8
  82. wandb/sdk/data_types/base_types/media.py +3 -6
  83. wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +3 -1
  84. wandb/sdk/data_types/image.py +1 -1
  85. wandb/sdk/data_types/video.py +1 -1
  86. wandb/sdk/integration_utils/auto_logging.py +5 -6
  87. wandb/sdk/integration_utils/data_logging.py +10 -6
  88. wandb/sdk/interface/interface.py +86 -38
  89. wandb/sdk/interface/interface_shared.py +7 -13
  90. wandb/sdk/internal/datastore.py +1 -1
  91. wandb/sdk/internal/file_pusher.py +2 -5
  92. wandb/sdk/internal/file_stream.py +5 -18
  93. wandb/sdk/internal/handler.py +18 -2
  94. wandb/sdk/internal/internal.py +0 -1
  95. wandb/sdk/internal/internal_api.py +1 -129
  96. wandb/sdk/internal/internal_util.py +0 -1
  97. wandb/sdk/internal/job_builder.py +159 -45
  98. wandb/sdk/internal/profiler.py +1 -0
  99. wandb/sdk/internal/progress.py +0 -28
  100. wandb/sdk/internal/run.py +1 -0
  101. wandb/sdk/internal/sender.py +1 -2
  102. wandb/sdk/internal/system/assets/gpu_amd.py +44 -44
  103. wandb/sdk/internal/system/assets/gpu_apple.py +56 -11
  104. wandb/sdk/internal/system/assets/interfaces.py +6 -8
  105. wandb/sdk/internal/system/assets/open_metrics.py +2 -2
  106. wandb/sdk/internal/system/assets/trainium.py +1 -3
  107. wandb/sdk/launch/__init__.py +9 -1
  108. wandb/sdk/launch/_launch.py +9 -24
  109. wandb/sdk/launch/_launch_add.py +1 -3
  110. wandb/sdk/launch/_project_spec.py +188 -241
  111. wandb/sdk/launch/agent/agent.py +115 -48
  112. wandb/sdk/launch/agent/config.py +80 -14
  113. wandb/sdk/launch/builder/abstract.py +69 -1
  114. wandb/sdk/launch/builder/build.py +156 -555
  115. wandb/sdk/launch/builder/context_manager.py +235 -0
  116. wandb/sdk/launch/builder/docker_builder.py +8 -23
  117. wandb/sdk/launch/builder/kaniko_builder.py +161 -159
  118. wandb/sdk/launch/builder/noop.py +1 -0
  119. wandb/sdk/launch/builder/templates/dockerfile.py +92 -0
  120. wandb/sdk/launch/create_job.py +68 -63
  121. wandb/sdk/launch/environment/abstract.py +1 -0
  122. wandb/sdk/launch/environment/gcp_environment.py +1 -0
  123. wandb/sdk/launch/environment/local_environment.py +1 -0
  124. wandb/sdk/launch/inputs/files.py +148 -0
  125. wandb/sdk/launch/inputs/internal.py +217 -0
  126. wandb/sdk/launch/inputs/manage.py +95 -0
  127. wandb/sdk/launch/loader.py +1 -0
  128. wandb/sdk/launch/registry/abstract.py +1 -0
  129. wandb/sdk/launch/registry/azure_container_registry.py +1 -0
  130. wandb/sdk/launch/registry/elastic_container_registry.py +1 -0
  131. wandb/sdk/launch/registry/google_artifact_registry.py +2 -1
  132. wandb/sdk/launch/registry/local_registry.py +1 -0
  133. wandb/sdk/launch/runner/abstract.py +1 -0
  134. wandb/sdk/launch/runner/kubernetes_monitor.py +4 -1
  135. wandb/sdk/launch/runner/kubernetes_runner.py +9 -10
  136. wandb/sdk/launch/runner/local_container.py +2 -3
  137. wandb/sdk/launch/runner/local_process.py +8 -29
  138. wandb/sdk/launch/runner/sagemaker_runner.py +21 -20
  139. wandb/sdk/launch/runner/vertex_runner.py +8 -7
  140. wandb/sdk/launch/sweeps/scheduler.py +7 -4
  141. wandb/sdk/launch/sweeps/scheduler_sweep.py +2 -1
  142. wandb/sdk/launch/sweeps/utils.py +3 -3
  143. wandb/sdk/launch/utils.py +33 -140
  144. wandb/sdk/lib/_settings_toposort_generated.py +1 -5
  145. wandb/sdk/lib/fsm.py +8 -12
  146. wandb/sdk/lib/gitlib.py +4 -4
  147. wandb/sdk/lib/import_hooks.py +1 -1
  148. wandb/sdk/lib/lazyloader.py +0 -1
  149. wandb/sdk/lib/proto_util.py +23 -2
  150. wandb/sdk/lib/redirect.py +19 -14
  151. wandb/sdk/lib/retry.py +3 -2
  152. wandb/sdk/lib/run_moment.py +7 -1
  153. wandb/sdk/lib/tracelog.py +1 -1
  154. wandb/sdk/service/service.py +19 -16
  155. wandb/sdk/verify/verify.py +2 -1
  156. wandb/sdk/wandb_init.py +16 -63
  157. wandb/sdk/wandb_manager.py +2 -2
  158. wandb/sdk/wandb_require.py +5 -0
  159. wandb/sdk/wandb_run.py +164 -90
  160. wandb/sdk/wandb_settings.py +2 -48
  161. wandb/sdk/wandb_setup.py +1 -1
  162. wandb/sklearn/__init__.py +1 -0
  163. wandb/sklearn/plot/__init__.py +1 -0
  164. wandb/sklearn/plot/classifier.py +11 -12
  165. wandb/sklearn/plot/clusterer.py +2 -1
  166. wandb/sklearn/plot/regressor.py +1 -0
  167. wandb/sklearn/plot/shared.py +1 -0
  168. wandb/sklearn/utils.py +1 -0
  169. wandb/testing/relay.py +4 -4
  170. wandb/trigger.py +1 -0
  171. wandb/util.py +67 -54
  172. wandb/wandb_controller.py +2 -3
  173. wandb/wandb_torch.py +1 -2
  174. {wandb-0.16.5.dist-info → wandb-0.17.0.dist-info}/METADATA +67 -70
  175. {wandb-0.16.5.dist-info → wandb-0.17.0.dist-info}/RECORD +178 -188
  176. {wandb-0.16.5.dist-info → wandb-0.17.0.dist-info}/WHEEL +1 -2
  177. wandb/bin/apple_gpu_stats +0 -0
  178. wandb/catboost/__init__.py +0 -9
  179. wandb/fastai/__init__.py +0 -9
  180. wandb/keras/__init__.py +0 -18
  181. wandb/lightgbm/__init__.py +0 -9
  182. wandb/plots/__init__.py +0 -6
  183. wandb/plots/explain_text.py +0 -36
  184. wandb/plots/heatmap.py +0 -81
  185. wandb/plots/named_entity.py +0 -43
  186. wandb/plots/part_of_speech.py +0 -50
  187. wandb/plots/plot_definitions.py +0 -768
  188. wandb/plots/precision_recall.py +0 -121
  189. wandb/plots/roc.py +0 -103
  190. wandb/sacred/__init__.py +0 -3
  191. wandb/xgboost/__init__.py +0 -9
  192. wandb-0.16.5.dist-info/top_level.txt +0 -1
  193. {wandb-0.16.5.dist-info → wandb-0.17.0.dist-info}/entry_points.txt +0 -0
  194. {wandb-0.16.5.dist-info → wandb-0.17.0.dist-info/licenses}/LICENSE +0 -0
@@ -100,6 +100,10 @@ class InterfaceShared(InterfaceBase):
100
100
  rec = self._make_record(telemetry=telem)
101
101
  self._publish(rec)
102
102
 
103
+ def _publish_job_input(self, job_input: pb.JobInputRequest) -> MailboxHandle:
104
+ record = self._make_request(job_input=job_input)
105
+ return self._deliver_record(record)
106
+
103
107
  def _make_stats(self, stats_dict: dict) -> pb.StatsRecord:
104
108
  stats = pb.StatsRecord()
105
109
  stats.stats_type = pb.StatsRecord.StatsType.SYSTEM
@@ -147,6 +151,7 @@ class InterfaceShared(InterfaceBase):
147
151
  telemetry_record: Optional[pb.TelemetryRecordRequest] = None,
148
152
  get_system_metrics: Optional[pb.GetSystemMetricsRequest] = None,
149
153
  python_packages: Optional[pb.PythonPackagesRequest] = None,
154
+ job_input: Optional[pb.JobInputRequest] = None,
150
155
  ) -> pb.Record:
151
156
  request = pb.Request()
152
157
  if login:
@@ -207,6 +212,8 @@ class InterfaceShared(InterfaceBase):
207
212
  request.sync.CopyFrom(sync)
208
213
  elif python_packages:
209
214
  request.python_packages.CopyFrom(python_packages)
215
+ elif job_input:
216
+ request.job_input.CopyFrom(job_input)
210
217
  else:
211
218
  raise Exception("Invalid request")
212
219
  record = self._make_record(request=request)
@@ -239,9 +246,6 @@ class InterfaceShared(InterfaceBase):
239
246
  use_artifact: Optional[pb.UseArtifactRecord] = None,
240
247
  output: Optional[pb.OutputRecord] = None,
241
248
  output_raw: Optional[pb.OutputRawRecord] = None,
242
- launch_wandb_config_parameters: Optional[
243
- pb.LaunchWandbConfigParametersRecord
244
- ] = None,
245
249
  ) -> pb.Record:
246
250
  record = pb.Record()
247
251
  if run:
@@ -286,8 +290,6 @@ class InterfaceShared(InterfaceBase):
286
290
  record.output.CopyFrom(output)
287
291
  elif output_raw:
288
292
  record.output_raw.CopyFrom(output_raw)
289
- elif launch_wandb_config_parameters:
290
- record.wandb_config_parameters.CopyFrom(launch_wandb_config_parameters)
291
293
  else:
292
294
  raise Exception("Invalid record")
293
295
  return record
@@ -417,14 +419,6 @@ class InterfaceShared(InterfaceBase):
417
419
  rec = self._make_record(alert=proto_alert)
418
420
  self._publish(rec)
419
421
 
420
- def _publish_launch_wandb_config_parameters(
421
- self, launch_wandb_config_parameters: pb.LaunchWandbConfigParametersRecord
422
- ) -> None:
423
- rec = self._make_record(
424
- launch_wandb_config_parameters=launch_wandb_config_parameters
425
- )
426
- self._publish(rec)
427
-
428
422
  def _communicate_status(
429
423
  self, status: pb.StatusRequest
430
424
  ) -> Optional[pb.StatusResponse]:
@@ -52,7 +52,7 @@ try:
52
52
  bytes("", "ascii")
53
53
 
54
54
  def strtobytes(x):
55
- """strtobytes."""
55
+ """Strtobytes."""
56
56
  return bytes(x, "iso8859-1")
57
57
 
58
58
  # def bytestostr(x):
@@ -14,7 +14,7 @@ from wandb.sdk.lib.paths import LogicalPath
14
14
 
15
15
  if TYPE_CHECKING:
16
16
  from wandb.sdk.artifacts.artifact_manifest import ArtifactManifest
17
- from wandb.sdk.artifacts.artifact_saver import SaveFn, SaveFnAsync
17
+ from wandb.sdk.artifacts.artifact_saver import SaveFn
18
18
  from wandb.sdk.internal import file_stream, internal_api
19
19
  from wandb.sdk.internal.settings_static import SettingsStatic
20
20
 
@@ -148,11 +148,8 @@ class FilePusher:
148
148
  manifest: "ArtifactManifest",
149
149
  artifact_id: str,
150
150
  save_fn: "SaveFn",
151
- save_fn_async: "SaveFnAsync",
152
151
  ) -> None:
153
- event = step_checksum.RequestStoreManifestFiles(
154
- manifest, artifact_id, save_fn, save_fn_async
155
- )
152
+ event = step_checksum.RequestStoreManifestFiles(manifest, artifact_id, save_fn)
156
153
  self._incoming_queue.put(event)
157
154
 
158
155
  def commit_artifact(
@@ -1,4 +1,3 @@
1
- import base64
2
1
  import functools
3
2
  import itertools
4
3
  import json
@@ -53,7 +52,7 @@ logger = logging.getLogger(__name__)
53
52
 
54
53
  class Chunk(NamedTuple):
55
54
  filename: str
56
- data: Any
55
+ data: str
57
56
 
58
57
 
59
58
  class DefaultFilePolicy:
@@ -227,7 +226,7 @@ class CRDedupeFilePolicy(DefaultFilePolicy):
227
226
  prefix += token + " "
228
227
  return prefix, rest
229
228
 
230
- def process_chunks(self, chunks: List) -> List["ProcessedChunk"]:
229
+ def process_chunks(self, chunks: List[Chunk]) -> List["ProcessedChunk"]:
231
230
  r"""Process chunks.
232
231
 
233
232
  Args:
@@ -300,18 +299,6 @@ class CRDedupeFilePolicy(DefaultFilePolicy):
300
299
  return ret
301
300
 
302
301
 
303
- class BinaryFilePolicy(DefaultFilePolicy):
304
- def __init__(self) -> None:
305
- super().__init__()
306
- self._offset: int = 0
307
-
308
- def process_chunks(self, chunks: List[Chunk]) -> "ProcessedBinaryChunk":
309
- data = b"".join([c.data for c in chunks])
310
- enc = base64.b64encode(data).decode("ascii")
311
- self._offset += len(data)
312
- return {"offset": self._offset, "content": enc, "encoding": "base64"}
313
-
314
-
315
302
  class FileStreamApi:
316
303
  """Pushes chunks of files to our streaming endpoint.
317
304
 
@@ -585,12 +572,12 @@ class FileStreamApi:
585
572
  def enqueue_preempting(self) -> None:
586
573
  self._queue.put(self.Preempting())
587
574
 
588
- def push(self, filename: str, data: Any) -> None:
575
+ def push(self, filename: str, data: str) -> None:
589
576
  """Push a chunk of a file to the streaming endpoint.
590
577
 
591
578
  Arguments:
592
- filename: Name of file that this is a chunk of.
593
- data: File data.
579
+ filename: Name of file to append to.
580
+ data: Text to append to the file.
594
581
  """
595
582
  self._queue.put(Chunk(filename, data))
596
583
 
@@ -50,6 +50,18 @@ SummaryDict = Dict[str, Any]
50
50
 
51
51
  logger = logging.getLogger(__name__)
52
52
 
53
+ # Update (March 5, 2024): Since ~2020/2021, when constructing the summary
54
+ # object, we had replaced the artifact path for media types with the latest
55
+ # artifact path. The primary purpose of this was to support live updating of
56
+ # media objects in the UI (since the default artifact path was fully qualified
57
+ # and would not update). However, in March of 2024, a bug was discovered with
58
+ # this approach which causes this path to be incorrect in cases where the media
59
+ # object is logged to another artifact before being logged to the run. Setting
60
+ # this to `False` disables this copy behavior. The impact is that users will
61
+ # need to refresh to see updates. Ironically, this updating behavior is not
62
+ # currently supported in the UI, so the impact of this change is minimal.
63
+ REPLACE_SUMMARY_ART_PATH_WITH_LATEST = False
64
+
53
65
 
54
66
  def _dict_nested_set(target: Dict[str, Any], key_list: Sequence[str], v: Any) -> None:
55
67
  # recurse down the dictionary structure:
@@ -371,7 +383,11 @@ class HandleManager:
371
383
  updated = True
372
384
  return updated
373
385
  # If the dict is a media object, update the pointer to the latest alias
374
- elif isinstance(v, dict) and handler_util.metric_is_wandb_dict(v):
386
+ elif (
387
+ REPLACE_SUMMARY_ART_PATH_WITH_LATEST
388
+ and isinstance(v, dict)
389
+ and handler_util.metric_is_wandb_dict(v)
390
+ ):
375
391
  if "_latest_artifact_path" in v and "artifact_path" in v:
376
392
  # TODO: Make non-destructive?
377
393
  v["artifact_path"] = v["_latest_artifact_path"]
@@ -381,7 +397,7 @@ class HandleManager:
381
397
  def _update_summary_media_objects(self, v: Dict[str, Any]) -> Dict[str, Any]:
382
398
  # For now, non-recursive - just top level
383
399
  for nk, nv in v.items():
384
- if (
400
+ if REPLACE_SUMMARY_ART_PATH_WITH_LATEST and (
385
401
  isinstance(nv, dict)
386
402
  and handler_util.metric_is_wandb_dict(nv)
387
403
  and "_latest_artifact_path" in nv
@@ -12,7 +12,6 @@ Threads:
12
12
 
13
13
  """
14
14
 
15
-
16
15
  import atexit
17
16
  import logging
18
17
  import os
@@ -1,5 +1,4 @@
1
1
  import ast
2
- import asyncio
3
2
  import base64
4
3
  import datetime
5
4
  import functools
@@ -49,7 +48,7 @@ from ..lib import retry
49
48
  from ..lib.filenames import DIFF_FNAME, METADATA_FNAME
50
49
  from ..lib.gitlib import GitRepo
51
50
  from . import context
52
- from .progress import AsyncProgress, Progress
51
+ from .progress import Progress
53
52
 
54
53
  logger = logging.getLogger(__name__)
55
54
 
@@ -121,13 +120,6 @@ if TYPE_CHECKING:
121
120
  SweepState = Literal["RUNNING", "PAUSED", "CANCELED", "FINISHED"]
122
121
  Number = Union[int, float]
123
122
 
124
- # This funny if/else construction is the simplest thing I've found that
125
- # works at runtime, satisfies Mypy, and gives autocomplete in VSCode:
126
- if TYPE_CHECKING:
127
- import httpx
128
- else:
129
- httpx = util.get_module("httpx")
130
-
131
123
  # class _MappingSupportsCopy(Protocol):
132
124
  # def copy(self) -> "_MappingSupportsCopy": ...
133
125
  # def keys(self) -> Iterable: ...
@@ -161,23 +153,6 @@ def check_httpclient_logger_handler() -> None:
161
153
  httpclient_logger.addHandler(root_logger.handlers[0])
162
154
 
163
155
 
164
- def check_httpx_exc_retriable(exc: Exception) -> bool:
165
- retriable_codes = (308, 408, 409, 429, 500, 502, 503, 504)
166
- return (
167
- isinstance(exc, (httpx.TimeoutException, httpx.NetworkError))
168
- or (
169
- isinstance(exc, httpx.HTTPStatusError)
170
- and exc.response.status_code in retriable_codes
171
- )
172
- or (
173
- isinstance(exc, httpx.HTTPStatusError)
174
- and exc.response.status_code == 400
175
- and "x-amz-meta-md5" in exc.request.headers
176
- and "RequestTimeout" in str(exc.response.content)
177
- )
178
- )
179
-
180
-
181
156
  class _ThreadLocalData(threading.local):
182
157
  context: Optional[context.Context]
183
158
 
@@ -286,10 +261,6 @@ class Api:
286
261
  )
287
262
  )
288
263
 
289
- # httpx is an optional dependency, so we lazily instantiate the client
290
- # only when we need it
291
- self._async_httpx_client: Optional[httpx.AsyncClient] = None
292
-
293
264
  self.retry_callback = retry_callback
294
265
  self._retry_gql = retry.Retry(
295
266
  self.execute,
@@ -2794,105 +2765,6 @@ class Api:
2794
2765
 
2795
2766
  return response
2796
2767
 
2797
- async def upload_file_async(
2798
- self,
2799
- url: str,
2800
- file: IO[bytes],
2801
- callback: Optional["ProgressFn"] = None,
2802
- extra_headers: Optional[Dict[str, str]] = None,
2803
- ) -> None:
2804
- """An async not-quite-equivalent version of `upload_file`.
2805
-
2806
- Differences from `upload_file`:
2807
- - This method doesn't implement Azure uploads. (The Azure SDK supports
2808
- async, but it's nontrivial to use it here.) If the upload looks like
2809
- it's destined for Azure, this method will delegate to the sync impl.
2810
- - Consequently, this method doesn't return the response object.
2811
- (Because it might fall back to the sync impl, it would sometimes
2812
- return a `requests.Response` and sometimes an `httpx.Response`.)
2813
- - This method doesn't wrap retryable errors in `TransientError`.
2814
- It leaves that determination to the caller.
2815
- """
2816
- check_httpclient_logger_handler()
2817
- must_delegate = False
2818
-
2819
- if httpx is None:
2820
- wandb.termwarn( # type: ignore[unreachable]
2821
- "async file-uploads require `pip install wandb[async]`; falling back to sync implementation",
2822
- repeat=False,
2823
- )
2824
- must_delegate = True
2825
-
2826
- if extra_headers is not None and "x-ms-blob-type" in extra_headers:
2827
- wandb.termwarn(
2828
- "async file-uploads don't support Azure; falling back to sync implementation",
2829
- repeat=False,
2830
- )
2831
- must_delegate = True
2832
-
2833
- if must_delegate:
2834
- await asyncio.get_event_loop().run_in_executor(
2835
- None,
2836
- lambda: self.upload_file_retry(
2837
- url=url,
2838
- file=file,
2839
- callback=callback,
2840
- extra_headers=extra_headers,
2841
- ),
2842
- )
2843
- return
2844
-
2845
- if self._async_httpx_client is None:
2846
- self._async_httpx_client = httpx.AsyncClient()
2847
-
2848
- progress = AsyncProgress(Progress(file, callback=callback))
2849
-
2850
- try:
2851
- response = await self._async_httpx_client.put(
2852
- url=url,
2853
- content=progress,
2854
- headers={
2855
- "Content-Length": str(len(progress)),
2856
- **(extra_headers if extra_headers is not None else {}),
2857
- },
2858
- )
2859
- response.raise_for_status()
2860
- except Exception as e:
2861
- progress.rewind()
2862
- logger.error(f"upload_file_async exception {url}: {e}")
2863
- if isinstance(e, httpx.RequestError):
2864
- logger.error(f"upload_file_async request headers: {e.request.headers}")
2865
- if isinstance(e, httpx.HTTPStatusError):
2866
- logger.error(f"upload_file_async response body: {e.response.content!r}")
2867
- raise
2868
-
2869
- async def upload_file_retry_async(
2870
- self,
2871
- url: str,
2872
- file: IO[bytes],
2873
- callback: Optional["ProgressFn"] = None,
2874
- extra_headers: Optional[Dict[str, str]] = None,
2875
- num_retries: int = 100,
2876
- ) -> None:
2877
- backoff = retry.FilteredBackoff(
2878
- filter=check_httpx_exc_retriable,
2879
- wrapped=retry.ExponentialBackoff(
2880
- initial_sleep=datetime.timedelta(seconds=1),
2881
- max_sleep=datetime.timedelta(seconds=60),
2882
- max_retries=num_retries,
2883
- timeout_at=datetime.datetime.now() + datetime.timedelta(days=7),
2884
- ),
2885
- )
2886
-
2887
- await retry.retry_async(
2888
- backoff=backoff,
2889
- fn=self.upload_file_async,
2890
- url=url,
2891
- file=file,
2892
- callback=callback,
2893
- extra_headers=extra_headers,
2894
- )
2895
-
2896
2768
  @normalize_exceptions
2897
2769
  def register_agent(
2898
2770
  self,
@@ -4,7 +4,6 @@ Collection of classes to support the internal process.
4
4
 
5
5
  """
6
6
 
7
-
8
7
  import logging
9
8
  import queue
10
9
  import sys
@@ -1,4 +1,5 @@
1
1
  """job builder."""
2
+
2
3
  import json
3
4
  import logging
4
5
  import os
@@ -31,6 +32,43 @@ JOB_ARTIFACT_TYPE = "job"
31
32
  LOG_LEVEL = Literal["log", "warn", "error"]
32
33
 
33
34
 
35
+ class Version:
36
+ def __init__(self, major: int, minor: int, patch: int):
37
+ self._major = major
38
+ self._minor = minor
39
+ self._patch = patch
40
+
41
+ def __repr__(self) -> str:
42
+ return f"{self._major}.{self._minor}.{self._patch}"
43
+
44
+ def __lt__(self, other: "Version") -> bool:
45
+ if self._major < other._major:
46
+ return True
47
+ elif self._major == other._major:
48
+ if self._minor < other._minor:
49
+ return True
50
+ elif self._minor == other._minor:
51
+ if self._patch < other._patch:
52
+ return True
53
+ return False
54
+
55
+ def __eq__(self, other: object) -> bool:
56
+ if not isinstance(other, Version):
57
+ return NotImplemented
58
+ return (
59
+ self._major == other._major
60
+ and self._minor == other._minor
61
+ and self._patch == other._patch
62
+ )
63
+
64
+
65
+ # Minimum supported wandb version for keys in the source dict of wandb-job.json
66
+ SOURCE_KEYS_MIN_SUPPORTED_VERSION = {
67
+ "dockerfile": Version(0, 17, 0),
68
+ "build_context": Version(0, 17, 0),
69
+ }
70
+
71
+
34
72
  class GitInfo(TypedDict):
35
73
  remote: str
36
74
  commit: str
@@ -40,12 +78,16 @@ class GitSourceDict(TypedDict):
40
78
  git: GitInfo
41
79
  entrypoint: List[str]
42
80
  notebook: bool
81
+ build_context: Optional[str]
82
+ dockerfile: Optional[str]
43
83
 
44
84
 
45
85
  class ArtifactSourceDict(TypedDict):
46
86
  artifact: str
47
87
  entrypoint: List[str]
48
88
  notebook: bool
89
+ build_context: Optional[str]
90
+ dockerfile: Optional[str]
49
91
 
50
92
 
51
93
  class ImageSourceDict(TypedDict):
@@ -72,6 +114,19 @@ class ArtifactInfoForJob(TypedDict):
72
114
  name: str
73
115
 
74
116
 
117
+ def get_min_supported_for_source_dict(
118
+ source: Union[GitSourceDict, ArtifactSourceDict, ImageSourceDict],
119
+ ) -> Optional[Version]:
120
+ """Get the minimum supported wandb version for the source dict of wandb-job.json."""
121
+ min_seen = None
122
+ for key in source:
123
+ new_ver = SOURCE_KEYS_MIN_SUPPORTED_VERSION.get(key)
124
+ if new_ver:
125
+ if min_seen is None or new_ver < min_seen:
126
+ min_seen = new_ver
127
+ return min_seen
128
+
129
+
75
130
  class JobArtifact(Artifact):
76
131
  def __init__(self, name: str, *args: Any, **kwargs: Any):
77
132
  super().__init__(name, "placeholder", *args, **kwargs)
@@ -105,9 +160,9 @@ class JobBuilder:
105
160
  self._disable = settings.disable_job_creation
106
161
  self._partial_source = None
107
162
  self._aliases = []
108
- self._source_type: Optional[
109
- Literal["repo", "artifact", "image"]
110
- ] = settings.job_source # type: ignore[assignment]
163
+ self._source_type: Optional[Literal["repo", "artifact", "image"]] = (
164
+ settings.job_source # type: ignore[assignment]
165
+ )
111
166
  self._is_notebook_run = self._get_is_notebook_run()
112
167
  self._verbose = verbose
113
168
 
@@ -196,6 +251,8 @@ class JobBuilder:
196
251
  "git": {"remote": remote, "commit": commit},
197
252
  "entrypoint": entrypoint,
198
253
  "notebook": self._is_notebook_run,
254
+ "build_context": metadata.get("build_context"),
255
+ "dockerfile": metadata.get("dockerfile"),
199
256
  }
200
257
  name = self._make_job_name(f"{remote}_{program_relpath}")
201
258
 
@@ -246,6 +303,8 @@ class JobBuilder:
246
303
  "entrypoint": entrypoint,
247
304
  "notebook": self._is_notebook_run,
248
305
  "artifact": f"wandb-artifact://_id/{self._logged_code_artifact['id']}",
306
+ "build_context": metadata.get("build_context"),
307
+ "dockerfile": metadata.get("dockerfile"),
249
308
  }
250
309
  name = self._make_job_name(self._logged_code_artifact["name"])
251
310
 
@@ -275,7 +334,7 @@ class JobBuilder:
275
334
  return source, name
276
335
 
277
336
  def _make_job_name(self, input_str: str) -> str:
278
- """Use job name from settings if provided, else use programatic name."""
337
+ """Use job name from settings if provided, else use programmatic name."""
279
338
  if self._settings.job_name:
280
339
  return self._settings.job_name
281
340
 
@@ -292,17 +351,6 @@ class JobBuilder:
292
351
  if metadata.get("entrypoint"):
293
352
  entrypoint: List[str] = metadata["entrypoint"]
294
353
  return entrypoint
295
-
296
- # if entrypoint is not in metadata, then construct from python
297
- assert metadata.get("python")
298
-
299
- python = metadata["python"]
300
- if python.count(".") > 1:
301
- python = ".".join(python.split(".")[:2])
302
-
303
- entrypoint = [f"python{python}", program_relpath]
304
- return entrypoint
305
-
306
354
  # job is being built from a run
307
355
  entrypoint = [os.path.basename(sys.executable), program_relpath]
308
356
 
@@ -314,7 +362,73 @@ class JobBuilder:
314
362
  def _is_colab_run(self) -> bool:
315
363
  return hasattr(self._settings, "_colab") and bool(self._settings._colab)
316
364
 
317
- def build(self) -> Optional[Artifact]:
365
+ def _build_job_source(
366
+ self,
367
+ source_type: str,
368
+ program_relpath: Optional[str],
369
+ metadata: Dict[str, Any],
370
+ ) -> Tuple[
371
+ Union[GitSourceDict, ArtifactSourceDict, ImageSourceDict, None],
372
+ Optional[str],
373
+ ]:
374
+ """Construct a job source dict and name from the current run.
375
+
376
+ Arguments:
377
+ source_type (str): The type of source to build the job from. One of
378
+ "repo", "artifact", or "image".
379
+ """
380
+ source: Union[
381
+ GitSourceDict,
382
+ ArtifactSourceDict,
383
+ ImageSourceDict,
384
+ None,
385
+ ] = None
386
+
387
+ if source_type == "repo":
388
+ source, name = self._build_repo_job_source(
389
+ program_relpath or "",
390
+ metadata,
391
+ )
392
+ elif source_type == "artifact":
393
+ source, name = self._build_artifact_job_source(
394
+ program_relpath or "",
395
+ metadata,
396
+ )
397
+ elif source_type == "image" and self._has_image_job_ingredients(metadata):
398
+ source, name = self._build_image_job_source(metadata)
399
+ else:
400
+ source = None
401
+
402
+ if source is None:
403
+ if source_type:
404
+ self._log_if_verbose(
405
+ f"Source type is set to '{source_type}' but some required information is missing "
406
+ "from the environment. A job will not be created from this run. See "
407
+ "https://docs.wandb.ai/guides/launch/create-job",
408
+ "warn",
409
+ )
410
+ return None, None
411
+
412
+ return source, name
413
+
414
+ def build(
415
+ self,
416
+ build_context: Optional[str] = None,
417
+ dockerfile: Optional[str] = None,
418
+ ) -> Optional[Artifact]:
419
+ """Build a job artifact from the current run.
420
+
421
+ Arguments:
422
+ build_context (Optional[str]): Path within the job source code to
423
+ the image build context. Saved as part of the job for future
424
+ builds.
425
+ dockerfile (Optional[str]): Path within the build context to the
426
+ Dockerfile. Saved as part of the job for future builds.
427
+
428
+ Returns:
429
+ Optional[Artifact]: The job artifact if it was successfully built,
430
+ otherwise None.
431
+ """
318
432
  _logger.info("Attempting to build job artifact")
319
433
  if not os.path.exists(
320
434
  os.path.join(self._settings.files_dir, REQUIREMENTS_FNAME)
@@ -331,6 +445,8 @@ class JobBuilder:
331
445
  "warn",
332
446
  )
333
447
  return None
448
+ metadata["dockerfile"] = dockerfile
449
+ metadata["build_context"] = build_context
334
450
 
335
451
  runtime: Optional[str] = metadata.get("python")
336
452
  # can't build a job without a python version
@@ -374,45 +490,39 @@ class JobBuilder:
374
490
  return None
375
491
 
376
492
  program_relpath = self._get_program_relpath(source_type, metadata)
377
- if source_type != "image" and not program_relpath:
493
+ if (
494
+ not metadata.get("_partial")
495
+ and source_type != "image"
496
+ and not program_relpath
497
+ ):
378
498
  self._log_if_verbose(
379
499
  "No program path found, not creating job artifact. See https://docs.wandb.ai/guides/launch/create-job",
380
500
  "warn",
381
501
  )
382
502
  return None
383
503
 
384
- source: Union[
385
- Optional[GitSourceDict],
386
- Optional[ArtifactSourceDict],
387
- Optional[ImageSourceDict],
388
- ] = None
389
-
390
- # make source dict
391
- if source_type == "repo":
392
- assert program_relpath
393
- source, name = self._build_repo_job_source(program_relpath, metadata)
394
- elif source_type == "artifact":
395
- assert program_relpath
396
- source, name = self._build_artifact_job_source(
397
- program_relpath, metadata
398
- )
399
- elif source_type == "image" and self._has_image_job_ingredients(metadata):
400
- source, name = self._build_image_job_source(metadata)
401
- else:
402
- source = None
403
-
504
+ source, name = self._build_job_source(
505
+ source_type,
506
+ program_relpath,
507
+ metadata,
508
+ )
404
509
  if source is None:
405
- if source_type:
406
- self._log_if_verbose(
407
- f"Source type is set to '{source_type}' but some required information is missing "
408
- "from the environment. A job will not be created from this run. See "
409
- "https://docs.wandb.ai/guides/launch/create-job",
410
- "warn",
411
- )
412
510
  return None
413
511
 
512
+ if build_context:
513
+ source["build_context"] = build_context # type: ignore[typeddict-item]
514
+ if dockerfile:
515
+ source["dockerfile"] = dockerfile # type: ignore[typeddict-item]
516
+
517
+ # Pop any keys that are initialized to None. The current TypedDict
518
+ # system for source dicts requires all keys to be present, but we
519
+ # don't want to include keys that are None in the final dict.
520
+ for key in list(source.keys()):
521
+ if source[key] is None: # type: ignore[literal-required]
522
+ source.pop(key) # type: ignore[literal-required,misc]
523
+
414
524
  source_info = {
415
- "_version": "v0",
525
+ "_version": str(get_min_supported_for_source_dict(source) or "v0"),
416
526
  "source_type": source_type,
417
527
  "source": source,
418
528
  "input_types": input_types,
@@ -530,6 +640,8 @@ def convert_use_artifact_to_job_source(
530
640
  },
531
641
  "entrypoint": entrypoint,
532
642
  "notebook": source_info.source.git.notebook,
643
+ "build_context": None,
644
+ "dockerfile": None,
533
645
  }
534
646
  source_info_dict.update({"source": git_source})
535
647
  elif source_info.source_type == "artifact":
@@ -538,6 +650,8 @@ def convert_use_artifact_to_job_source(
538
650
  "artifact": source_info.source.artifact.artifact,
539
651
  "entrypoint": entrypoint,
540
652
  "notebook": source_info.source.artifact.notebook,
653
+ "build_context": None,
654
+ "dockerfile": None,
541
655
  }
542
656
  source_info_dict.update({"source": artifact_source})
543
657
  elif source_info.source_type == "image":