wandb 0.15.3__py3-none-any.whl → 0.15.5__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (156) hide show
  1. wandb/__init__.py +1 -1
  2. wandb/analytics/sentry.py +1 -0
  3. wandb/apis/importers/base.py +20 -5
  4. wandb/apis/importers/mlflow.py +7 -1
  5. wandb/apis/internal.py +12 -0
  6. wandb/apis/public.py +247 -1387
  7. wandb/apis/reports/_panels.py +58 -35
  8. wandb/beta/workflows.py +6 -7
  9. wandb/cli/cli.py +130 -60
  10. wandb/data_types.py +3 -1
  11. wandb/filesync/dir_watcher.py +21 -27
  12. wandb/filesync/step_checksum.py +8 -8
  13. wandb/filesync/step_prepare.py +23 -10
  14. wandb/filesync/step_upload.py +13 -13
  15. wandb/filesync/upload_job.py +4 -8
  16. wandb/integration/cohere/__init__.py +3 -0
  17. wandb/integration/cohere/cohere.py +21 -0
  18. wandb/integration/cohere/resolver.py +347 -0
  19. wandb/integration/gym/__init__.py +4 -6
  20. wandb/integration/huggingface/__init__.py +3 -0
  21. wandb/integration/huggingface/huggingface.py +18 -0
  22. wandb/integration/huggingface/resolver.py +213 -0
  23. wandb/integration/langchain/wandb_tracer.py +16 -179
  24. wandb/integration/openai/__init__.py +1 -3
  25. wandb/integration/openai/openai.py +11 -143
  26. wandb/integration/openai/resolver.py +111 -38
  27. wandb/integration/sagemaker/config.py +2 -2
  28. wandb/integration/tensorboard/log.py +4 -4
  29. wandb/old/settings.py +24 -7
  30. wandb/proto/v3/wandb_telemetry_pb2.py +12 -12
  31. wandb/proto/v4/wandb_telemetry_pb2.py +12 -12
  32. wandb/proto/wandb_deprecated.py +3 -1
  33. wandb/sdk/__init__.py +1 -1
  34. wandb/sdk/artifacts/__init__.py +0 -0
  35. wandb/sdk/artifacts/artifact.py +2101 -0
  36. wandb/sdk/artifacts/artifact_download_logger.py +42 -0
  37. wandb/sdk/artifacts/artifact_manifest.py +67 -0
  38. wandb/sdk/artifacts/artifact_manifest_entry.py +159 -0
  39. wandb/sdk/artifacts/artifact_manifests/__init__.py +0 -0
  40. wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +91 -0
  41. wandb/sdk/{internal → artifacts}/artifact_saver.py +6 -5
  42. wandb/sdk/artifacts/artifact_state.py +10 -0
  43. wandb/sdk/{interface/artifacts/artifact_cache.py → artifacts/artifacts_cache.py} +22 -12
  44. wandb/sdk/artifacts/exceptions.py +55 -0
  45. wandb/sdk/artifacts/storage_handler.py +59 -0
  46. wandb/sdk/artifacts/storage_handlers/__init__.py +0 -0
  47. wandb/sdk/artifacts/storage_handlers/azure_handler.py +192 -0
  48. wandb/sdk/artifacts/storage_handlers/gcs_handler.py +224 -0
  49. wandb/sdk/artifacts/storage_handlers/http_handler.py +112 -0
  50. wandb/sdk/artifacts/storage_handlers/local_file_handler.py +134 -0
  51. wandb/sdk/artifacts/storage_handlers/multi_handler.py +53 -0
  52. wandb/sdk/artifacts/storage_handlers/s3_handler.py +301 -0
  53. wandb/sdk/artifacts/storage_handlers/tracking_handler.py +67 -0
  54. wandb/sdk/artifacts/storage_handlers/wb_artifact_handler.py +132 -0
  55. wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +72 -0
  56. wandb/sdk/artifacts/storage_layout.py +6 -0
  57. wandb/sdk/artifacts/storage_policies/__init__.py +0 -0
  58. wandb/sdk/artifacts/storage_policies/s3_bucket_policy.py +61 -0
  59. wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +386 -0
  60. wandb/sdk/{interface/artifacts/artifact_storage.py → artifacts/storage_policy.py} +5 -57
  61. wandb/sdk/data_types/_dtypes.py +7 -12
  62. wandb/sdk/data_types/base_types/json_metadata.py +3 -2
  63. wandb/sdk/data_types/base_types/media.py +8 -8
  64. wandb/sdk/data_types/base_types/wb_value.py +12 -13
  65. wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +5 -6
  66. wandb/sdk/data_types/helper_types/classes.py +6 -8
  67. wandb/sdk/data_types/helper_types/image_mask.py +5 -6
  68. wandb/sdk/data_types/histogram.py +4 -3
  69. wandb/sdk/data_types/html.py +3 -4
  70. wandb/sdk/data_types/image.py +11 -9
  71. wandb/sdk/data_types/molecule.py +5 -3
  72. wandb/sdk/data_types/object_3d.py +7 -5
  73. wandb/sdk/data_types/plotly.py +3 -2
  74. wandb/sdk/data_types/saved_model.py +11 -11
  75. wandb/sdk/data_types/trace_tree.py +5 -4
  76. wandb/sdk/data_types/utils.py +3 -5
  77. wandb/sdk/data_types/video.py +5 -4
  78. wandb/sdk/integration_utils/auto_logging.py +215 -0
  79. wandb/sdk/interface/interface.py +15 -15
  80. wandb/sdk/internal/file_pusher.py +8 -16
  81. wandb/sdk/internal/file_stream.py +5 -11
  82. wandb/sdk/internal/handler.py +13 -1
  83. wandb/sdk/internal/internal_api.py +287 -13
  84. wandb/sdk/internal/job_builder.py +119 -30
  85. wandb/sdk/internal/sender.py +6 -26
  86. wandb/sdk/internal/settings_static.py +2 -0
  87. wandb/sdk/internal/system/assets/__init__.py +2 -0
  88. wandb/sdk/internal/system/assets/gpu.py +42 -0
  89. wandb/sdk/internal/system/assets/gpu_amd.py +216 -0
  90. wandb/sdk/internal/system/env_probe_helpers.py +13 -0
  91. wandb/sdk/internal/system/system_info.py +3 -3
  92. wandb/sdk/internal/tb_watcher.py +32 -22
  93. wandb/sdk/internal/thread_local_settings.py +18 -0
  94. wandb/sdk/launch/_project_spec.py +57 -11
  95. wandb/sdk/launch/agent/agent.py +147 -65
  96. wandb/sdk/launch/agent/job_status_tracker.py +34 -0
  97. wandb/sdk/launch/agent/run_queue_item_file_saver.py +45 -0
  98. wandb/sdk/launch/builder/abstract.py +5 -1
  99. wandb/sdk/launch/builder/build.py +21 -18
  100. wandb/sdk/launch/builder/docker_builder.py +10 -4
  101. wandb/sdk/launch/builder/kaniko_builder.py +113 -23
  102. wandb/sdk/launch/builder/noop.py +6 -3
  103. wandb/sdk/launch/builder/templates/_wandb_bootstrap.py +46 -14
  104. wandb/sdk/launch/environment/aws_environment.py +3 -2
  105. wandb/sdk/launch/environment/azure_environment.py +124 -0
  106. wandb/sdk/launch/environment/gcp_environment.py +2 -4
  107. wandb/sdk/launch/environment/local_environment.py +1 -1
  108. wandb/sdk/launch/errors.py +19 -0
  109. wandb/sdk/launch/github_reference.py +32 -19
  110. wandb/sdk/launch/launch.py +3 -8
  111. wandb/sdk/launch/launch_add.py +6 -2
  112. wandb/sdk/launch/loader.py +21 -2
  113. wandb/sdk/launch/registry/azure_container_registry.py +132 -0
  114. wandb/sdk/launch/registry/elastic_container_registry.py +39 -5
  115. wandb/sdk/launch/registry/google_artifact_registry.py +68 -26
  116. wandb/sdk/launch/registry/local_registry.py +2 -1
  117. wandb/sdk/launch/runner/abstract.py +24 -3
  118. wandb/sdk/launch/runner/kubernetes_runner.py +479 -26
  119. wandb/sdk/launch/runner/local_container.py +103 -51
  120. wandb/sdk/launch/runner/local_process.py +1 -1
  121. wandb/sdk/launch/runner/sagemaker_runner.py +60 -10
  122. wandb/sdk/launch/runner/vertex_runner.py +10 -5
  123. wandb/sdk/launch/sweeps/__init__.py +7 -9
  124. wandb/sdk/launch/sweeps/scheduler.py +307 -77
  125. wandb/sdk/launch/sweeps/scheduler_sweep.py +2 -1
  126. wandb/sdk/launch/sweeps/utils.py +82 -35
  127. wandb/sdk/launch/utils.py +89 -75
  128. wandb/sdk/lib/_settings_toposort_generated.py +7 -0
  129. wandb/sdk/lib/capped_dict.py +26 -0
  130. wandb/sdk/lib/{git.py → gitlib.py} +76 -59
  131. wandb/sdk/lib/hashutil.py +12 -4
  132. wandb/sdk/lib/paths.py +96 -8
  133. wandb/sdk/lib/sock_client.py +2 -2
  134. wandb/sdk/lib/timer.py +1 -0
  135. wandb/sdk/service/server.py +22 -9
  136. wandb/sdk/service/server_sock.py +1 -1
  137. wandb/sdk/service/service.py +27 -8
  138. wandb/sdk/verify/verify.py +4 -7
  139. wandb/sdk/wandb_config.py +2 -6
  140. wandb/sdk/wandb_init.py +57 -53
  141. wandb/sdk/wandb_require.py +7 -0
  142. wandb/sdk/wandb_run.py +61 -223
  143. wandb/sdk/wandb_settings.py +28 -4
  144. wandb/testing/relay.py +15 -2
  145. wandb/util.py +74 -36
  146. {wandb-0.15.3.dist-info → wandb-0.15.5.dist-info}/METADATA +15 -9
  147. {wandb-0.15.3.dist-info → wandb-0.15.5.dist-info}/RECORD +151 -116
  148. {wandb-0.15.3.dist-info → wandb-0.15.5.dist-info}/entry_points.txt +1 -0
  149. wandb/integration/langchain/util.py +0 -191
  150. wandb/sdk/interface/artifacts/__init__.py +0 -33
  151. wandb/sdk/interface/artifacts/artifact.py +0 -615
  152. wandb/sdk/interface/artifacts/artifact_manifest.py +0 -131
  153. wandb/sdk/wandb_artifacts.py +0 -2226
  154. {wandb-0.15.3.dist-info → wandb-0.15.5.dist-info}/LICENSE +0 -0
  155. {wandb-0.15.3.dist-info → wandb-0.15.5.dist-info}/WHEEL +0 -0
  156. {wandb-0.15.3.dist-info → wandb-0.15.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,42 @@
1
+ """Artifact download logger."""
2
+ import multiprocessing.dummy
3
+ import time
4
+ from typing import Callable
5
+
6
+ from wandb.errors.term import termlog
7
+
8
+
9
class ArtifactDownloadLogger:
    """Print progress to the terminal while an artifact's files download.

    Each call to `notify_downloaded` counts one finished file. Intermediate
    progress lines are emitted only when more than 0.1 (in units of the
    injected clock) has passed since the previous log; the final file always
    produces a summary line.

    Arguments:
        nfiles: Total number of files expected to be downloaded.
        clock_for_testing: Zero-argument callable returning the current time.
        termlog_for_testing: Callable used to emit the progress messages.
    """

    def __init__(
        self,
        nfiles: int,
        clock_for_testing: Callable[[], float] = time.monotonic,
        termlog_for_testing: Callable[..., None] = termlog,
    ) -> None:
        # Injected collaborators (replaceable in tests).
        self._nfiles = nfiles
        self._clock = clock_for_testing
        self._termlog = termlog_for_testing

        # Mutable progress state; only touched while holding `_lock`.
        self._n_files_downloaded = 0
        self._spinner_index = 0
        self._last_log_time = self._clock()
        self._lock = multiprocessing.dummy.Lock()

    def notify_downloaded(self) -> None:
        """Record that one more file finished downloading and maybe log it."""
        with self._lock:
            self._n_files_downloaded += 1
            total = self._nfiles

            if self._n_files_downloaded == total:
                # Trailing spaces wipe out the ellipsis left by earlier
                # carriage-return progress lines.
                self._termlog(
                    f" {total} of {total} files downloaded. ",
                    newline=True,
                )
                self._last_log_time = self._clock()
                return

            elapsed = self._clock() - self._last_log_time
            if elapsed > 0.1:
                self._spinner_index += 1
                frames = r"-\|/"
                spinner = frames[self._spinner_index % 4]
                done = self._n_files_downloaded
                self._termlog(
                    f"{spinner} {done} of {total} files downloaded...\r",
                    newline=False,
                )
                self._last_log_time = self._clock()
@@ -0,0 +1,67 @@
1
+ """Artifact manifest."""
2
+ from typing import TYPE_CHECKING, Dict, List, Mapping, Optional
3
+
4
+ from wandb.sdk.lib.hashutil import HexMD5
5
+
6
+ if TYPE_CHECKING:
7
+ from wandb.sdk.artifacts.artifact_manifest_entry import ArtifactManifestEntry
8
+ from wandb.sdk.artifacts.storage_policies.wandb_storage_policy import (
9
+ WandbStoragePolicy,
10
+ )
11
+
12
+
13
class ArtifactManifest:
    """Base class for artifact manifests.

    A manifest maps entry paths to `ArtifactManifestEntry` objects and carries
    the storage policy used for them. Versioned subclasses supply `version`,
    `to_manifest_json` and `digest`.
    """

    entries: Dict[str, "ArtifactManifestEntry"]

    @classmethod
    def from_manifest_json(cls, manifest_json: Dict) -> "ArtifactManifest":
        """Build a manifest by dispatching to the subclass with a matching version.

        Raises:
            ValueError: If the JSON has no "version" field or no direct
                subclass reports that version.
        """
        if "version" not in manifest_json:
            raise ValueError("Invalid manifest format. Must contain version field.")
        wanted = manifest_json["version"]
        # Lazily scan direct subclasses and stop at the first version match.
        match = next(
            (sub for sub in cls.__subclasses__() if sub.version() == wanted),
            None,
        )
        if match is None:
            raise ValueError("Invalid manifest version.")
        return match.from_manifest_json(manifest_json)

    @classmethod
    def version(cls) -> int:
        """Return the manifest format version; subclasses must override."""
        raise NotImplementedError

    def __init__(
        self,
        storage_policy: "WandbStoragePolicy",
        entries: Optional[Mapping[str, "ArtifactManifestEntry"]] = None,
    ) -> None:
        self.storage_policy = storage_policy
        # Always keep our own dict so the caller's mapping is never shared.
        self.entries = {}
        if entries:
            self.entries.update(entries)

    def to_manifest_json(self) -> Dict:
        """Serialize the manifest to a JSON-compatible dict; subclasses must override."""
        raise NotImplementedError

    def digest(self) -> HexMD5:
        """Return a digest of the manifest; subclasses must override."""
        raise NotImplementedError

    def add_entry(self, entry: "ArtifactManifestEntry") -> None:
        """Add (or re-add) an entry under its path.

        Raises:
            ValueError: If a different-digest entry already occupies the path.
        """
        path = entry.path
        if path in self.entries and entry.digest != self.entries[path].digest:
            raise ValueError("Cannot add the same path twice: %s" % entry.path)
        self.entries[path] = entry

    def remove_entry(self, entry: "ArtifactManifestEntry") -> None:
        """Remove the entry stored under `entry.path`.

        Raises:
            FileNotFoundError: If no entry exists at that path.
        """
        path = entry.path
        if path not in self.entries:
            raise FileNotFoundError(f"Cannot remove missing entry: '{entry.path}'")
        del self.entries[path]

    def get_entry_by_path(self, path: str) -> Optional["ArtifactManifestEntry"]:
        """Return the entry stored at `path`, or None if there is none."""
        return self.entries.get(path)

    def get_entries_in_directory(self, directory: str) -> List["ArtifactManifestEntry"]:
        """Return every entry whose path begins with `directory + "/"`."""
        # Entries use forward slash even for windows.
        prefix = directory + "/"
        return [
            entry for key, entry in self.entries.items() if key.startswith(prefix)
        ]
@@ -0,0 +1,159 @@
1
+ """Artifact manifest entry."""
2
+ import os
3
+ from pathlib import Path
4
+ from typing import TYPE_CHECKING, Dict, Optional, Union
5
+ from urllib.parse import urlparse
6
+
7
+ import wandb
8
+ from wandb import util
9
+ from wandb.errors.term import termwarn
10
+ from wandb.sdk.lib import filesystem
11
+ from wandb.sdk.lib.hashutil import (
12
+ B64MD5,
13
+ ETag,
14
+ b64_to_hex_id,
15
+ hex_to_b64_id,
16
+ md5_file_b64,
17
+ )
18
+ from wandb.sdk.lib.paths import FilePathStr, LogicalPath, StrPath, URIStr
19
+
20
+ if TYPE_CHECKING:
21
+ from wandb.apis.public import RetryingClient
22
+ from wandb.sdk.artifacts.artifact import Artifact
23
+
24
+
25
class ArtifactManifestEntry:
    """A single entry in an artifact manifest.

    Holds the entry's logical path and digest plus optional reference,
    birth artifact id, size, extra metadata and local file path.
    """

    path: LogicalPath
    digest: Union[B64MD5, URIStr, FilePathStr, ETag]
    ref: Optional[Union[FilePathStr, URIStr]]
    birth_artifact_id: Optional[str]
    size: Optional[int]
    extra: Dict
    local_path: Optional[str]

    _parent_artifact: Optional["Artifact"] = None
    _download_url: Optional[str] = None

    def __init__(
        self,
        path: StrPath,
        digest: Union[B64MD5, URIStr, FilePathStr, ETag],
        ref: Optional[Union[FilePathStr, URIStr]] = None,
        birth_artifact_id: Optional[str] = None,
        size: Optional[int] = None,
        extra: Optional[Dict] = None,
        local_path: Optional[StrPath] = None,
    ) -> None:
        self.path = LogicalPath(path)
        self.digest = digest
        self.ref = ref
        self.birth_artifact_id = birth_artifact_id
        self.size = size
        self.extra = extra or {}

        self.local_path = None
        if local_path:
            self.local_path = str(local_path)
        # When a local file is given without a size, read the size from disk.
        if self.local_path and self.size is None:
            self.size = Path(self.local_path).stat().st_size

    @property
    def name(self) -> LogicalPath:
        """Deprecated alias for `path`; warns on every access."""
        # TODO(hugh): add telemetry to see if anyone is still using this.
        termwarn("ArtifactManifestEntry.name is deprecated, use .path instead")
        return self.path

    def parent_artifact(self) -> "Artifact":
        """Get the artifact to which this artifact entry belongs.

        Returns:
            (Artifact): The parent artifact.

        Raises:
            NotImplementedError: If no parent artifact is attached.
        """
        parent = self._parent_artifact
        if parent is None:
            raise NotImplementedError
        return parent

    def download(self, root: Optional[str] = None) -> FilePathStr:
        """Download this artifact entry to the specified root path.

        Arguments:
            root: (str, optional) The root path in which to download this
                artifact entry. Defaults to the artifact's root.

        Returns:
            (str): The path of the downloaded artifact entry.

        Raises:
            NotImplementedError: If no parent artifact is attached.
        """
        parent = self._parent_artifact
        if parent is None:
            raise NotImplementedError

        root = root or parent._default_root()
        parent._add_download_root(root)
        dest_path = os.path.join(root, self.path)

        # Nothing to fetch when the destination already exists with the
        # digest we expect; skip the cache lookup and any download.
        already_present = os.path.exists(dest_path) and self.digest == md5_file_b64(
            dest_path
        )
        if already_present:
            return FilePathStr(dest_path)

        policy = parent.manifest.storage_policy
        if self.ref is None:
            cache_path = policy.load_file(parent, self)
        else:
            cache_path = policy.load_reference(self, local=True)

        copied = filesystem.copy_or_overwrite_changed(cache_path, dest_path)
        return FilePathStr(str(copied))

    def ref_target(self) -> Union[FilePathStr, URIStr]:
        """Get the reference URL that is targeted by this artifact entry.

        Returns:
            (str): The reference URL of this artifact entry.

        Raises:
            ValueError: If this artifact entry was not a reference.
        """
        if self.ref is None:
            raise ValueError("Only reference entries support ref_target().")
        parent = self._parent_artifact
        if parent is None:
            # Without a parent there is no storage policy to consult.
            return self.ref
        own_entry = parent.manifest.entries[self.path]
        return parent.manifest.storage_policy.load_reference(own_entry, local=False)

    def ref_url(self) -> str:
        """Get a URL to this artifact entry.

        These URLs can be referenced by another artifact.

        Returns:
            (str): A URL representing this artifact entry.

        Raises:
            NotImplementedError: If no parent artifact is attached.

        Examples:
            Basic usage
            ```
            ref_url = source_artifact.get_path('file.txt').ref_url()
            derived_artifact.add_reference(ref_url)
            ```
        """
        parent = self._parent_artifact
        if parent is None:
            raise NotImplementedError
        assert parent.id is not None
        hex_id = b64_to_hex_id(B64MD5(parent.id))
        return "wandb-artifact://" + hex_id + "/" + self.path

    def _is_artifact_reference(self) -> bool:
        """Return True when `ref` is set and uses the wandb-artifact scheme."""
        if self.ref is None:
            return False
        return urlparse(self.ref).scheme == "wandb-artifact"

    def _get_referenced_artifact(self, client: "RetryingClient") -> "Artifact":
        """Load the artifact whose id is encoded in the host part of `ref`."""
        referenced_id = hex_to_b64_id(util.host_from_path(self.ref))
        artifact: "Artifact" = wandb.Artifact._from_id(referenced_id, client)
        assert artifact is not None
        return artifact
File without changes
@@ -0,0 +1,91 @@
1
+ """Artifact manifest v1."""
2
+ from typing import Any, Dict, Mapping, Optional
3
+
4
+ from wandb.sdk.artifacts.artifact_manifest import ArtifactManifest
5
+ from wandb.sdk.artifacts.artifact_manifest_entry import ArtifactManifestEntry
6
+ from wandb.sdk.artifacts.storage_policies.wandb_storage_policy import WandbStoragePolicy
7
+ from wandb.sdk.artifacts.storage_policy import StoragePolicy
8
+ from wandb.sdk.lib.hashutil import HexMD5, _md5
9
+
10
+
11
class ArtifactManifestV1(ArtifactManifest):
    """Version 1 of the artifact manifest format."""

    @classmethod
    def version(cls) -> int:
        """Return the manifest format version handled by this class (1)."""
        return 1

    @classmethod
    def from_manifest_json(cls, manifest_json: Dict) -> "ArtifactManifestV1":
        """Build a v1 manifest from its JSON representation.

        Raises:
            ValueError: If the version is not 1, the named storage policy is
                unknown, or the policy is not a `WandbStoragePolicy` subclass.
        """
        if manifest_json["version"] != cls.version():
            raise ValueError(
                "Expected manifest version 1, got %s" % manifest_json["version"]
            )

        policy_name = manifest_json["storagePolicy"]
        policy_config = manifest_json.get("storagePolicyConfig", {})
        policy_cls = StoragePolicy.lookup_by_name(policy_name)
        if policy_cls is None:
            raise ValueError('Failed to find storage policy "%s"' % policy_name)
        if not issubclass(policy_cls, WandbStoragePolicy):
            raise ValueError(
                "No handler found for storage handler of type '%s'" % policy_name
            )

        # One manifest entry per item of the "contents" mapping, keyed by path.
        entries: Dict[str, ArtifactManifestEntry] = {}
        for name, val in manifest_json["contents"].items():
            entries[name] = ArtifactManifestEntry(
                path=name,
                digest=val["digest"],
                birth_artifact_id=val.get("birthArtifactID"),
                ref=val.get("ref"),
                size=val.get("size"),
                extra=val.get("extra"),
                local_path=val.get("local_path"),
            )

        return cls(policy_cls.from_config(policy_config), entries)

    def __init__(
        self,
        storage_policy: "WandbStoragePolicy",
        entries: Optional[Mapping[str, ArtifactManifestEntry]] = None,
    ) -> None:
        super().__init__(storage_policy, entries=entries)

    def to_manifest_json(self) -> Dict:
        """Return the JSON-compatible dict for this manifest.

        Entries are emitted in path order. Each carries its digest; the birth
        artifact id, ref and extra fields are included only when truthy, and
        size only when it is not None. Local paths are never included.
        """
        contents = {}
        ordered_entries = sorted(self.entries.values(), key=lambda item: item.path)
        for entry in ordered_entries:
            json_entry: Dict[str, Any] = {"digest": entry.digest}
            if entry.birth_artifact_id:
                json_entry["birthArtifactID"] = entry.birth_artifact_id
            if entry.ref:
                json_entry["ref"] = entry.ref
            if entry.extra:
                json_entry["extra"] = entry.extra
            if entry.size is not None:
                json_entry["size"] = entry.size
            contents[entry.path] = json_entry

        manifest_json = {
            "version": self.__class__.version(),
            "storagePolicy": self.storage_policy.name(),
            "storagePolicyConfig": self.storage_policy.config() or {},
            "contents": contents,
        }
        return manifest_json

    def digest(self) -> HexMD5:
        """Return the MD5 hex digest over a header line and each `name:digest` line."""
        hasher = _md5()
        hasher.update(b"wandb-artifact-manifest-v1\n")
        # Hash entries in key order so the digest does not depend on insertion order.
        for name in sorted(self.entries):
            entry = self.entries[name]
            hasher.update(f"{name}:{entry.digest}\n".encode())
        return HexMD5(hasher.hexdigest())
@@ -1,3 +1,4 @@
1
+ """Artifact saver."""
1
2
  import concurrent.futures
2
3
  import json
3
4
  import os
@@ -8,17 +9,17 @@ from typing import TYPE_CHECKING, Awaitable, Dict, List, Optional, Sequence
8
9
  import wandb
9
10
  import wandb.filesync.step_prepare
10
11
  from wandb import env, util
11
- from wandb.sdk.interface.artifacts import ArtifactManifest, ArtifactManifestEntry
12
+ from wandb.sdk.artifacts.artifact_manifest import ArtifactManifest
12
13
  from wandb.sdk.lib.filesystem import mkdir_exists_ok
13
14
  from wandb.sdk.lib.hashutil import B64MD5, b64_to_hex_id, md5_file_b64
14
15
  from wandb.sdk.lib.paths import FilePathStr, URIStr
15
16
 
16
17
  if TYPE_CHECKING:
18
+ from wandb.sdk.artifacts.artifact_manifest_entry import ArtifactManifestEntry
19
+ from wandb.sdk.internal.file_pusher import FilePusher
17
20
  from wandb.sdk.internal.internal_api import Api as InternalApi
18
21
  from wandb.sdk.internal.progress import ProgressFn
19
22
 
20
- from .file_pusher import FilePusher
21
-
22
23
  if sys.version_info >= (3, 8):
23
24
  from typing import Protocol
24
25
  else:
@@ -26,13 +27,13 @@ if TYPE_CHECKING:
26
27
 
27
28
  class SaveFn(Protocol):
28
29
  def __call__(
29
- self, entry: ArtifactManifestEntry, progress_callback: "ProgressFn"
30
+ self, entry: "ArtifactManifestEntry", progress_callback: "ProgressFn"
30
31
  ) -> bool:
31
32
  pass
32
33
 
33
34
  class SaveFnAsync(Protocol):
34
35
  def __call__(
35
- self, entry: ArtifactManifestEntry, progress_callback: "ProgressFn"
36
+ self, entry: "ArtifactManifestEntry", progress_callback: "ProgressFn"
36
37
  ) -> Awaitable[bool]:
37
38
  pass
38
39
 
@@ -0,0 +1,10 @@
1
+ """Artifact state."""
2
+ from enum import Enum
3
+
4
+
5
class ArtifactState(Enum):
    """Closed set of artifact state names; each member's value equals its name."""

    PENDING = "PENDING"
    COMMITTED = "COMMITTED"
    DELETED = "DELETED"
    GARBAGE_COLLECTED = "GARBAGE_COLLECTED"
    PENDING_DELETION = "PENDING_DELETION"
@@ -1,11 +1,13 @@
1
+ """Artifact cache."""
1
2
  import contextlib
2
3
  import hashlib
3
4
  import os
4
5
  import secrets
5
6
  from typing import IO, TYPE_CHECKING, ContextManager, Dict, Generator, Optional, Tuple
6
7
 
7
- from wandb import env, util
8
- from wandb.sdk.interface.artifacts import Artifact, ArtifactNotLoggedError
8
+ from wandb import env, termwarn, util
9
+ from wandb.sdk.artifacts.exceptions import ArtifactNotLoggedError
10
+ from wandb.sdk.lib.capped_dict import CappedDict
9
11
  from wandb.sdk.lib.filesystem import mkdir_exists_ok
10
12
  from wandb.sdk.lib.hashutil import B64MD5, ETag, b64_to_hex_id
11
13
  from wandb.sdk.lib.paths import FilePathStr, StrPath, URIStr
@@ -13,7 +15,7 @@ from wandb.sdk.lib.paths import FilePathStr, StrPath, URIStr
13
15
  if TYPE_CHECKING:
14
16
  import sys
15
17
 
16
- from wandb.sdk import wandb_artifacts
18
+ from wandb.sdk.artifacts.artifact import Artifact
17
19
 
18
20
  if sys.version_info >= (3, 8):
19
21
  from typing import Protocol
@@ -33,8 +35,8 @@ class ArtifactsCache:
33
35
  mkdir_exists_ok(self._cache_dir)
34
36
  self._md5_obj_dir = os.path.join(self._cache_dir, "obj", "md5")
35
37
  self._etag_obj_dir = os.path.join(self._cache_dir, "obj", "etag")
36
- self._artifacts_by_id: Dict[str, Artifact] = {}
37
- self._artifacts_by_client_id: Dict[str, "wandb_artifacts.Artifact"] = {}
38
+ self._artifacts_by_id: Dict[str, "Artifact"] = CappedDict()
39
+ self._artifacts_by_client_id: Dict[str, "Artifact"] = CappedDict()
38
40
 
39
41
  def check_md5_obj_path(
40
42
  self, b64_md5: B64MD5, size: int
@@ -74,18 +76,17 @@ class ArtifactsCache:
74
76
  raise ArtifactNotLoggedError(artifact, "store_artifact")
75
77
  self._artifacts_by_id[artifact.id] = artifact
76
78
 
77
- def get_client_artifact(
78
- self, client_id: str
79
- ) -> Optional["wandb_artifacts.Artifact"]:
79
+ def get_client_artifact(self, client_id: str) -> Optional["Artifact"]:
80
80
  return self._artifacts_by_client_id.get(client_id)
81
81
 
82
- def store_client_artifact(self, artifact: "wandb_artifacts.Artifact") -> None:
82
+ def store_client_artifact(self, artifact: "Artifact") -> None:
83
83
  self._artifacts_by_client_id[artifact._client_id] = artifact
84
84
 
85
- def cleanup(self, target_size: int) -> int:
85
+ def cleanup(self, target_size: int, remove_temp: bool = False) -> int:
86
86
  bytes_reclaimed = 0
87
87
  paths = {}
88
88
  total_size = 0
89
+ temp_size = 0
89
90
  for root, _, files in os.walk(self._cache_dir):
90
91
  for file in files:
91
92
  try:
@@ -93,14 +94,23 @@ class ArtifactsCache:
93
94
  stat = os.stat(path)
94
95
 
95
96
  if file.startswith(ArtifactsCache._TMP_PREFIX):
96
- os.remove(path)
97
- bytes_reclaimed += stat.st_size
97
+ if remove_temp:
98
+ os.remove(path)
99
+ bytes_reclaimed += stat.st_size
100
+ else:
101
+ temp_size += stat.st_size
98
102
  continue
99
103
  except OSError:
100
104
  continue
101
105
  paths[path] = stat
102
106
  total_size += stat.st_size
103
107
 
108
+ if temp_size:
109
+ termwarn(
110
+ f"Cache contains {util.to_human_size(temp_size)} of temporary files. "
111
+ "Run `wandb artifact cleanup --remove-temp` to remove them."
112
+ )
113
+
104
114
  sorted_paths = sorted(paths.items(), key=lambda x: x[1].st_atime)
105
115
  for path, stat in sorted_paths:
106
116
  if total_size < target_size:
@@ -0,0 +1,55 @@
1
+ """Artifact exceptions."""
2
+ from typing import TYPE_CHECKING, Optional
3
+
4
+ from wandb import errors
5
+
6
+ if TYPE_CHECKING:
7
+ from wandb.sdk.artifacts.artifact import Artifact
8
+
9
+
10
class ArtifactStatusError(AttributeError):
    """Raised when an artifact is in an invalid state for the requested operation.

    Arguments:
        artifact: The artifact the operation was attempted on, if any.
        attr: Name of the attribute or method that was accessed, if any.
        msg: Message template; it is formatted with `artifact`, `attr` and
            `method_id` (e.g. "Artifact.name") available as named fields.
    """

    def __init__(
        self,
        artifact: Optional["Artifact"] = None,
        attr: Optional[str] = None,
        msg: str = "Artifact is in an invalid state for the requested operation.",
    ):
        # Compare against None explicitly: a truthiness test would invoke the
        # artifact's __bool__/__len__ and mislabel a falsy-but-present object
        # with the generic "Artifact" name.
        if artifact is not None:
            object_name = artifact.__class__.__name__
        else:
            object_name = "Artifact"
        method_id = f"{object_name}.{attr}" if attr else object_name
        super().__init__(msg.format(artifact=artifact, attr=attr, method_id=method_id))
        # Follow the same pattern as AttributeError.
        self.obj = artifact
        self.name = attr or ""
25
+
26
+
27
class ArtifactNotLoggedError(ArtifactStatusError):
    """Raised for Artifact methods or attributes only available after logging.

    Arguments:
        artifact: The artifact the operation was attempted on, if any.
        attr: Name of the attribute or method that was accessed, if any.
    """

    def __init__(
        self, artifact: Optional["Artifact"] = None, attr: Optional[str] = None
    ):
        # `{method_id}` is filled in by ArtifactStatusError.__init__.
        template = (
            "'{method_id}' used prior to logging artifact or while in offline mode. "
            "Call wait() before accessing logged artifact properties."
        )
        super().__init__(artifact, attr, template)
39
+
40
+
41
class ArtifactFinalizedError(ArtifactStatusError):
    """Raised for Artifact methods or attributes that can't be changed after logging.

    Arguments:
        artifact: The artifact the operation was attempted on, if any.
        attr: Name of the attribute or method that was accessed, if any.
    """

    def __init__(
        self, artifact: Optional["Artifact"] = None, attr: Optional[str] = None
    ):
        # `{method_id}` is filled in by ArtifactStatusError.__init__.
        template = (
            "'{method_id}' used on logged artifact. Can't modify finalized artifact."
        )
        super().__init__(artifact, attr, template)
52
+
53
+
54
class WaitTimeoutError(errors.Error):
    """Raised when a wait() call times out before the process has finished."""
@@ -0,0 +1,59 @@
1
+ """Storage handler."""
2
+ from typing import TYPE_CHECKING, Optional, Sequence, Union
3
+
4
+ from wandb.sdk.lib.paths import FilePathStr, URIStr
5
+
6
+ if TYPE_CHECKING:
7
+ from urllib.parse import ParseResult
8
+
9
+ from wandb.sdk.artifacts.artifact import Artifact
10
+ from wandb.sdk.artifacts.artifact_manifest_entry import ArtifactManifestEntry
11
+
12
+ DEFAULT_MAX_OBJECTS = 10000
13
+
14
+
15
class StorageHandler:
    """Interface for storage handlers; every method must be overridden."""

    def can_handle(self, parsed_url: "ParseResult") -> bool:
        """Report whether this handler supports the given URL.

        Args:
            parsed_url: The already-parsed URL to check.

        Returns:
            Whether this handler can handle the given url.
        """
        raise NotImplementedError

    def load_path(
        self,
        manifest_entry: "ArtifactManifestEntry",
        local: bool = False,
    ) -> Union[URIStr, FilePathStr]:
        """Load a file or directory given the corresponding manifest entry.

        Args:
            manifest_entry: The manifest entry to load
            local: Whether to load the file locally or not

        Returns:
            A path to the file represented by `manifest_entry`
        """
        raise NotImplementedError

    def store_path(
        self,
        artifact: "Artifact",
        path: Union[URIStr, FilePathStr],
        name: Optional[str] = None,
        checksum: bool = True,
        max_objects: Optional[int] = None,
    ) -> Sequence["ArtifactManifestEntry"]:
        """Store the file or directory at the given path to the specified artifact.

        Args:
            artifact: The artifact to store the path in
            path: The path to store
            name: If specified, the logical name that should map to `path`
            checksum: Whether to compute the checksum of the file
            max_objects: The maximum number of objects to store

        Returns:
            A list of manifest entries to store within the artifact
        """
        raise NotImplementedError
File without changes