tgzr.snap 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tgzr/snap/__init__.py ADDED
@@ -0,0 +1 @@
1
+
tgzr/snap/_version.py ADDED
@@ -0,0 +1,34 @@
1
# file generated by setuptools-scm
# don't change, don't track in version control
#
# Publishes the package version, its tuple form and the commit id, each under
# a dunder name and a plain alias.

__all__ = [
    "__version__",
    "__version_tuple__",
    "version",
    "version_tuple",
    "__commit_id__",
    "commit_id",
]

# Hard-coded to False, so the `typing` imports below never execute at runtime.
TYPE_CHECKING = False
if not TYPE_CHECKING:
    # Runtime placeholders for the aliases used in the annotations below.
    VERSION_TUPLE = object
    COMMIT_ID = object
else:
    from typing import Tuple, Union

    VERSION_TUPLE = Tuple[Union[int, str], ...]
    COMMIT_ID = Union[str, None]

# Each plain name and its dunder twin are bound to the same object.
version: str = '0.0.1'
__version__: str = version
__version_tuple__: VERSION_TUPLE = (0, 0, 1)
version_tuple: VERSION_TUPLE = __version_tuple__
commit_id: COMMIT_ID = None
__commit_id__: COMMIT_ID = commit_id
@@ -0,0 +1,115 @@
1
+ from __future__ import annotations
2
+ from typing import TypeVar, Generic, get_args, Any
3
+
4
+ from abc import ABC, abstractmethod
5
+ from pathlib import Path
6
+ import dataclasses
7
+
8
+
9
@dataclasses.dataclass
class VersionInfo:
    """
    Description of one stored version of a file or folder.

    Only `name` is required; every other field defaults to "unknown"
    (`None`) or `False`.
    """

    # Identifier of the version. DVCSnap.push stores the content hash here.
    name: str

    # The fields below are copied by DVCSnap.push from the metadata DVC
    # reports for the pushed object.
    # NOTE(review): exact meaning/units come from DVC's Meta - confirm there.
    isdir: bool = False
    size: int | None = None
    nfiles: int | None = None
    isexec: bool = False
    version_id: str | None = None
    md5: str | None = None
    mtime: float | None = None

    # Name of the remote this version was pushed to, when known.
    remote_name: str | None = None
21
+
22
+
23
@dataclasses.dataclass
class Remote:
    """A named storage location, identified by `name` and reachable at `url`."""

    name: str
    url: str


@dataclasses.dataclass
class RepositoryConfig:
    """
    Settings common to every repository type.

    Repository implementations subclass this to add their own fields.
    """

    # Remotes declared for the repository.
    remotes: list[Remote]
    # Name of the remote to use when none is given explicitly.
    default_remote: str

    @classmethod
    def from_dict(cls, d: dict[str, Any]):
        """
        Build a config from a plain dict (e.g. parsed JSON).

        Keys of `d` are passed as keyword arguments to the class, so a
        subclass accepts its extra fields too. Entries of `d["remotes"]`
        given as dicts are converted to `Remote`; entries that are already
        `Remote` instances (or anything else) are kept untouched.

        `d` itself is never modified.

        Raises TypeError if `d` misses a required field, carries an unknown
        one, or a remote dict does not match the `Remote` fields.
        """
        kwargs = dict(d)  # shallow copy: the caller's dict stays as it was
        remotes = kwargs.get("remotes")
        if remotes is not None:
            # Without this, `remotes` would hold raw dicts and any later
            # `remote.name` / `remote.url` access would fail.
            kwargs["remotes"] = [
                Remote(**remote) if isinstance(remote, dict) else remote
                for remote in remotes
            ]
        return cls(**kwargs)
37
+
38
+
39
# Type variable for the concrete config class a repository implementation binds.
RepositoryConfigType = TypeVar("RepositoryConfigType", bound=RepositoryConfig)


class SnapRepository(ABC, Generic[RepositoryConfigType]):
    """
    Abstract interface of a versioned file repository rooted at `path`.

    Implementations subclass `SnapRepository[TheirConfigType]`; the bound
    config class can then be retrieved with `get_config_type()`.
    """

    @classmethod
    @abstractmethod
    def create_repository(
        cls, path: str | Path, config: RepositoryConfigType, force: bool = False
    ):
        """
        Initialize a new repository at that path with this config.
        """

    @classmethod
    def get_config_type(cls) -> type[RepositoryConfigType]:
        """
        Return the config class bound in `SnapRepository[...]` by this class.

        The class hierarchy is searched for a generic base derived from
        SnapRepository that carries a concrete argument, so the result does
        not depend on that base being listed first.

        Raises IndexError (or AttributeError) if no config class was bound,
        e.g. when called on SnapRepository itself.
        """
        from typing import get_origin  # local import: not imported at module level

        for klass in cls.__mro__:
            # Read the class' own `__orig_bases__` only; parents are visited
            # later in the MRO walk.
            for base in klass.__dict__.get("__orig_bases__", ()):
                origin = get_origin(base)
                if not (isinstance(origin, type) and issubclass(origin, SnapRepository)):
                    continue
                args = get_args(base)
                # A TypeVar argument means the base is still generic: keep looking.
                if args and not isinstance(args[0], TypeVar):
                    return args[0]

        # Nothing found: fall back to the historical lookup, errors included.
        return get_args(cls.__orig_bases__[0])[0]  # type: ignore

    def __init__(self, path: str | Path, config: RepositoryConfigType):
        """Remember the repository root (stored as a Path) and its config."""
        self._path = Path(path)
        self._config = config

    @property
    def config(self) -> RepositoryConfigType:
        """The config this repository was instantiated with."""
        return self._config

    @property
    def path(self) -> Path:
        """Root path of the repository."""
        return self._path

    #
    #
    #

    @abstractmethod
    def default_remote_name(self) -> str:
        """Return the name of the remote to use when none is specified."""

    @abstractmethod
    def remotes(self) -> list[Remote]:
        """Return the remotes declared for this repository."""

    @abstractmethod
    def get_remote(self, remote_name: str | None) -> Remote:
        """
        Return the remote with this name.

        Raise KeyError if no such remote is declared.
        """

    #
    #
    #

    @abstractmethod
    def push(self, repo_path: str, remote_name: str | None = None) -> VersionInfo:
        """Uploads the file at `self.path/repo_path` and returns the VersionInfo for the created version."""

    @abstractmethod
    def get_versions(self, repo_path: str):
        """Returns a list of VersionInfo for repo_path."""

    @abstractmethod
    def status(self, repo_path: str):
        """Returns the status of the file/folder at repo_path."""

    @abstractmethod
    def pull(self, repo_path: str | Path, version: str, remote_name: str | None = None):
        """Downloads this version of `repo_path`."""

    @abstractmethod
    def unprotect(self, repo_path: str):
        """Make the file writable."""
tgzr/snap/plugin.py ADDED
@@ -0,0 +1,36 @@
1
+ from __future__ import annotations
2
+ from typing import Type
3
+
4
+ from tgzr.package_management.plugin_manager import Plugin, PluginManager
5
+
6
+ from .base_repository import SnapRepository
7
+
8
+
9
class SnapPlugin(Plugin):
    """
    Base class of the plugins that contribute a SnapRepository implementation.

    Subclasses override `get_repository_type()`.
    """

    @classmethod
    def plugin_type_name(cls) -> str:
        """Return the type label shared by every snap plugin."""
        return "SnapRepository"

    def get_repository_type(self) -> Type[SnapRepository]:
        """
        Return the SnapRepository subclass provided by this plugin.

        The base implementation always raises NotImplementedError.
        """
        raise NotImplementedError
16
+
17
+
18
class SnapPluginManager(PluginManager[SnapPlugin]):
    """Plugin manager specialized for SnapPlugin."""

    # NOTE(review): presumably the entry-point group PluginManager scans to
    # discover plugins - confirm in tgzr.package_management.
    EP_GROUP = "tgzr.snap.plugin"
20
+
21
+
22
def test_plugins():
    """Print the repository type and config type of every plugin the manager returns."""
    # NB: the manager defined in this module cannot be used when the file is
    # run as a script: the SnapPlugin qualified name would then be
    # '__main__.SnapPlugin' instead of 'tgzr.snap.plugin.SnapPlugin'.
    # Hence the re-import through the package path:
    from tgzr.snap.plugin import SnapPluginManager

    manager = SnapPluginManager()
    print("Snap Plugins:")
    for plugin in manager.get_plugins():
        repository_type = plugin.get_repository_type()
        config_type = repository_type.get_config_type()
        print(" ", repository_type, config_type)


if __name__ == "__main__":
    test_plugins()
@@ -0,0 +1,19 @@
1
+ from __future__ import annotations
2
+ from typing import Type
3
+
4
+ from .dvc import DVCSnap
5
+ from .folder import FolderSnap
6
+
7
+ from ..plugin import SnapPlugin, SnapRepository
8
+
9
+
10
class DVCSnapPlugin(SnapPlugin):
    """Plugin exposing the DVC based repository implementation."""

    def get_repository_type(self) -> Type[SnapRepository]:
        """Return the DVCSnap class."""
        repository_type = DVCSnap
        return repository_type
14
+
15
+
16
class FolderSnapPlugin(SnapPlugin):
    """Plugin exposing the folder based repository implementation."""

    def get_repository_type(self) -> Type[SnapRepository]:
        """Return the FolderSnap class."""
        repository_type = FolderSnap
        return repository_type
@@ -0,0 +1,363 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import shutil
5
+ from pathlib import Path
6
+ import dataclasses
7
+
8
+ import rich
9
+ import json
10
+
11
+ from dvc.repo import Repo
12
+ from dvc_data.hashfile.hash_info import HashInfo
13
+ from dvc_data.hashfile.meta import Meta
14
+ from dvc_data.hashfile.build import build
15
+ from dvc_data.hashfile.tree import Tree
16
+
17
+ from dvc_data.hashfile.build import _build_tree
18
+ from dvc_data.hashfile.db import add_update_tree
19
+
20
+ from dvc_data.hashfile.hash import hash_file
21
+ from dvc_data.hashfile.db import HashFileDB
22
+ from dvc_data.hashfile.transfer import transfer
23
+ from dvc_data.hashfile.obj import HashFile
24
+ from dvc_data.hashfile.checkout import checkout
25
+
26
+
27
+ from dvc.stage import Stage
28
+ from dvc.output import Output
29
+
30
+ from ..base_repository import SnapRepository, RepositoryConfig, VersionInfo, Remote
31
+
32
+
33
@dataclasses.dataclass
class DVCRepositoryConfig(RepositoryConfig):
    """RepositoryConfig extended with the settings DVCSnap needs."""

    # Cache directory; DVCSnap.create_repository writes it (as a string)
    # to the `cache.dir` setting of the dvc repository.
    cache_dir: str | Path
36
+
37
+
38
class DVCSnap(SnapRepository[DVCRepositoryConfig]):
    """
    SnapRepository stored in a dvc repository initialized without SCM.

    A version is identified by the hash dvc computes for the pushed file or
    folder: `push()` returns it as `VersionInfo.name` and `pull()` takes it
    as `version`.
    """

    @classmethod
    def create_repository(
        cls, path: str | Path, config: DVCRepositoryConfig, force: bool = False
    ) -> DVCSnap:
        """
        Create the dvc repository on disk and return a DVCSnap connected to it.
        The remotes defined in config will be configured in the dvc repository.

        Raises ValueError if `config.default_remote` is set but does not name
        one of `config.remotes`. The check runs before anything is written,
        so an invalid config does not leave a half-initialized repository.
        """
        remote_names = [remote.name for remote in config.remotes]
        if config.default_remote and config.default_remote not in remote_names:
            raise ValueError(
                f"Cannot use remote {config.default_remote} as default: no such remote in config (got {remote_names})"
            )

        repo = Repo.init(str(path), no_scm=True, force=force)
        with repo.config.edit(level="local") as repo_config:
            core = repo_config["core"]
            core["analytics"] = False
            core["hardlink_lock"] = False
            core["check_update"] = False

            cache = repo_config["cache"]
            cache["dir"] = str(config.cache_dir)
            cache["type"] = "reflink,hardlink,copy"
            cache["protected"] = True

            if "remote" not in repo_config:
                repo_config["remote"] = {}
            for remote in config.remotes:
                repo_config["remote"][remote.name] = {"url": remote.url}

            if config.default_remote:
                core["remote"] = config.default_remote

        return cls(path, config)

    def __init__(self, path: str | Path, config: DVCRepositoryConfig):
        """Open the existing dvc repository located at `path`."""
        super().__init__(path, config)

        self._dvc_repo = Repo(str(self.path))

    #
    #
    #

    def default_remote_name(self) -> str:
        """
        Return the `core.remote` value of the dvc config.

        The value is None when the config declares no default remote.
        """
        return self._dvc_repo.config.get("core", {}).get("remote")

    def remotes(self) -> list[Remote]:
        """Return one Remote per entry of the `remote` section of the dvc config."""
        remotes_config = self._dvc_repo.config.get("remote", {})

        return [
            Remote(name, options.get("url", None))
            for name, options in remotes_config.items()
        ]

    def get_remote(self, remote_name: str | None) -> Remote:
        """
        Return the remote named `remote_name`, or the default remote when
        `remote_name` is empty or None.

        Raise KeyError if no such remote is declared.
        """
        remote_name = remote_name or self.default_remote_name()
        r = self._dvc_repo.config["remote"][remote_name]
        return Remote(name=remote_name, url=r["url"])

    #
    #
    #

    def push(
        self, repo_path: str | Path, remote_name: str | None = None
    ) -> VersionInfo:
        """
        Upload the file or folder at `self.path/repo_path` to a remote and
        return the VersionInfo of the created version.

        The content is first added to the local cache, then transferred to
        the remote (the default one when `remote_name` is not given).

        Raises KeyError if the remote is not declared.
        """
        path = str(self.path / repo_path)

        remote = self.get_remote(remote_name)
        local_odb = self._dvc_repo.cache.local
        remote_odb = self._dvc_repo.cloud.get_remote_odb(remote.name)

        _, meta, tree_or_hashfile_obj = build(local_odb, path, local_odb.fs, "md5")

        if tree_or_hashfile_obj.hash_info.isdir:
            # Pushing a folder: the object is a Tree. Cache every child file
            # that is not in the local cache yet, then the tree itself.
            for entry_path, _, entry_hi in tree_or_hashfile_obj:
                if not local_odb.exists(entry_hi.value):
                    # Resolve the full path for the child file
                    full_entry_path = local_odb.fs.join(path, *entry_path)
                    local_odb.add(full_entry_path, local_odb.fs, entry_hi.value)
            add_update_tree(local_odb, tree_or_hashfile_obj)

            # The remote needs the tree object and each of its children.
            to_push = {
                tree_or_hashfile_obj.hash_info,
                *(child_hi for _, _, child_hi in tree_or_hashfile_obj),
            }
        else:
            local_odb.add(path, local_odb.fs, tree_or_hashfile_obj.hash_info.value)
            to_push = {tree_or_hashfile_obj.hash_info}

        transfer(
            local_odb,
            remote_odb,
            to_push,
            jobs=None,
        )
        assert (
            tree_or_hashfile_obj.hash_info.value is not None
        )  # should always be true, makes mypy happy.

        return VersionInfo(
            name=tree_or_hashfile_obj.hash_info.value,
            isdir=meta.isdir,
            size=meta.size,
            nfiles=meta.nfiles,
            isexec=meta.isexec,
            version_id=meta.version_id,
            md5=meta.md5,
            mtime=meta.mtime,
            # `remote` was resolved above: explicit name, else the default one.
            remote_name=remote.name,
        )

    def get_versions(self, repo_path: str):
        """Not implemented yet: always raises NotImplementedError."""
        raise NotImplementedError()

    def status(self, repo_path: str):
        """Not implemented yet: always raises NotImplementedError."""
        raise NotImplementedError()

    def pull(self, repo_path: str | Path, version: str, remote_name: str | None = None):
        """
        Download `version` of `repo_path` and check it out at
        `self.path/repo_path`, overwriting what is there (`force=True`).

        `version` is the md5 based identifier returned by `push()` in
        `VersionInfo.name`.

        `remote_name` is handed to dvc as is.
        NOTE(review): presumably dvc falls back to its configured default
        remote when it is None - confirm against the dvc version in use.
        """
        path = str(self.path / repo_path)

        local_odb = self._dvc_repo.cache.local
        remote_odb = self._dvc_repo.cloud.get_remote_odb(remote_name)

        hash_info = HashInfo("md5", version)

        # Fetch the top-level object only when the local cache lacks it.
        if not local_odb.exists(hash_info.value):
            transfer(remote_odb, local_odb, {hash_info})

        obj = local_odb.get(hash_info.value)

        if hash_info.isdir:
            # A folder version: load the tree listing, then fetch its children.
            tree_obj = Tree.load(local_odb, obj.hash_info)
            to_pull = {hash_info}
            for _, _, child_hi in tree_obj:
                to_pull.add(child_hi)
            transfer(remote_odb, local_odb, to_pull)
            checkout_obj = tree_obj
        else:
            checkout_obj = obj

        checkout(
            path,
            local_odb.fs,
            checkout_obj,
            local_odb,
            force=True,
            state=self._dvc_repo.state,
        )

        if self._dvc_repo.state:
            self._dvc_repo.state.save(path, local_odb.fs, hash_info)

    def unprotect(self, repo_path: str):
        """Make the file writable."""
        path = self.path / repo_path
        # A bare Stage is created only to satisfy Output's constructor.
        stage = Stage(self._dvc_repo)
        out = Output(stage, str(path))
        out.unprotect()
209
+
210
+
211
+ #
212
+ #
213
+ #
214
+
215
+
216
def test_config() -> DVCRepositoryConfig:
    """
    Return the config shared by the manual tests of this module.

    It declares two remotes under /tmp/snap_tests_dvc ("blessed" and
    "review", the latter being the default) plus a cache directory.
    """
    remotes = [
        Remote("blessed", "/tmp/snap_tests_dvc/PROD"),
        Remote("review", "/tmp/snap_tests_dvc/REVIEW"),
    ]
    return DVCRepositoryConfig(
        remotes=remotes,
        default_remote="review",
        cache_dir="/tmp/snap_tests_dvc/CACHE",
    )
226
+
227
+
228
def create_repo():
    """
    Create (or re-create, since force is on) the test repository at
    /tmp/snap_tests_dvc/WORK using the config from test_config().

    Shell session this was modeled on:

        mkdir PROD_REPO

        mkdir my_data_repo && cd my_data_repo
        dvc init --no-scm
        dvc config cache.type "reflink,hardlink,copy"
        dvc config cache.protected true
        dvc remote add -d PROD ../PROD_REPO

        echo "content" > bob.txt
    """
    work_dir = Path("/tmp/snap_tests_dvc/WORK")
    DVCSnap.create_repository(work_dir, test_config(), force=True)

    # To inspect the resulting dvc config:
    # snap = DVCSnap(work_dir, test_config())
    # rich.print(snap._dvc_repo.config)
247
+
248
+
249
def test_push():
    """Rewrite `bob.txt` with a timestamped content and push it to the "blessed" remote."""
    import time

    snap = DVCSnap(Path("/tmp/snap_tests_dvc/WORK"), test_config())
    artifact = "bob.txt"

    # Make the file writable before modifying it.
    snap.unprotect(artifact)
    (snap.path / artifact).write_text(f"Change... {time.time()}")

    version = snap.push(artifact, "blessed")

    print(f"{version=}")
265
+
266
+
267
def test_pull():
    """Pull a known version of `bob.txt` into the test repository and print its content."""
    work_dir = Path("/tmp/snap_tests_dvc/WORK")
    snap = DVCSnap(work_dir, test_config())
    artifact = "bob.txt"

    # Versions used previously, kept for reference:
    #   "ccef8ce5e5b64d56cc72ef9e118df82d"
    #   "b1ef6fd0c3d7fb7ebf4769593b9c445b"
    version = "ac6281fd84ac444e59af60f53da81a48"

    snap.pull(artifact, version)
    content = (work_dir / artifact).read_text()
    print("CONTENT is:", content)
281
+
282
+
283
def test_push_iso():
    """Write 100 MiB of random bytes to `big.iso` and push it to the default remote."""
    config = test_config()
    path = Path("/tmp/snap_tests_dvc/WORK")
    snap = DVCSnap(path, config)
    artifact = "big.iso"

    mib = 1024 * 1024
    # Binary mode: store the random bytes themselves. Writing
    # `str(os.urandom(...))` in text mode stored the text of their repr.
    with open(path / artifact, "wb") as fp:
        fp.write(os.urandom(100 * mib))

    version_info = snap.push(artifact)

    rich.print(f"{version_info}")
    print(f" version='{version_info.name}'")
297
+
298
+
299
def test_pull_iso():
    """Pull a known version of `big.iso` and print how many seconds it took."""
    import time

    snap = DVCSnap(Path("/tmp/snap_tests_dvc/WORK"), test_config())
    artifact = "big.iso"

    # Versions used previously, kept for reference:
    #   "1b9cb5c1940958f28552c5ce7be1a3bc"  # 928K
    #   "8f33876d8bc03223ff4bcb70b772dec7"  # 1010K
    version = "174101e0ee8b63f760d5994b4c2e6380"  # 99M

    started_at = time.time()
    snap.pull(artifact, version)
    elapsed = time.time() - started_at
    print(elapsed)
314
+
315
+
316
def test_push_dir():
    """
    Fill `data_folder` with 5 to 9 files of 1 to 19 MiB of random bytes each,
    then push the folder to the default remote.
    """
    import random

    config = test_config()
    path = Path("/tmp/snap_tests_dvc/WORK")
    snap = DVCSnap(path, config)
    artifact = "data_folder"

    (path / artifact).mkdir(exist_ok=True)

    mib = 1024 * 1024
    for i in range(random.randrange(5, 10)):
        size = random.randrange(1, 20)
        # Binary mode: store the random bytes themselves. Writing
        # `str(os.urandom(...))` in text mode stored the text of their repr.
        with open(path / artifact / f"file_{i:03}.data", "wb") as fp:
            fp.write(os.urandom(size * mib))

    version_info = snap.push(artifact)

    rich.print(f"{version_info}")
    print(f" version='{version_info.name}'")
336
+
337
+
338
def test_pull_dir():
    """Pull a known version of `data_folder` and print how many seconds it took."""
    import time

    snap = DVCSnap(Path("/tmp/snap_tests_dvc/WORK"), test_config())
    artifact = "data_folder"

    # Versions used previously, kept for reference:
    #   "225b0fbdaf1dbeaa3f0dd7ffeecd716f.dir"
    #   "4ca4e81d1533353d2fe7ed1bc801d39f.dir"
    version = "fd0c0e8ab5c798c61778f63063acc462.dir"

    started_at = time.time()
    snap.pull(artifact, version)
    elapsed = time.time() - started_at
    print(elapsed)


if __name__ == "__main__":
    # Manual test switchboard: uncomment the step to run.
    # create_repo()
    # test_push()
    # test_pull()
    # test_push_iso()
    # test_pull_iso()
    # test_push_dir()
    test_pull_dir()