vocker 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vocker/cli.py +1 -2
- vocker/dedup.py +913 -676
- vocker/dedup_models.py +204 -49
- vocker/repo/io.py +7 -0
- vocker/system.py +64 -34
- vocker/util.py +6 -4
- {vocker-0.1.0.dist-info → vocker-0.3.0.dist-info}/METADATA +5 -6
- {vocker-0.1.0.dist-info → vocker-0.3.0.dist-info}/RECORD +10 -10
- {vocker-0.1.0.dist-info → vocker-0.3.0.dist-info}/WHEEL +0 -0
- {vocker-0.1.0.dist-info → vocker-0.3.0.dist-info}/top_level.txt +0 -0
vocker/dedup_models.py
CHANGED
|
@@ -1,10 +1,14 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import dataclasses
|
|
2
4
|
import typing as ty
|
|
3
5
|
|
|
6
|
+
import attr
|
|
4
7
|
import sqlalchemy as sa
|
|
5
8
|
from sqlalchemy import orm as sao
|
|
6
9
|
from sqlalchemy.orm import Mapped as M, mapped_column as mc, relationship, DeclarativeBase
|
|
7
10
|
from sqlalchemy_boltons.orm import RelationshipComparator as Rel
|
|
11
|
+
from sqlalchemy_boltons.temporary import CacheTemporaryMetaData as CacheTmp, MetaDataMaker
|
|
8
12
|
|
|
9
13
|
from . import multihash as mh
|
|
10
14
|
from .util_models import now, rel_kw_basic, rel_kw_cascade
|
|
@@ -14,57 +18,78 @@ class BaseDedup(DeclarativeBase):
|
|
|
14
18
|
pass
|
|
15
19
|
|
|
16
20
|
|
|
21
|
+
_make_file_fk = lambda: sa.ForeignKey("dedup_file.id", ondelete="CASCADE")
|
|
22
|
+
_make_obj_fk = lambda: sa.ForeignKey("dedup_obj.id", ondelete="CASCADE")
|
|
23
|
+
_make_pending_fk = lambda: sa.ForeignKey("dedup_pending.id", ondelete="CASCADE")
|
|
24
|
+
|
|
25
|
+
|
|
17
26
|
@BaseDedup.registry.mapped_as_dataclass(init=False)
|
|
18
27
|
class DedupConfig:
|
|
19
28
|
__tablename__ = "dedup_config"
|
|
20
29
|
|
|
21
30
|
key: M[str] = mc(primary_key=True)
|
|
22
|
-
value: M[str] = mc(
|
|
31
|
+
value: M[str] = mc()
|
|
23
32
|
|
|
24
33
|
|
|
25
34
|
@BaseDedup.registry.mapped_as_dataclass(init=False)
|
|
26
|
-
class
|
|
35
|
+
class Obj:
|
|
36
|
+
__tablename__ = "dedup_obj"
|
|
27
37
|
"""
|
|
28
|
-
Represents
|
|
38
|
+
Represents an idealized deduplicated file. There may be multiple actual filesystem files
|
|
39
|
+
associated to this idealized object.
|
|
29
40
|
|
|
30
|
-
|
|
31
|
-
"""
|
|
41
|
+
When an Obj has no more links, :attr:`orphaned_at` is set to the current time.
|
|
32
42
|
|
|
33
|
-
|
|
43
|
+
When a File has no more links, and there are other Files with the same content, we should delete
|
|
44
|
+
all but one of the Files. This is not implemented yet.
|
|
45
|
+
|
|
46
|
+
Incomplete: Obj exists, has no File with pending_id != None.
|
|
47
|
+
Complete: Obj exists, has at least one File with pending_id == None,
|
|
48
|
+
|
|
49
|
+
Lifecycle:
|
|
50
|
+
1. Incomplete + Used: Newly created object, data not written yet.
|
|
51
|
+
2. Complete + Used: Data written, links created.
|
|
52
|
+
3. Complete + Unused: Data written, no more links exist.
|
|
53
|
+
4. Incomplete + Unused: Data maybe deleted, no more links exist.
|
|
54
|
+
"""
|
|
34
55
|
|
|
35
56
|
id: M[int] = mc(primary_key=True)
|
|
36
|
-
|
|
57
|
+
pending_id: M[int | None] = mc(_make_pending_fk())
|
|
58
|
+
metadata_bytes: M[bytes] = mc("metadata")
|
|
37
59
|
size: M[int] = mc()
|
|
38
|
-
|
|
39
|
-
|
|
60
|
+
created_at: M[int] = mc()
|
|
61
|
+
updated_at: M[int] = mc(server_default=sa.text("-1"), index=True)
|
|
40
62
|
orphaned_at: M[int | None] = mc()
|
|
41
|
-
pending_id: M[int | None] = mc(sa.ForeignKey("dedup_pending.id", ondelete="CASCADE"))
|
|
42
|
-
|
|
43
|
-
links: M[list["Link"]] = relationship(back_populates="file", **rel_kw_cascade)
|
|
44
|
-
tags: M[list["Tag"]] = relationship(back_populates="file", **rel_kw_cascade)
|
|
45
|
-
hashes: M[list["Hash"]] = relationship(back_populates="file", **rel_kw_cascade)
|
|
46
|
-
pending: M["Pending | None"] = relationship(back_populates="files", **rel_kw_basic)
|
|
47
63
|
|
|
48
|
-
|
|
49
|
-
|
|
64
|
+
files: M[list["File"]] = relationship(back_populates="obj", **rel_kw_cascade)
|
|
65
|
+
tags: M[list["Tag"]] = relationship(back_populates="obj", **rel_kw_cascade)
|
|
66
|
+
hashes: M[list["Hash"]] = relationship(back_populates="obj", **rel_kw_cascade)
|
|
67
|
+
pending: M["Pending | None"] = relationship(back_populates="objs", **rel_kw_basic)
|
|
50
68
|
|
|
51
69
|
@property
|
|
52
70
|
def hashes_dict(self):
|
|
53
71
|
return {(h := x.to_digest()).function: h for x in self.hashes}
|
|
54
72
|
|
|
55
73
|
@classmethod
|
|
56
|
-
def
|
|
74
|
+
def q_is_complete(cls):
|
|
75
|
+
F = sao.aliased(File)
|
|
76
|
+
return sa.exists().select_from(F).where(Rel(F.obj) == cls, F.pending_id == None)
|
|
77
|
+
|
|
78
|
+
@classmethod
|
|
79
|
+
def q_is_orphaned(cls):
|
|
80
|
+
F = sao.aliased(File)
|
|
81
|
+
return ~sa.exists().select_from(F).where(Rel(F.obj) == cls, F.link_count != 0)
|
|
82
|
+
|
|
83
|
+
@classmethod
|
|
84
|
+
def make_sql_update_orphaned(cls, orphaned_at_now):
|
|
57
85
|
"""
|
|
58
86
|
Construct the SQL DML statement which sets :attr:`orphaned_at` according to whether any
|
|
59
87
|
links are left that point to this dedup file.
|
|
60
88
|
"""
|
|
61
|
-
if orphaned_at_now is None:
|
|
62
|
-
orphaned_at_now = now()
|
|
63
|
-
L = sao.aliased(Link)
|
|
64
89
|
return sa.update(cls).values(
|
|
65
90
|
orphaned_at=sa.case(
|
|
66
91
|
# If a Link exists, then it's NULL.
|
|
67
|
-
(
|
|
92
|
+
(~cls.q_is_orphaned(), None),
|
|
68
93
|
# If the orphaned_at file was set in the past, then keep that value.
|
|
69
94
|
(cls.orphaned_at < orphaned_at_now, cls.orphaned_at),
|
|
70
95
|
# Otherwise, set it to the current timestamp.
|
|
@@ -73,7 +98,43 @@ class DedupFile:
|
|
|
73
98
|
)
|
|
74
99
|
|
|
75
100
|
|
|
76
|
-
|
|
101
|
+
@BaseDedup.registry.mapped_as_dataclass(init=False)
|
|
102
|
+
class File:
|
|
103
|
+
"""
|
|
104
|
+
Represents a single deduplicated file regardless of backend (hardlink, symlink, reflink).
|
|
105
|
+
|
|
106
|
+
The file contents may not yet be available if :attr:`pending_id` is not NULL.
|
|
107
|
+
|
|
108
|
+
If :attr:`obj_id` is NULL, then the File has been (probably) deleted and in a future transaction
|
|
109
|
+
the row should also be deleted if the file is confirmed deleted.
|
|
110
|
+
"""
|
|
111
|
+
|
|
112
|
+
__tablename__ = "dedup_file"
|
|
113
|
+
|
|
114
|
+
# this is used as a speedup when verifying hardlinks
|
|
115
|
+
_cached_file_stat = None
|
|
116
|
+
|
|
117
|
+
id: M[int] = mc(primary_key=True)
|
|
118
|
+
obj_id: M[int | None] = mc(_make_obj_fk(), index=True)
|
|
119
|
+
link_count: M[int] = mc(server_default=sa.text("-1"))
|
|
120
|
+
pending_id: M[int | None] = mc(sa.ForeignKey("dedup_pending.id", ondelete="CASCADE"))
|
|
121
|
+
created_at: M[int | None] = mc()
|
|
122
|
+
|
|
123
|
+
obj: M[list[Obj]] = relationship(back_populates="files", **rel_kw_cascade)
|
|
124
|
+
links: M[list["Link"]] = relationship(back_populates="file", **rel_kw_cascade)
|
|
125
|
+
pending: M["Pending | None"] = relationship(back_populates="files", **rel_kw_basic)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
@BaseDedup.registry.mapped_as_dataclass(init=False)
|
|
129
|
+
class FileCorruption:
|
|
130
|
+
"""
|
|
131
|
+
Represents information about a corrupted file.
|
|
132
|
+
"""
|
|
133
|
+
|
|
134
|
+
__tablename__ = "dedup_file_corrupt"
|
|
135
|
+
id: M[int] = mc(sa.ForeignKey("dedup_file.id", ondelete="CASCADE"), primary_key=True)
|
|
136
|
+
exception_name: M[str] = mc()
|
|
137
|
+
exception_string: M[str] = mc()
|
|
77
138
|
|
|
78
139
|
|
|
79
140
|
@BaseDedup.registry.mapped_as_dataclass(init=False)
|
|
@@ -83,7 +144,8 @@ class Pending:
|
|
|
83
144
|
id: M[int] = mc(primary_key=True)
|
|
84
145
|
expire_at: M[int] = mc()
|
|
85
146
|
|
|
86
|
-
files: M[list["
|
|
147
|
+
files: M[list["File"]] = relationship(back_populates="pending", **rel_kw_cascade)
|
|
148
|
+
objs: M[list["Obj"]] = relationship(back_populates="pending", **rel_kw_cascade)
|
|
87
149
|
|
|
88
150
|
|
|
89
151
|
@BaseDedup.registry.mapped_as_dataclass(init=False)
|
|
@@ -92,44 +154,131 @@ class Link:
|
|
|
92
154
|
|
|
93
155
|
__tablename__ = "dedup_link"
|
|
94
156
|
|
|
95
|
-
|
|
96
|
-
file_id: M[int] = mc(_make_file_fk(), index=True
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
sa.
|
|
111
|
-
)
|
|
157
|
+
path: M[bytes] = mc(primary_key=True) # utf-8 encoded
|
|
158
|
+
file_id: M[int] = mc(_make_file_fk(), index=True)
|
|
159
|
+
mtime: M[int | None] = mc() # whether this field means anything depends on the backend
|
|
160
|
+
created_at: M[int | None] = mc()
|
|
161
|
+
|
|
162
|
+
file: M["File"] = relationship(back_populates="links", **rel_kw_basic)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
_tmp_meta = sa.MetaData()
|
|
166
|
+
tmp_bytes = sa.Table("bytes", _tmp_meta, sa.Column("id", sa.LargeBinary, primary_key=True))
|
|
167
|
+
tmp_ints = sa.Table("ints", _tmp_meta, sa.Column("id", sa.Integer, primary_key=True))
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
@attr.s(frozen=True)
|
|
171
|
+
class TmpNewFiles:
|
|
172
|
+
files: sa.Table = attr.ib()
|
|
173
|
+
tags: sa.Table = attr.ib()
|
|
174
|
+
hashes: sa.Table = attr.ib()
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
@attr.s(frozen=True)
|
|
178
|
+
class TmpNewFiles2:
|
|
179
|
+
files: sa.Table = attr.ib()
|
|
180
|
+
links: sa.Table = attr.ib()
|
|
181
|
+
objs: sa.Table = attr.ib()
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
@attr.s(frozen=True)
|
|
185
|
+
class TmpCheckLinks:
|
|
186
|
+
links: sa.Table = attr.ib()
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
@attr.s(frozen=True)
|
|
190
|
+
class TmpDeleteExtra:
|
|
191
|
+
files: sa.Table = attr.ib()
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _make_temp_new_files(maker: MetaDataMaker):
|
|
195
|
+
# If an Obj has already-available contents in a File, then that File's id is written to
|
|
196
|
+
# content_file_id. Otherwise it is NULL.
|
|
197
|
+
files = maker.table(
|
|
198
|
+
"files",
|
|
199
|
+
sa.Column("id", sa.Integer, primary_key=True, nullable=False),
|
|
200
|
+
sa.Column("link_path", sa.LargeBinary, nullable=True),
|
|
201
|
+
sa.Column("metadata_bytes", sa.LargeBinary),
|
|
202
|
+
sa.Column("insert_obj_if_missing", sa.Boolean),
|
|
203
|
+
# sa.Column("adopt_existing", sa.Boolean, server_default=text("0")),
|
|
204
|
+
sa.Column("obj_id", sa.Integer, nullable=True), # ref: Obj.id
|
|
205
|
+
sa.Column("new_obj_id", sa.Integer, nullable=True), # ref: Obj.id
|
|
206
|
+
)
|
|
207
|
+
tags = maker.table(
|
|
208
|
+
"files_by_tag",
|
|
209
|
+
sa.Column("id", sa.Integer, primary_key=True, index=True), # ref: temp.files.id
|
|
210
|
+
sa.Column("name", sa.LargeBinary, primary_key=True),
|
|
211
|
+
)
|
|
212
|
+
hashes = maker.table(
|
|
213
|
+
"files_by_hash",
|
|
214
|
+
sa.Column("id", sa.Integer, primary_key=True), # ref: temp.files.id
|
|
215
|
+
sa.Column("hash_function", sa.Integer, primary_key=True),
|
|
216
|
+
sa.Column("hash", sa.LargeBinary),
|
|
217
|
+
)
|
|
218
|
+
return TmpNewFiles(files=files, tags=tags, hashes=hashes)
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def _make_temp_new_files2(maker: MetaDataMaker):
|
|
222
|
+
files = maker.table(
|
|
223
|
+
"files",
|
|
224
|
+
sa.Column("file_id", sa.Integer, primary_key=True, nullable=False),
|
|
225
|
+
sa.Column("obj_id", sa.Integer),
|
|
226
|
+
)
|
|
227
|
+
links = maker.table(
|
|
228
|
+
"links",
|
|
229
|
+
sa.Column("link_path", sa.LargeBinary, primary_key=True, nullable=False),
|
|
230
|
+
sa.Column("file_id", sa.Integer),
|
|
231
|
+
sa.Column("link_count", sa.Integer),
|
|
232
|
+
)
|
|
233
|
+
objs = maker.table(
|
|
234
|
+
"objs",
|
|
235
|
+
sa.Column("obj_id", sa.Integer, primary_key=True, nullable=False),
|
|
236
|
+
sa.Column("size", sa.Integer),
|
|
237
|
+
)
|
|
238
|
+
return TmpNewFiles2(files=files, links=links, objs=objs)
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def _make_temp_check_lx(maker: MetaDataMaker):
|
|
242
|
+
links = maker.table(
|
|
243
|
+
"links",
|
|
244
|
+
sa.Column("path", sa.LargeBinary, primary_key=True, nullable=False),
|
|
245
|
+
)
|
|
246
|
+
return TmpCheckLinks(links)
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _make_temp_delete_extra(maker: MetaDataMaker):
|
|
250
|
+
files = maker.table(
|
|
251
|
+
"files",
|
|
252
|
+
sa.Column("id", sa.Integer, primary_key=True, nullable=False),
|
|
253
|
+
)
|
|
254
|
+
return TmpDeleteExtra(files)
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
tmp_new_files: CacheTmp[TmpNewFiles] = CacheTmp(_make_temp_new_files)
|
|
258
|
+
tmp_new_files2: CacheTmp[TmpNewFiles2] = CacheTmp(_make_temp_new_files2)
|
|
259
|
+
tmp_check_links: CacheTmp[TmpCheckLinks] = CacheTmp(_make_temp_check_lx)
|
|
260
|
+
tmp_delete_extra: CacheTmp[TmpDeleteExtra] = CacheTmp(_make_temp_delete_extra)
|
|
112
261
|
|
|
113
262
|
|
|
114
263
|
@BaseDedup.registry.mapped_as_dataclass(init=False)
|
|
115
264
|
class Tag:
|
|
116
265
|
__tablename__ = "dedup_tag"
|
|
117
266
|
|
|
118
|
-
|
|
267
|
+
obj_id: M[int] = mc(_make_obj_fk(), primary_key=True)
|
|
119
268
|
name: M[bytes] = mc(primary_key=True, index=True)
|
|
120
269
|
|
|
121
|
-
|
|
270
|
+
obj: M["Obj"] = relationship(back_populates="tags", **rel_kw_basic)
|
|
122
271
|
|
|
123
272
|
|
|
124
273
|
@BaseDedup.registry.mapped_as_dataclass(init=False)
|
|
125
274
|
class Hash:
|
|
126
275
|
__tablename__ = "dedup_hashes"
|
|
127
276
|
|
|
128
|
-
|
|
277
|
+
obj_id: M[int] = mc(_make_obj_fk(), primary_key=True)
|
|
129
278
|
hash_function: M[int] = mc(primary_key=True)
|
|
130
279
|
hash: M[bytes] = mc(index=True)
|
|
131
280
|
|
|
132
|
-
|
|
281
|
+
obj: M["Obj"] = relationship(back_populates="hashes", **rel_kw_basic)
|
|
133
282
|
|
|
134
283
|
@classmethod
|
|
135
284
|
def from_digest(cls, digest: mh.Digest, **kw):
|
|
@@ -164,11 +313,17 @@ class _HashCompareByDigest:
|
|
|
164
313
|
return sa.not_(self == other)
|
|
165
314
|
|
|
166
315
|
|
|
316
|
+
sa.Index("ix_dedup_obj_pending_partial", Obj.pending_id, sqlite_where=Obj.pending_id != None)
|
|
317
|
+
sa.Index("ix_dedup_file_pending_partial", File.pending_id, sqlite_where=File.pending_id != None)
|
|
318
|
+
sa.Index("ix_dedup_obj_orphaned_at_partial", Obj.orphaned_at, sqlite_where=Obj.orphaned_at != None)
|
|
319
|
+
sa.Index("ix_dedup_file_link_count", File.obj_id, sa.desc(File.link_count))
|
|
167
320
|
sa.Index(
|
|
168
|
-
"
|
|
321
|
+
"ix_dedup_file_link_count_invalid",
|
|
322
|
+
File.id,
|
|
323
|
+
sqlite_where=File.link_count < sa.literal_column("0"),
|
|
169
324
|
)
|
|
170
325
|
sa.Index(
|
|
171
|
-
"
|
|
172
|
-
|
|
173
|
-
sqlite_where=
|
|
326
|
+
"ix_dedup_obj_created_at_partial",
|
|
327
|
+
Obj.created_at,
|
|
328
|
+
sqlite_where=Obj.created_at < sa.literal_column("0"),
|
|
174
329
|
)
|
vocker/repo/io.py
CHANGED
|
@@ -387,6 +387,10 @@ class RepoTransfer:
|
|
|
387
387
|
|
|
388
388
|
assert_(not is_dir)
|
|
389
389
|
|
|
390
|
+
# The content may already be available locally, let's just try.
|
|
391
|
+
if (fp := self.dedup.open_by_hash(digest)) is not None:
|
|
392
|
+
return fp
|
|
393
|
+
|
|
390
394
|
_open = None if acc is None else (lambda: acc.download_open(path))
|
|
391
395
|
|
|
392
396
|
req = de.DedupLinkRequest(
|
|
@@ -560,6 +564,8 @@ class RepoTransfer:
|
|
|
560
564
|
for archive_info in archive_infos:
|
|
561
565
|
archive_io = ex.enter_context(self.open_compressed(archive_info.archive_path))
|
|
562
566
|
files += (A(archive_io, info) for info in archive_info.files)
|
|
567
|
+
if files and (fs := files[0].info.files):
|
|
568
|
+
logger.info("extracting files...", data_sample_path=str(fs[0].path))
|
|
563
569
|
exporter.provide_files(files)
|
|
564
570
|
|
|
565
571
|
def _group_archive_infos(iterable):
|
|
@@ -583,6 +589,7 @@ class RepoTransfer:
|
|
|
583
589
|
exe.submit(_process, a_info_group)
|
|
584
590
|
for a_info_group in _group_archive_infos(iterable)
|
|
585
591
|
)
|
|
592
|
+
logger.info("done extracting, now finalizing...")
|
|
586
593
|
exporter.end_session()
|
|
587
594
|
|
|
588
595
|
|
vocker/system.py
CHANGED
|
@@ -16,8 +16,9 @@ import typing as ty
|
|
|
16
16
|
|
|
17
17
|
import atomicwrites
|
|
18
18
|
import attr
|
|
19
|
+
import marshmallow as ma
|
|
20
|
+
import marshmallow.fields as maf
|
|
19
21
|
import platformdirs
|
|
20
|
-
import strictyaml as sy
|
|
21
22
|
import structlog
|
|
22
23
|
|
|
23
24
|
from . import dedup as de, multihash as mh, image as im
|
|
@@ -40,10 +41,6 @@ def validate_local_repo_name(name: str) -> None:
|
|
|
40
41
|
raise ValueError(f"invalid repo name: {name!r}")
|
|
41
42
|
|
|
42
43
|
|
|
43
|
-
def cget(x, *args):
|
|
44
|
-
return x.value.get(*args)
|
|
45
|
-
|
|
46
|
-
|
|
47
44
|
@attr.s(eq=False, hash=False)
|
|
48
45
|
class RemoteRepository:
|
|
49
46
|
uri: str = attr.ib()
|
|
@@ -73,7 +70,8 @@ class _Remotes(MutableMapping[str, RemoteRepository]):
|
|
|
73
70
|
return self.system._config["remote_repositories"]
|
|
74
71
|
|
|
75
72
|
def __getitem__(self, k):
|
|
76
|
-
d = self._data[k]
|
|
73
|
+
d = self._data[k]
|
|
74
|
+
d.pop("comment", None)
|
|
77
75
|
return RemoteRepository(**d)
|
|
78
76
|
|
|
79
77
|
def __setitem__(self, k, v: RemoteRepository | None):
|
|
@@ -90,21 +88,25 @@ class _Remotes(MutableMapping[str, RemoteRepository]):
|
|
|
90
88
|
self[k] = None
|
|
91
89
|
|
|
92
90
|
def __iter__(self):
|
|
93
|
-
return iter(x
|
|
91
|
+
return iter(x for x in self._data)
|
|
94
92
|
|
|
95
93
|
def __len__(self, k, v):
|
|
96
|
-
return len(self._data
|
|
94
|
+
return len(self._data)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class _SchemaWithComment(ma.Schema):
|
|
98
|
+
class Meta:
|
|
99
|
+
unknown = ma.RAISE
|
|
100
|
+
|
|
101
|
+
comment = maf.Field(allow_none=True, data_key="#", required=False)
|
|
97
102
|
|
|
98
103
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
sy.Str(),
|
|
106
|
-
sy.Any(),
|
|
107
|
-
)
|
|
104
|
+
class SchemaRemoteRepository(_SchemaWithComment):
|
|
105
|
+
uri = maf.String(required=True)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class SchemaConfig(_SchemaWithComment):
|
|
109
|
+
remote_repositories = maf.Dict(maf.String(), maf.Nested(SchemaRemoteRepository, required=False))
|
|
108
110
|
|
|
109
111
|
|
|
110
112
|
class ImageType(enum.Enum):
|
|
@@ -112,18 +114,16 @@ class ImageType(enum.Enum):
|
|
|
112
114
|
|
|
113
115
|
|
|
114
116
|
@attr.s(eq=False, hash=False)
|
|
115
|
-
class
|
|
117
|
+
class JSONFileWithCaching:
|
|
116
118
|
path: Path = attr.ib()
|
|
117
|
-
schema = attr.ib(default=None)
|
|
119
|
+
schema: ma.Schema = attr.ib(default=None)
|
|
118
120
|
_mtime = None
|
|
119
121
|
_document = None
|
|
120
122
|
|
|
121
123
|
@property
|
|
122
124
|
def document(self):
|
|
123
125
|
if (mtime := (p := self.path).stat().st_mtime_ns) != self._mtime:
|
|
124
|
-
self._document = doc =
|
|
125
|
-
p.read_bytes().decode("utf-8"), schema=self.schema, label=str(self.path)
|
|
126
|
-
)
|
|
126
|
+
self._document = doc = self.schema.load(json.loads(p.read_bytes()))
|
|
127
127
|
self._mtime = mtime
|
|
128
128
|
else:
|
|
129
129
|
doc = self._document
|
|
@@ -134,7 +134,7 @@ class StrictYamlFileWithCaching:
|
|
|
134
134
|
with atomicwrites.atomic_write(
|
|
135
135
|
str(self.path), mode="wt", overwrite=True, encoding="utf-8", newline="\n"
|
|
136
136
|
) as fp:
|
|
137
|
-
|
|
137
|
+
json.dump(self.schema.dump(new_value), fp, indent=2)
|
|
138
138
|
self._document = new_value
|
|
139
139
|
|
|
140
140
|
|
|
@@ -261,7 +261,10 @@ class LocalRepository:
|
|
|
261
261
|
yield u
|
|
262
262
|
|
|
263
263
|
# Adopt the files.
|
|
264
|
+
dedup = self.system.repo_dedup
|
|
265
|
+
_plain = de.DedupFileMetadata.make_plain()
|
|
264
266
|
base = self._path_base
|
|
267
|
+
hashes = {}
|
|
265
268
|
reqs_adopt = []
|
|
266
269
|
reqs_copy = []
|
|
267
270
|
for p_rel in u.updated_paths:
|
|
@@ -270,12 +273,20 @@ class LocalRepository:
|
|
|
270
273
|
if p.exists():
|
|
271
274
|
if not p.is_file():
|
|
272
275
|
raise ValueError("only regular files are supported")
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
+
hashes[p_rel] = h = dedup.get_file_hash(hf, p, check_link=True)
|
|
277
|
+
if h is None:
|
|
278
|
+
reqs_adopt.append(
|
|
279
|
+
de.DedupLinkRequest(
|
|
280
|
+
hash_function=hf,
|
|
281
|
+
link_path=p,
|
|
282
|
+
file_metadata=_plain,
|
|
283
|
+
file_contents_hash=None,
|
|
284
|
+
open_file_once=None,
|
|
285
|
+
adopt_existing=True,
|
|
286
|
+
)
|
|
287
|
+
)
|
|
276
288
|
|
|
277
|
-
dedup
|
|
278
|
-
dedup.adopt_files(hf, reqs_adopt)
|
|
289
|
+
dedup.run_batch(reqs_adopt)
|
|
279
290
|
|
|
280
291
|
# Now we gather the hashes for all the files so we can update the manifest nodes. We
|
|
281
292
|
# need to do this after the `adopt_files` above because that operation computes the
|
|
@@ -285,12 +296,24 @@ class LocalRepository:
|
|
|
285
296
|
)
|
|
286
297
|
for p_rel in u.updated_paths:
|
|
287
298
|
if (p := u.workspace_path / p_rel).exists():
|
|
288
|
-
|
|
289
|
-
|
|
299
|
+
if (r := hashes[p_rel]) is None:
|
|
300
|
+
r = dedup.get_file_hash(hf, p, check_link=True)
|
|
301
|
+
assert r is not None, f"expected {p} to be adopted"
|
|
290
302
|
value = False, r[1]
|
|
303
|
+
reqs_copy.append(
|
|
304
|
+
de.DedupLinkRequest(
|
|
305
|
+
hash_function=hf,
|
|
306
|
+
link_path=(dst := base / p_rel),
|
|
307
|
+
file_metadata=_plain,
|
|
308
|
+
file_contents_hash=r[1],
|
|
309
|
+
open_file_once=None,
|
|
310
|
+
)
|
|
311
|
+
)
|
|
312
|
+
dst.parent.mkdir(exist_ok=True, parents=True)
|
|
291
313
|
else:
|
|
292
314
|
value = None
|
|
293
315
|
dirs[len(p_rel.parts) - 1][p_rel.parent][p_rel.name] = value
|
|
316
|
+
del hashes
|
|
294
317
|
|
|
295
318
|
# Here begins the critical section. If this part fails, the local repository will be broken.
|
|
296
319
|
self._path_ok.unlink(missing_ok=True)
|
|
@@ -418,13 +441,13 @@ class System:
|
|
|
418
441
|
|
|
419
442
|
self.path_repo_local.mkdir(exist_ok=True, parents=True)
|
|
420
443
|
|
|
421
|
-
config_path = self.path_base / "
|
|
422
|
-
cfg =
|
|
444
|
+
config_path = self.path_base / "vocker.json"
|
|
445
|
+
cfg = JSONFileWithCaching(config_path, schema=SchemaConfig())
|
|
423
446
|
try:
|
|
424
447
|
cfg.document
|
|
425
448
|
except FileNotFoundError:
|
|
426
449
|
config_path.parent.mkdir(exist_ok=True, parents=True)
|
|
427
|
-
config_path.write_bytes(b"")
|
|
450
|
+
config_path.write_bytes(b"{}")
|
|
428
451
|
cfg.document
|
|
429
452
|
self._config_file = cfg
|
|
430
453
|
self._init_config()
|
|
@@ -433,10 +456,17 @@ class System:
|
|
|
433
456
|
c = self._config
|
|
434
457
|
modified = False
|
|
435
458
|
|
|
436
|
-
if
|
|
459
|
+
if c.get(k := "remote_repositories") is None:
|
|
437
460
|
c[k] = {}
|
|
438
461
|
modified = True
|
|
439
462
|
|
|
463
|
+
if c.get(k := "comment") is None:
|
|
464
|
+
c[k] = [
|
|
465
|
+
"Since JSON doesn't allow comments, you can place them inside the '#' key inside",
|
|
466
|
+
"most of the dictionaries.",
|
|
467
|
+
]
|
|
468
|
+
modified = True
|
|
469
|
+
|
|
440
470
|
if modified:
|
|
441
471
|
self._config_write(c)
|
|
442
472
|
|
vocker/util.py
CHANGED
|
@@ -47,10 +47,12 @@ def pprofile(options=None):
|
|
|
47
47
|
with runner:
|
|
48
48
|
yield runner
|
|
49
49
|
finally:
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
50
|
+
if cg:
|
|
51
|
+
with open(cg, "wt", encoding="utf-8") as file:
|
|
52
|
+
prof.callgrind(file)
|
|
53
|
+
if annotate:
|
|
54
|
+
with open(annotate, "wt", encoding="utf-8") as file:
|
|
55
|
+
prof.annotate(file)
|
|
54
56
|
|
|
55
57
|
|
|
56
58
|
def supports_executable() -> bool:
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: vocker
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Docker-like manager for virtualenvs
|
|
5
5
|
Author-email: Eduard Christian Dumitrescu <eduard.c.dumitrescu@gmail.com>
|
|
6
6
|
License: General Public License v3
|
|
7
|
-
Project-URL: Homepage, https://hydra.ecd.space/
|
|
8
|
-
Project-URL: Changelog, https://hydra.ecd.space/
|
|
7
|
+
Project-URL: Homepage, https://hydra.ecd.space/ecd/vocker/
|
|
8
|
+
Project-URL: Changelog, https://hydra.ecd.space/ecd/vocker/file?name=CHANGELOG.md&ci=trunk
|
|
9
9
|
Description-Content-Type: text/markdown
|
|
10
10
|
Requires-Dist: atomicwrites
|
|
11
11
|
Requires-Dist: attrs
|
|
@@ -15,10 +15,9 @@ Requires-Dist: filelock
|
|
|
15
15
|
Requires-Dist: immutabledict
|
|
16
16
|
Requires-Dist: marshmallow
|
|
17
17
|
Requires-Dist: platformdirs
|
|
18
|
-
Requires-Dist: sansio_tools>=1
|
|
19
|
-
Requires-Dist: sqlalchemy_boltons>=
|
|
18
|
+
Requires-Dist: sansio_tools>=1
|
|
19
|
+
Requires-Dist: sqlalchemy_boltons>=5
|
|
20
20
|
Requires-Dist: SQLAlchemy
|
|
21
|
-
Requires-Dist: strictyaml
|
|
22
21
|
Requires-Dist: structlog
|
|
23
22
|
Requires-Dist: cbor2
|
|
24
23
|
Provides-Extra: zstandard
|
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
vocker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
vocker/__main__.py,sha256=jNkuxmxpoWPeKXHVf-TyEECSY62QZQHoR2F_Bp6zsNM,35
|
|
3
|
-
vocker/cli.py,sha256=
|
|
4
|
-
vocker/dedup.py,sha256=
|
|
5
|
-
vocker/dedup_models.py,sha256=
|
|
3
|
+
vocker/cli.py,sha256=066HLK3ayv1qSajlFhkvU3wowPzH84ZOt_yGtedaSNk,13151
|
|
4
|
+
vocker/dedup.py,sha256=J7bdhbNPaeMFV6bj72_uYtYfOSBboAIljSBuNTHARso,72447
|
|
5
|
+
vocker/dedup_models.py,sha256=stgBOshVzEwWZykcv1RVHX_3P2qTeRWzB1xWLuNdW4E,11227
|
|
6
6
|
vocker/image.py,sha256=lewNLLiXnd_N1CSs4gnYFEj-d5RkIBiPQiN8hNL2fIs,28181
|
|
7
7
|
vocker/integer_to_path.py,sha256=5ghlupk9VLzXLtcfwnVEVFxtBxyT8A_ooV8-2EAnoFw,1433
|
|
8
8
|
vocker/multihash.py,sha256=-VhksUBam6N01fICtTg_TJrJcEIHJrYVKzkD1B_bdfI,8760
|
|
9
9
|
vocker/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
-
vocker/system.py,sha256=
|
|
11
|
-
vocker/util.py,sha256=
|
|
10
|
+
vocker/system.py,sha256=v3ONv335EqDdCzne--ybTZPdHgUZ_kRKhzCALXjrcFw,25728
|
|
11
|
+
vocker/util.py,sha256=1Escs1FSrzrTy3Rzhn9r41K75KTt9VjQV91iT3P_FyI,3275
|
|
12
12
|
vocker/util_models.py,sha256=2bN5eousF92oH7BAv1ZFoyh6iqNAnJ_niiclp2_RaHI,395
|
|
13
13
|
vocker/repo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
14
|
vocker/repo/compression.py,sha256=l2g1e6SaugpqORbg3zwRM1zwlEXedbYOihm5nDpCejU,6442
|
|
15
|
-
vocker/repo/io.py,sha256=
|
|
16
|
-
vocker-0.
|
|
17
|
-
vocker-0.
|
|
18
|
-
vocker-0.
|
|
19
|
-
vocker-0.
|
|
15
|
+
vocker/repo/io.py,sha256=Juzt7vHjfd-H36tWpdYZNh7ucf-h0bC7_WwZ46liNx0,25614
|
|
16
|
+
vocker-0.3.0.dist-info/METADATA,sha256=a7Qhq98kbIQe8sQfef7xl5S3dXZUN54Hx0snel0MTW4,3835
|
|
17
|
+
vocker-0.3.0.dist-info/WHEEL,sha256=lTU6B6eIfYoiQJTZNc-fyaR6BpL6ehTzU3xGYxn2n8k,91
|
|
18
|
+
vocker-0.3.0.dist-info/top_level.txt,sha256=5x7g7T2L44UKODxVZ4vmWjxDnnruxaZ5yloYi0wLoUg,7
|
|
19
|
+
vocker-0.3.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|