vocker 0.2.0__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vocker/cli.py +1 -2
- vocker/dedup.py +913 -676
- vocker/dedup_models.py +204 -49
- vocker/repo/io.py +7 -0
- vocker/system.py +39 -16
- vocker/util.py +6 -4
- {vocker-0.2.0.dist-info → vocker-0.3.1.dist-info}/METADATA +5 -5
- {vocker-0.2.0.dist-info → vocker-0.3.1.dist-info}/RECORD +10 -10
- {vocker-0.2.0.dist-info → vocker-0.3.1.dist-info}/WHEEL +0 -0
- {vocker-0.2.0.dist-info → vocker-0.3.1.dist-info}/top_level.txt +0 -0
vocker/dedup_models.py
CHANGED

@@ -1,10 +1,14 @@
+from __future__ import annotations
+
 import dataclasses
 import typing as ty
 
+import attr
 import sqlalchemy as sa
 from sqlalchemy import orm as sao
 from sqlalchemy.orm import Mapped as M, mapped_column as mc, relationship, DeclarativeBase
 from sqlalchemy_boltons.orm import RelationshipComparator as Rel
+from sqlalchemy_boltons.temporary import CacheTemporaryMetaData as CacheTmp, MetaDataMaker
 
 from . import multihash as mh
 from .util_models import now, rel_kw_basic, rel_kw_cascade

@@ -14,57 +18,78 @@ class BaseDedup(DeclarativeBase):
     pass
 
 
+_make_file_fk = lambda: sa.ForeignKey("dedup_file.id", ondelete="CASCADE")
+_make_obj_fk = lambda: sa.ForeignKey("dedup_obj.id", ondelete="CASCADE")
+_make_pending_fk = lambda: sa.ForeignKey("dedup_pending.id", ondelete="CASCADE")
+
+
 @BaseDedup.registry.mapped_as_dataclass(init=False)
 class DedupConfig:
     __tablename__ = "dedup_config"
 
     key: M[str] = mc(primary_key=True)
-    value: M[str] = mc(…
+    value: M[str] = mc()
 
 
 @BaseDedup.registry.mapped_as_dataclass(init=False)
-class DedupFile:
+class Obj:
+    __tablename__ = "dedup_obj"
     """
-    Represents …
+    Represents an idealized deduplicated file. There may be multiple actual filesystem files
+    associated to this idealized object.
 
-    …
-    """
+    When an Obj has no more links, :attr:`orphaned_at` is set to the current time.
 
-    …
+    When a File has no more links, and there are other Files with the same content, we should delete
+    all but one of the Files. This is not implemented yet.
+
+    Incomplete: Obj exists, has no File with pending_id == None.
+    Complete: Obj exists, has at least one File with pending_id == None.
+
+    Lifecycle:
+    1. Incomplete + Used: Newly created object, data not written yet.
+    2. Complete + Used: Data written, links created.
+    3. Complete + Unused: Data written, no more links exist.
+    4. Incomplete + Unused: Data maybe deleted, no more links exist.
+    """
 
     id: M[int] = mc(primary_key=True)
-    …
+    pending_id: M[int | None] = mc(_make_pending_fk())
+    metadata_bytes: M[bytes] = mc("metadata")
     size: M[int] = mc()
-    …
+    created_at: M[int] = mc()
+    updated_at: M[int] = mc(server_default=sa.text("-1"), index=True)
     orphaned_at: M[int | None] = mc()
-    pending_id: M[int | None] = mc(sa.ForeignKey("dedup_pending.id", ondelete="CASCADE"))
 
-    links: M[list["Link"]] = relationship(back_populates="file", **rel_kw_cascade)
-    tags: M[list["Tag"]] = relationship(back_populates="file", **rel_kw_cascade)
-    hashes: M[list["Hash"]] = relationship(back_populates="file", **rel_kw_cascade)
-    pending: M["Pending | None"] = relationship(back_populates="files", **rel_kw_basic)
+    files: M[list["File"]] = relationship(back_populates="obj", **rel_kw_cascade)
+    tags: M[list["Tag"]] = relationship(back_populates="obj", **rel_kw_cascade)
+    hashes: M[list["Hash"]] = relationship(back_populates="obj", **rel_kw_cascade)
+    pending: M["Pending | None"] = relationship(back_populates="objs", **rel_kw_basic)
 
     @property
     def hashes_dict(self):
         return {(h := x.to_digest()).function: h for x in self.hashes}
 
     @classmethod
-    def …
+    def q_is_complete(cls):
+        F = sao.aliased(File)
+        return sa.exists().select_from(F).where(Rel(F.obj) == cls, F.pending_id == None)
+
+    @classmethod
+    def q_is_orphaned(cls):
+        F = sao.aliased(File)
+        return ~sa.exists().select_from(F).where(Rel(F.obj) == cls, F.link_count != 0)
+
+    @classmethod
+    def make_sql_update_orphaned(cls, orphaned_at_now):
         """
         Construct the SQL DML statement which sets :attr:`orphaned_at` according to whether any
         links are left that point to this dedup file.
         """
-        if orphaned_at_now is None:
-            orphaned_at_now = now()
-        L = sao.aliased(Link)
         return sa.update(cls).values(
             orphaned_at=sa.case(
                 # If a Link exists, then it's NULL.
-                (…
+                (~cls.q_is_orphaned(), None),
                 # If the orphaned_at field was set in the past, then keep that value.
                 (cls.orphaned_at < orphaned_at_now, cls.orphaned_at),
                 # Otherwise, set it to the current timestamp.

@@ -73,7 +98,43 @@ class DedupFile:
     )
 
 
-…
+@BaseDedup.registry.mapped_as_dataclass(init=False)
+class File:
+    """
+    Represents a single deduplicated file regardless of backend (hardlink, symlink, reflink).
+
+    The file contents may not yet be available if :attr:`pending_id` is not NULL.
+
+    If :attr:`obj_id` is NULL, then the File has been (probably) deleted and in a future transaction
+    the row should also be deleted if the file is confirmed deleted.
+    """
+
+    __tablename__ = "dedup_file"
+
+    # this is used as a speedup when verifying hardlinks
+    _cached_file_stat = None
+
+    id: M[int] = mc(primary_key=True)
+    obj_id: M[int | None] = mc(_make_obj_fk(), index=True)
+    link_count: M[int] = mc(server_default=sa.text("-1"))
+    pending_id: M[int | None] = mc(sa.ForeignKey("dedup_pending.id", ondelete="CASCADE"))
+    created_at: M[int | None] = mc()
+
+    obj: M[list[Obj]] = relationship(back_populates="files", **rel_kw_cascade)
+    links: M[list["Link"]] = relationship(back_populates="file", **rel_kw_cascade)
+    pending: M["Pending | None"] = relationship(back_populates="files", **rel_kw_basic)
+
+
+@BaseDedup.registry.mapped_as_dataclass(init=False)
+class FileCorruption:
+    """
+    Represents information about a corrupted file.
+    """
+
+    __tablename__ = "dedup_file_corrupt"
+    id: M[int] = mc(sa.ForeignKey("dedup_file.id", ondelete="CASCADE"), primary_key=True)
+    exception_name: M[str] = mc()
+    exception_string: M[str] = mc()
 
 
 @BaseDedup.registry.mapped_as_dataclass(init=False)
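
The split into Obj (the idealized content) and File (a physical copy) makes "complete" and "orphaned" plain SQL predicates via the q_is_complete / q_is_orphaned helpers above. A minimal sketch of how they might compose in a cleanup pass; the session handling and the policy of collecting lifecycle state 4 are assumptions, not code from the package:

    # Hypothetical cleanup query built from the Obj helpers above; `session`
    # and the choice to collect state-4 objects are assumptions.
    import sqlalchemy as sa

    from vocker.dedup_models import Obj
    from vocker.util_models import now

    def find_collectable_obj_ids(session):
        # One UPDATE recomputes orphaned_at in bulk, via a CASE over the
        # correlated EXISTS subqueries that q_is_orphaned() builds.
        session.execute(Obj.make_sql_update_orphaned(now()))
        # Lifecycle state 4 ("Incomplete + Unused"): orphaned, no complete File.
        q = sa.select(Obj.id).where(Obj.q_is_orphaned(), ~Obj.q_is_complete())
        return session.scalars(q).all()
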
@@ -83,7 +144,8 @@ class Pending:
     id: M[int] = mc(primary_key=True)
     expire_at: M[int] = mc()
 
-    files: M[list["…
+    files: M[list["File"]] = relationship(back_populates="pending", **rel_kw_cascade)
+    objs: M[list["Obj"]] = relationship(back_populates="pending", **rel_kw_cascade)
 
 
 @BaseDedup.registry.mapped_as_dataclass(init=False)

@@ -92,44 +154,131 @@ class Link:
 
     __tablename__ = "dedup_link"
 
-    …
-    file_id: M[int] = mc(_make_file_fk(), index=True…
-    …
-        sa.…
-    )
+    path: M[bytes] = mc(primary_key=True)  # utf-8 encoded
+    file_id: M[int] = mc(_make_file_fk(), index=True)
+    mtime: M[int | None] = mc()  # whether this field means anything depends on the backend
+    created_at: M[int | None] = mc()
+
+    file: M["File"] = relationship(back_populates="links", **rel_kw_basic)
+
+
+_tmp_meta = sa.MetaData()
+tmp_bytes = sa.Table("bytes", _tmp_meta, sa.Column("id", sa.LargeBinary, primary_key=True))
+tmp_ints = sa.Table("ints", _tmp_meta, sa.Column("id", sa.Integer, primary_key=True))
+
+
+@attr.s(frozen=True)
+class TmpNewFiles:
+    files: sa.Table = attr.ib()
+    tags: sa.Table = attr.ib()
+    hashes: sa.Table = attr.ib()
+
+
+@attr.s(frozen=True)
+class TmpNewFiles2:
+    files: sa.Table = attr.ib()
+    links: sa.Table = attr.ib()
+    objs: sa.Table = attr.ib()
+
+
+@attr.s(frozen=True)
+class TmpCheckLinks:
+    links: sa.Table = attr.ib()
+
+
+@attr.s(frozen=True)
+class TmpDeleteExtra:
+    files: sa.Table = attr.ib()
+
+
+def _make_temp_new_files(maker: MetaDataMaker):
+    # If an Obj has already-available contents in a File, then that File's id is written to
+    # content_file_id. Otherwise it is NULL.
+    files = maker.table(
+        "files",
+        sa.Column("id", sa.Integer, primary_key=True, nullable=False),
+        sa.Column("link_path", sa.LargeBinary, nullable=True),
+        sa.Column("metadata_bytes", sa.LargeBinary),
+        sa.Column("insert_obj_if_missing", sa.Boolean),
+        # sa.Column("adopt_existing", sa.Boolean, server_default=text("0")),
+        sa.Column("obj_id", sa.Integer, nullable=True),  # ref: Obj.id
+        sa.Column("new_obj_id", sa.Integer, nullable=True),  # ref: Obj.id
+    )
+    tags = maker.table(
+        "files_by_tag",
+        sa.Column("id", sa.Integer, primary_key=True, index=True),  # ref: temp.files.id
+        sa.Column("name", sa.LargeBinary, primary_key=True),
+    )
+    hashes = maker.table(
+        "files_by_hash",
+        sa.Column("id", sa.Integer, primary_key=True),  # ref: temp.files.id
+        sa.Column("hash_function", sa.Integer, primary_key=True),
+        sa.Column("hash", sa.LargeBinary),
+    )
+    return TmpNewFiles(files=files, tags=tags, hashes=hashes)
+
+
+def _make_temp_new_files2(maker: MetaDataMaker):
+    files = maker.table(
+        "files",
+        sa.Column("file_id", sa.Integer, primary_key=True, nullable=False),
+        sa.Column("obj_id", sa.Integer),
+    )
+    links = maker.table(
+        "links",
+        sa.Column("link_path", sa.LargeBinary, primary_key=True, nullable=False),
+        sa.Column("file_id", sa.Integer),
+        sa.Column("link_count", sa.Integer),
+    )
+    objs = maker.table(
+        "objs",
+        sa.Column("obj_id", sa.Integer, primary_key=True, nullable=False),
+        sa.Column("size", sa.Integer),
+    )
+    return TmpNewFiles2(files=files, links=links, objs=objs)
+
+
+def _make_temp_check_lx(maker: MetaDataMaker):
+    links = maker.table(
+        "links",
+        sa.Column("path", sa.LargeBinary, primary_key=True, nullable=False),
+    )
+    return TmpCheckLinks(links)
+
+
+def _make_temp_delete_extra(maker: MetaDataMaker):
+    files = maker.table(
+        "files",
+        sa.Column("id", sa.Integer, primary_key=True, nullable=False),
+    )
+    return TmpDeleteExtra(files)
+
+
+tmp_new_files: CacheTmp[TmpNewFiles] = CacheTmp(_make_temp_new_files)
+tmp_new_files2: CacheTmp[TmpNewFiles2] = CacheTmp(_make_temp_new_files2)
+tmp_check_links: CacheTmp[TmpCheckLinks] = CacheTmp(_make_temp_check_lx)
+tmp_delete_extra: CacheTmp[TmpDeleteExtra] = CacheTmp(_make_temp_delete_extra)
 
 
 @BaseDedup.registry.mapped_as_dataclass(init=False)
 class Tag:
     __tablename__ = "dedup_tag"
 
-    …
+    obj_id: M[int] = mc(_make_obj_fk(), primary_key=True)
     name: M[bytes] = mc(primary_key=True, index=True)
 
-    …
+    obj: M["Obj"] = relationship(back_populates="tags", **rel_kw_basic)
 
 
 @BaseDedup.registry.mapped_as_dataclass(init=False)
 class Hash:
     __tablename__ = "dedup_hashes"
 
-    …
+    obj_id: M[int] = mc(_make_obj_fk(), primary_key=True)
     hash_function: M[int] = mc(primary_key=True)
     hash: M[bytes] = mc(index=True)
 
-    …
+    obj: M["Obj"] = relationship(back_populates="hashes", **rel_kw_basic)
 
     @classmethod
     def from_digest(cls, digest: mh.Digest, **kw):
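
The maker.table(...) definitions above use sqlalchemy_boltons' temporary-table machinery (CacheTmp caches each schema). The underlying technique, staging a batch of keys in a TEMPORARY table and joining against the persistent tables, can be sketched in plain SQLAlchemy Core; the function name, connection handling, and the dedup_link target are illustrative:

    # Generic temp-table join in plain SQLAlchemy Core (illustrative; not the
    # sqlalchemy_boltons helper the package actually uses).
    import sqlalchemy as sa

    def find_known_link_paths(conn, paths):
        meta = sa.MetaData()
        tmp = sa.Table(
            "tmp_paths",
            meta,
            sa.Column("path", sa.LargeBinary, primary_key=True),
            prefixes=["TEMPORARY"],  # connection-local scratch table
        )
        tmp.create(conn)
        conn.execute(sa.insert(tmp), [{"path": p} for p in paths])
        # Join the staged paths against the persistent dedup_link table.
        link = sa.table("dedup_link", sa.column("path"))
        q = sa.select(link.c.path).join_from(tmp, link, tmp.c.path == link.c.path)
        return list(conn.scalars(q))
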
@@ -164,11 +313,17 @@ class _HashCompareByDigest:
         return sa.not_(self == other)
 
 
+sa.Index("ix_dedup_obj_pending_partial", Obj.pending_id, sqlite_where=Obj.pending_id != None)
+sa.Index("ix_dedup_file_pending_partial", File.pending_id, sqlite_where=File.pending_id != None)
+sa.Index("ix_dedup_obj_orphaned_at_partial", Obj.orphaned_at, sqlite_where=Obj.orphaned_at != None)
+sa.Index("ix_dedup_file_link_count", File.obj_id, sa.desc(File.link_count))
 sa.Index(
-    "…
+    "ix_dedup_file_link_count_invalid",
+    File.id,
+    sqlite_where=File.link_count < sa.literal_column("0"),
 )
 sa.Index(
-    "…
-    …
-    sqlite_where=…
+    "ix_dedup_obj_created_at_partial",
+    Obj.created_at,
+    sqlite_where=Obj.created_at < sa.literal_column("0"),
 )
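
For reference, sqlite_where compiles to a SQLite partial index. A standalone reconstruction of the first index above shows the emitted DDL:

    # Reconstruction of ix_dedup_obj_pending_partial, compiled for SQLite to
    # show the DDL that the sqlite_where argument produces.
    import sqlalchemy as sa
    from sqlalchemy.dialects import sqlite
    from sqlalchemy.schema import CreateIndex

    meta = sa.MetaData()
    obj = sa.Table("dedup_obj", meta, sa.Column("pending_id", sa.Integer))
    ix = sa.Index(
        "ix_dedup_obj_pending_partial",
        obj.c.pending_id,
        sqlite_where=obj.c.pending_id != None,
    )
    print(CreateIndex(ix).compile(dialect=sqlite.dialect()))
    # CREATE INDEX ix_dedup_obj_pending_partial ON dedup_obj (pending_id)
    # WHERE pending_id IS NOT NULL
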
vocker/repo/io.py
CHANGED

@@ -387,6 +387,10 @@ class RepoTransfer:
 
         assert_(not is_dir)
 
+        # The content may already be available locally, let's just try.
+        if (fp := self.dedup.open_by_hash(digest)) is not None:
+            return fp
+
         _open = None if acc is None else (lambda: acc.download_open(path))
 
         req = de.DedupLinkRequest(

@@ -560,6 +564,8 @@ class RepoTransfer:
         for archive_info in archive_infos:
             archive_io = ex.enter_context(self.open_compressed(archive_info.archive_path))
             files += (A(archive_io, info) for info in archive_info.files)
+        if files and (fs := files[0].info.files):
+            logger.info("extracting files...", data_sample_path=str(fs[0].path))
         exporter.provide_files(files)
 
     def _group_archive_infos(iterable):

@@ -583,6 +589,7 @@ class RepoTransfer:
             exe.submit(_process, a_info_group)
             for a_info_group in _group_archive_infos(iterable)
         )
+        logger.info("done extracting, now finalizing...")
         exporter.end_session()
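
The first hunk above is a cache short-circuit: before falling back to a download, the dedup store is asked whether content with this hash already exists locally. Schematically (only dedup.open_by_hash(digest) comes from the diff; the wrapper and its parameters are illustrative):

    # Schematic form of the short-circuit added in the first hunk.
    def open_content(dedup, digest, download_open):
        # Fast path: the dedup store already holds a file with this hash.
        if (fp := dedup.open_by_hash(digest)) is not None:
            return fp
        # Slow path: fetch from the remote accessor.
        return download_open()
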
vocker/system.py
CHANGED

@@ -169,15 +169,15 @@ class UpdatingLocalRepository:
         def p(suffix):
             return path.with_name(path.name + suffix)
 
-        # compress contents directly to …
-        with self.open(p(".…
+        # compress contents directly to xz
+        with self.open(p(".xz"), "wb") as f1, cx.open_compressor(f1, "xz") as f:
             yield f
 
-        # compress to …
-        with self.open(p(".…
-        …
-        ) as fw1, cx.open_compressor(fw1, "…
-        …
+        # # compress to zst as well
+        # with self.open(p(".xz"), "rb") as fr1, cx.open_decompressor(fr1, "xz") as fr, self.open(
+        #     p(".zst"), "wb"
+        # ) as fw1, cx.open_compressor(fw1, "zst") as fw:
+        #     shutil.copyfileobj(fr, fw)
 
     def iterdir(self, path: PurePath):
         raise NotImplementedError("not needed yet")

@@ -261,7 +261,10 @@ class LocalRepository:
         yield u
 
         # Adopt the files.
+        dedup = self.system.repo_dedup
+        _plain = de.DedupFileMetadata.make_plain()
         base = self._path_base
+        hashes = {}
         reqs_adopt = []
         reqs_copy = []
         for p_rel in u.updated_paths:

@@ -270,12 +273,20 @@
             if p.exists():
                 if not p.is_file():
                     raise ValueError("only regular files are supported")
-                …
+                hashes[p_rel] = h = dedup.get_file_hash(hf, p, check_link=True)
+                if h is None:
+                    reqs_adopt.append(
+                        de.DedupLinkRequest(
+                            hash_function=hf,
+                            link_path=p,
+                            file_metadata=_plain,
+                            file_contents_hash=None,
+                            open_file_once=None,
+                            adopt_existing=True,
+                        )
+                    )
 
-        dedup…
-        dedup.adopt_files(hf, reqs_adopt)
+        dedup.run_batch(reqs_adopt)
 
         # Now we gather the hashes for all the files so we can update the manifest nodes. We
         # need to do this after the `adopt_files` above because that operation computes the

@@ -285,12 +296,24 @@
         )
         for p_rel in u.updated_paths:
             if (p := u.workspace_path / p_rel).exists():
-                …
+                if (r := hashes[p_rel]) is None:
+                    r = dedup.get_file_hash(hf, p, check_link=True)
+                    assert r is not None, f"expected {p} to be adopted"
                 value = False, r[1]
+                reqs_copy.append(
+                    de.DedupLinkRequest(
+                        hash_function=hf,
+                        link_path=(dst := base / p_rel),
+                        file_metadata=_plain,
+                        file_contents_hash=r[1],
+                        open_file_once=None,
+                    )
+                )
+                dst.parent.mkdir(exist_ok=True, parents=True)
             else:
                 value = None
             dirs[len(p_rel.parts) - 1][p_rel.parent][p_rel.name] = value
+        del hashes
 
         # Here begins the critical section. If this part fails, the local repository will be broken.
         self._path_ok.unlink(missing_ok=True)

@@ -556,7 +579,7 @@ class System:
         with u.open(archive_path / "h.bin", "wb") as f:
             rio.HashesWriter(f).write_all(h for h in archive_digests)
 
-        archive_size = u.get_path_for_open(archive_path / "a.…
+        archive_size = u.get_path_for_open(archive_path / "a.xz", "wb").stat().st_size
 
         shard_path = u.id_to_path("shard", (shard_id := u.allocate_id("shard")))
         with u.open_for_write_multi_compressed(shard_path / "p") as f:

@@ -585,7 +608,7 @@
         with u.open_for_write_multi_compressed(img_path / "u") as f:
             rio.cbor_dump({"image_type": image_type.value}, f)
 
-        with u.open(img_path / "u.…
+        with u.open(img_path / "u.xz", "rb") as f1, cx.open_decompressor(f1, "xz") as f:
             hasher = hf()
             while b := f.read(65536):
                 hasher.update(b)
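
cx.open_compressor / cx.open_decompressor are vocker's own wrappers (vocker/repo/compression.py in the RECORD below). For comparison, the same streamed xz write and chunked hash-over-decompressed-bytes can be done with just the standard library; the paths and the hash-factory argument hf are illustrative:

    # Stdlib analogue of the xz streaming above (illustrative).
    import lzma
    import shutil

    def compress_to_xz(src, dst):
        with open(src, "rb") as fr, lzma.open(dst, "wb") as fw:
            shutil.copyfileobj(fr, fw)  # stream-compress, constant memory

    def digest_xz(path, hf):
        hasher = hf()  # hf is a hash factory, e.g. hashlib.sha256
        with lzma.open(path, "rb") as f:
            while b := f.read(65536):  # same chunked read as the diff
                hasher.update(b)
        return hasher.digest()
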
vocker/util.py
CHANGED

@@ -47,10 +47,12 @@ def pprofile(options=None):
         with runner:
             yield runner
     finally:
-        …
+        if cg:
+            with open(cg, "wt", encoding="utf-8") as file:
+                prof.callgrind(file)
+        if annotate:
+            with open(annotate, "wt", encoding="utf-8") as file:
+                prof.annotate(file)
 
 
 def supports_executable() -> bool:
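
The rewritten finally block dumps the profiler output in two formats; prof is a pprofile Profile, whose callgrind() and annotate() methods write to an open text file. A self-contained version of the same pattern, with illustrative output paths:

    # Same dump pattern using the pprofile library directly (illustrative).
    import pprofile

    prof = pprofile.Profile()
    with prof:
        sum(i * i for i in range(10_000))  # workload to profile

    with open("profile.callgrind", "wt", encoding="utf-8") as f:
        prof.callgrind(f)  # kcachegrind-compatible output
    with open("profile.annotated.txt", "wt", encoding="utf-8") as f:
        prof.annotate(f)  # source listing annotated with hit counts
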
{vocker-0.2.0.dist-info → vocker-0.3.1.dist-info}/METADATA
CHANGED

@@ -1,11 +1,11 @@
 Metadata-Version: 2.4
 Name: vocker
-Version: 0.2.0
+Version: 0.3.1
 Summary: Docker-like manager for virtualenvs
 Author-email: Eduard Christian Dumitrescu <eduard.c.dumitrescu@gmail.com>
 License: General Public License v3
-Project-URL: Homepage, https://hydra.ecd.space/
-Project-URL: Changelog, https://hydra.ecd.space/
+Project-URL: Homepage, https://hydra.ecd.space/ecd/vocker/
+Project-URL: Changelog, https://hydra.ecd.space/ecd/vocker/file?name=CHANGELOG.md&ci=trunk
 Description-Content-Type: text/markdown
 Requires-Dist: atomicwrites
 Requires-Dist: attrs

@@ -15,8 +15,8 @@ Requires-Dist: filelock
 Requires-Dist: immutabledict
 Requires-Dist: marshmallow
 Requires-Dist: platformdirs
-Requires-Dist: sansio_tools>=1…
-Requires-Dist: sqlalchemy_boltons>=…
+Requires-Dist: sansio_tools>=1
+Requires-Dist: sqlalchemy_boltons>=5
 Requires-Dist: SQLAlchemy
 Requires-Dist: structlog
 Requires-Dist: cbor2
{vocker-0.2.0.dist-info → vocker-0.3.1.dist-info}/RECORD
CHANGED

@@ -1,19 +1,19 @@
 vocker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vocker/__main__.py,sha256=jNkuxmxpoWPeKXHVf-TyEECSY62QZQHoR2F_Bp6zsNM,35
-vocker/cli.py,sha256=…
-vocker/dedup.py,sha256=…
-vocker/dedup_models.py,sha256=…
+vocker/cli.py,sha256=066HLK3ayv1qSajlFhkvU3wowPzH84ZOt_yGtedaSNk,13151
+vocker/dedup.py,sha256=J7bdhbNPaeMFV6bj72_uYtYfOSBboAIljSBuNTHARso,72447
+vocker/dedup_models.py,sha256=stgBOshVzEwWZykcv1RVHX_3P2qTeRWzB1xWLuNdW4E,11227
 vocker/image.py,sha256=lewNLLiXnd_N1CSs4gnYFEj-d5RkIBiPQiN8hNL2fIs,28181
 vocker/integer_to_path.py,sha256=5ghlupk9VLzXLtcfwnVEVFxtBxyT8A_ooV8-2EAnoFw,1433
 vocker/multihash.py,sha256=-VhksUBam6N01fICtTg_TJrJcEIHJrYVKzkD1B_bdfI,8760
 vocker/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-vocker/system.py,sha256=…
-vocker/util.py,sha256=…
+vocker/system.py,sha256=fX3By0_MqC705T1IdmkxAY4FM6VaY8x6FdegB6a8-dM,25727
+vocker/util.py,sha256=1Escs1FSrzrTy3Rzhn9r41K75KTt9VjQV91iT3P_FyI,3275
 vocker/util_models.py,sha256=2bN5eousF92oH7BAv1ZFoyh6iqNAnJ_niiclp2_RaHI,395
 vocker/repo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vocker/repo/compression.py,sha256=l2g1e6SaugpqORbg3zwRM1zwlEXedbYOihm5nDpCejU,6442
-vocker/repo/io.py,sha256=…
-vocker-0.2.0.dist-info/METADATA,sha256=…
-vocker-0.2.0.dist-info/WHEEL,sha256=…
-vocker-0.2.0.dist-info/top_level.txt,sha256=…
-vocker-0.2.0.dist-info/RECORD,,
+vocker/repo/io.py,sha256=Juzt7vHjfd-H36tWpdYZNh7ucf-h0bC7_WwZ46liNx0,25614
+vocker-0.3.1.dist-info/METADATA,sha256=NUmG0yxLQmKnVUvsKM-8awDqd4d924LB_fVt9fdB67E,3835
+vocker-0.3.1.dist-info/WHEEL,sha256=lTU6B6eIfYoiQJTZNc-fyaR6BpL6ehTzU3xGYxn2n8k,91
+vocker-0.3.1.dist-info/top_level.txt,sha256=5x7g7T2L44UKODxVZ4vmWjxDnnruxaZ5yloYi0wLoUg,7
+vocker-0.3.1.dist-info/RECORD,,

{vocker-0.2.0.dist-info → vocker-0.3.1.dist-info}/WHEEL
File without changes

{vocker-0.2.0.dist-info → vocker-0.3.1.dist-info}/top_level.txt
File without changes
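
The sha256= tokens in RECORD are the urlsafe base64 of the raw SHA-256 digest with the padding stripped (the wheel RECORD convention). The zero-byte entries above, vocker/__init__.py and vocker/py.typed (size 0), both carry the empty-input digest:

    # How RECORD digests are formed (wheel RECORD convention).
    import base64
    import hashlib

    def record_hash(data: bytes) -> str:
        digest = hashlib.sha256(data).digest()
        return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode()

    assert record_hash(b"") == "sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU"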