vocker 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vocker/__init__.py +0 -0
- vocker/__main__.py +3 -0
- vocker/cli.py +384 -0
- vocker/dedup.py +1676 -0
- vocker/dedup_models.py +174 -0
- vocker/image.py +870 -0
- vocker/integer_to_path.py +51 -0
- vocker/multihash.py +302 -0
- vocker/py.typed +0 -0
- vocker/repo/__init__.py +0 -0
- vocker/repo/compression.py +239 -0
- vocker/repo/io.py +711 -0
- vocker/system.py +681 -0
- vocker/util.py +120 -0
- vocker/util_models.py +13 -0
- vocker-0.1.0.dist-info/METADATA +56 -0
- vocker-0.1.0.dist-info/RECORD +19 -0
- vocker-0.1.0.dist-info/WHEEL +5 -0
- vocker-0.1.0.dist-info/top_level.txt +1 -0
vocker/dedup_models.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
import typing as ty
|
|
3
|
+
|
|
4
|
+
import sqlalchemy as sa
|
|
5
|
+
from sqlalchemy import orm as sao
|
|
6
|
+
from sqlalchemy.orm import Mapped as M, mapped_column as mc, relationship, DeclarativeBase
|
|
7
|
+
from sqlalchemy_boltons.orm import RelationshipComparator as Rel
|
|
8
|
+
|
|
9
|
+
from . import multihash as mh
|
|
10
|
+
from .util_models import now, rel_kw_basic, rel_kw_cascade
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class BaseDedup(DeclarativeBase):
    """Declarative base for all dedup-database ORM models.

    Its ``registry`` is used by the ``@mapped_as_dataclass`` decorators below.
    """

    pass
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@BaseDedup.registry.mapped_as_dataclass(init=False)
class DedupConfig:
    """Simple key/value configuration table for the dedup store."""

    __tablename__ = "dedup_config"

    # Configuration key (primary key).
    key: M[str] = mc(primary_key=True)
    # Configuration value, stored as a string.
    value: M[str] = mc(nullable=False)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@BaseDedup.registry.mapped_as_dataclass(init=False)
class DedupFile:
    """
    Represents a single deduplicated file regardless of backend (hardlink, symlink, reflink).

    The file contents may not yet be available if :attr:`pending_file` is nonempty.
    """

    __tablename__ = "dedup_file"

    id: M[int] = mc(primary_key=True)
    # Stored in the "metadata" column; the Python attribute is renamed —
    # presumably because "metadata" is reserved on Declarative classes. TODO confirm.
    file_metadata: M[bytes] = mc("metadata")
    size: M[int] = mc()
    mtime: M[int] = mc()
    created_at: M[int] = mc(insert_default=now)
    # When the file lost its last link; NULL while at least one Link exists
    # (maintained by make_update_orphaned below).
    orphaned_at: M[int | None] = mc()
    # Set while the file belongs to a Pending batch; row cascades away with it.
    pending_id: M[int | None] = mc(sa.ForeignKey("dedup_pending.id", ondelete="CASCADE"))

    links: M[list["Link"]] = relationship(back_populates="file", **rel_kw_cascade)
    tags: M[list["Tag"]] = relationship(back_populates="file", **rel_kw_cascade)
    hashes: M[list["Hash"]] = relationship(back_populates="file", **rel_kw_cascade)
    pending: M["Pending | None"] = relationship(back_populates="files", **rel_kw_basic)

    # this is used as a speedup when verifying hardlinks
    _cached_file_stat = None

    @property
    def hashes_dict(self):
        """Return ``{hash_function: Digest}`` decoded from the related Hash rows."""
        return {(h := x.to_digest()).function: h for x in self.hashes}

    @classmethod
    def make_update_orphaned(cls, orphaned_at_now=None):
        """
        Construct the SQL DML statement which sets :attr:`orphaned_at` according to whether any
        links are left that point to this dedup file.

        :param orphaned_at_now: timestamp to use as "now"; defaults to :func:`now`.
        :returns: an UPDATE statement (not executed here) covering every row.
        """
        if orphaned_at_now is None:
            orphaned_at_now = now()
        L = sao.aliased(Link)
        return sa.update(cls).values(
            orphaned_at=sa.case(
                # If a Link exists, then it's NULL.
                (sa.exists().select_from(L).where(Rel(L.file) == cls), None),
                # If the orphaned_at file was set in the past, then keep that value.
                (cls.orphaned_at < orphaned_at_now, cls.orphaned_at),
                # Otherwise, set it to the current timestamp.
                else_=orphaned_at_now,
            )
        )
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _make_file_fk() -> sa.ForeignKey:
    """Return a fresh ForeignKey to ``dedup_file.id`` (ON DELETE CASCADE).

    A new instance per call: a ForeignKey object cannot be shared between
    columns, so each table below builds its own. (Was a named lambda; a
    ``def`` is the idiomatic form — PEP 8 E731.)
    """
    return sa.ForeignKey("dedup_file.id", ondelete="CASCADE")
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@BaseDedup.registry.mapped_as_dataclass(init=False)
class Pending:
    """A pending batch whose member files are not yet fully available."""

    __tablename__ = "dedup_pending"

    id: M[int] = mc(primary_key=True)
    # Presumably the time after which this batch may be reclaimed — confirm
    # against the code in dedup.py that queries it.
    expire_at: M[int] = mc()

    # Member files; they cascade-delete with the batch (FK ondelete="CASCADE").
    files: M[list["DedupFile"]] = relationship(back_populates="pending", **rel_kw_cascade)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@BaseDedup.registry.mapped_as_dataclass(init=False)
class Link:
    """A link (usage) of a deduplicated file."""

    __tablename__ = "dedup_link"

    # Filesystem path of the link; the path itself is the primary key.
    link_path: M[bytes] = mc(primary_key=True)  # utf-8 encoded
    file_id: M[int] = mc(_make_file_fk(), index=True, nullable=False)
    created_at: M[int] = mc(insert_default=now, nullable=False)

    file: M["DedupFile"] = relationship(back_populates="links", **rel_kw_basic)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
# Standalone Core tables (each with its own MetaData, so they are not part of
# the ORM schema). Presumably created as temporary tables for bulk lookups by
# dedup.py — confirm at the call sites.
tmp_bytes = sa.Table("bytes", sa.MetaData(), sa.Column("id", sa.LargeBinary, primary_key=True))
tmp_ints = sa.Table("ints", sa.MetaData(), sa.Column("id", sa.Integer, primary_key=True))
# Scratch table pairing file ids with (hash_function, digest) and metadata.
tmp_hash_lookup = sa.Table(
    "files_by_hash",
    sa.MetaData(),
    sa.Column("id", sa.Integer, primary_key=True),
    sa.Column("hash_function", sa.Integer),
    sa.Column("digest", sa.LargeBinary),
    sa.Column("metadata_bytes", sa.LargeBinary),
)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
@BaseDedup.registry.mapped_as_dataclass(init=False)
class Tag:
    """A named tag attached to a deduplicated file (composite PK: file + name)."""

    __tablename__ = "dedup_tag"

    file_id: M[int] = mc(_make_file_fk(), primary_key=True, nullable=False)
    # Tag name; also indexed on its own for lookups by name.
    name: M[bytes] = mc(primary_key=True, index=True)

    file: M["DedupFile"] = relationship(back_populates="tags", **rel_kw_basic)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
@BaseDedup.registry.mapped_as_dataclass(init=False)
class Hash:
    """One (file, hash function) -> digest entry for a deduplicated file."""

    __tablename__ = "dedup_hashes"

    file_id: M[int] = mc(_make_file_fk(), primary_key=True, nullable=False)
    # Multihash function code (see the multihash registry).
    hash_function: M[int] = mc(primary_key=True)
    # Raw digest bytes; indexed for content lookups.
    hash: M[bytes] = mc(index=True)

    file: M["DedupFile"] = relationship(back_populates="hashes", **rel_kw_basic)

    @classmethod
    def from_digest(cls, digest: mh.Digest, **kw):
        """Build a Hash row from a decoded multihash digest; **kw passes extra fields."""
        return cls(hash_function=digest.function.function_code, hash=digest.digest, **kw)

    def to_digest(self):
        """Decode this row back into a multihash Digest via the registry."""
        return mh.registry.decode_from_code_and_digest(self.hash_function, self.hash)

    @classmethod
    def compare_digest(cls):
        """Return a comparator for building SQL predicates against digests."""
        return _HashCompareByDigest(cls)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
@dataclasses.dataclass(eq=False)
class _HashCompareByDigest:
    """Builds SQL predicates comparing a :class:`Hash` (or an alias of it) to digests.

    Obtained via :meth:`Hash.compare_digest`; ``==`` / ``!=`` / ``in_`` emit
    SQLAlchemy expressions over the ``hash`` and ``hash_function`` columns.
    """

    # The Hash class or an ORM alias of it.
    alias: type[Hash]

    def in_(self, digests: ty.Iterable[mh.Digest]):
        """SQL predicate: the row's (hash, function) pair matches any given digest."""
        a = self.alias
        return sa.tuple_(a.hash, a.hash_function).in_(
            (x.digest, x.function.function_code) for x in digests
        )

    def __eq__(self, other):
        """SQL predicate: row digest equals *other* (must be an mh.Digest)."""
        if isinstance(other, mh.Digest):
            a = self.alias
            return sa.and_(a.hash == other.digest, a.hash_function == other.function.function_code)

        return NotImplemented

    def __ne__(self, other):
        """SQL predicate: negation of :meth:`__eq__`.

        Fix: the old body did ``sa.not_(self == other)``, which, for a
        non-Digest operand, evaluated the fallback comparison to a plain
        bool and produced ``sa.not_(False)`` — a bogus always-true SQL
        expression. Propagate ``NotImplemented`` instead so Python's
        comparison protocol applies.
        """
        eq = self.__eq__(other)
        if eq is NotImplemented:
            return NotImplemented
        return sa.not_(eq)
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
# Partial indexes (SQLite) covering only rows where the column is non-NULL,
# keeping each index small for the common fully-linked / non-pending case.
# `.is_not(None)` emits the same "IS NOT NULL" SQL as the previous `!= None`
# but is the explicit SQLAlchemy idiom and avoids the E711 lint trap.
sa.Index(
    "ix_dedup_file_pending_partial",
    DedupFile.pending_id,
    sqlite_where=DedupFile.pending_id.is_not(None),
)
sa.Index(
    "ix_dedup_file_orphaned_at_partial",
    DedupFile.orphaned_at,
    sqlite_where=DedupFile.orphaned_at.is_not(None),
)
|