thds.core 0.0.1__py3-none-any.whl → 1.31.20250123022540__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between these versions as they appear in the public registry.
Potentially problematic release.
This version of thds.core might be problematic.
- thds/core/__init__.py +48 -0
- thds/core/ansi_esc.py +46 -0
- thds/core/cache.py +201 -0
- thds/core/calgitver.py +82 -0
- thds/core/concurrency.py +100 -0
- thds/core/config.py +250 -0
- thds/core/decos.py +55 -0
- thds/core/dict_utils.py +188 -0
- thds/core/env.py +40 -0
- thds/core/exit_after.py +121 -0
- thds/core/files.py +125 -0
- thds/core/fretry.py +115 -0
- thds/core/generators.py +56 -0
- thds/core/git.py +81 -0
- thds/core/hash_cache.py +86 -0
- thds/core/hashing.py +106 -0
- thds/core/home.py +15 -0
- thds/core/hostname.py +10 -0
- thds/core/imports.py +17 -0
- thds/core/inspect.py +58 -0
- thds/core/iterators.py +9 -0
- thds/core/lazy.py +83 -0
- thds/core/link.py +153 -0
- thds/core/log/__init__.py +29 -0
- thds/core/log/basic_config.py +171 -0
- thds/core/log/json_formatter.py +43 -0
- thds/core/log/kw_formatter.py +84 -0
- thds/core/log/kw_logger.py +93 -0
- thds/core/log/logfmt.py +302 -0
- thds/core/merge_args.py +168 -0
- thds/core/meta.json +8 -0
- thds/core/meta.py +518 -0
- thds/core/parallel.py +200 -0
- thds/core/pickle_visit.py +24 -0
- thds/core/prof.py +276 -0
- thds/core/progress.py +112 -0
- thds/core/protocols.py +17 -0
- thds/core/py.typed +0 -0
- thds/core/scaling.py +39 -0
- thds/core/scope.py +199 -0
- thds/core/source.py +238 -0
- thds/core/source_serde.py +104 -0
- thds/core/sqlite/__init__.py +21 -0
- thds/core/sqlite/connect.py +33 -0
- thds/core/sqlite/copy.py +35 -0
- thds/core/sqlite/ddl.py +4 -0
- thds/core/sqlite/functions.py +63 -0
- thds/core/sqlite/index.py +22 -0
- thds/core/sqlite/insert_utils.py +23 -0
- thds/core/sqlite/merge.py +84 -0
- thds/core/sqlite/meta.py +190 -0
- thds/core/sqlite/read.py +66 -0
- thds/core/sqlite/sqlmap.py +179 -0
- thds/core/sqlite/structured.py +138 -0
- thds/core/sqlite/types.py +64 -0
- thds/core/sqlite/upsert.py +139 -0
- thds/core/sqlite/write.py +99 -0
- thds/core/stack_context.py +41 -0
- thds/core/thunks.py +40 -0
- thds/core/timer.py +214 -0
- thds/core/tmp.py +85 -0
- thds/core/types.py +4 -0
- thds.core-1.31.20250123022540.dist-info/METADATA +68 -0
- thds.core-1.31.20250123022540.dist-info/RECORD +67 -0
- {thds.core-0.0.1.dist-info → thds.core-1.31.20250123022540.dist-info}/WHEEL +1 -1
- thds.core-1.31.20250123022540.dist-info/entry_points.txt +4 -0
- thds.core-1.31.20250123022540.dist-info/top_level.txt +1 -0
- thds.core-0.0.1.dist-info/METADATA +0 -8
- thds.core-0.0.1.dist-info/RECORD +0 -4
- thds.core-0.0.1.dist-info/top_level.txt +0 -1
thds/core/meta.py
ADDED
@@ -0,0 +1,518 @@
+import importlib
+import json
+import os
+import re
+import typing as ty
+from datetime import datetime, timezone
+from functools import lru_cache
+from getpass import getuser
+from importlib.metadata import PackageNotFoundError, version
+from importlib.resources import Package, open_text
+from pathlib import Path
+from types import MappingProxyType
+
+import attrs
+from cattrs import Converter
+
+from . import calgitver, git
+from .log import getLogger
+from .types import StrOrPath
+
+LayoutType = ty.Literal["flat", "src"]
+NameFormatType = ty.Literal["git", "docker", "hive"]
+
+TIMESTAMP_FORMAT = "%Y%m%d%H%M%S"
+
+DOCKER_EXCLUSION_REGEX = r"[^\w\-.]+"
+DOCKER_SUB_CHARACTER = "-"
+HIVE_EXCLUSION_REGEX = r"[\W]+"
+HIVE_SUB_CHARACTER = "_"
+VERSION_EXCLUSION_REGEX = r"[^\d.]+"
+VERSION_SUB_CHARACTER = ""
+
+CI_TIMESTAMP = "CI_TIMESTAMP"
+CI_USER = "runner"
+DEPLOYING = "DEPLOYING"
+GIT_COMMIT = "GIT_COMMIT"
+GIT_IS_CLEAN = "GIT_IS_CLEAN"
+GIT_IS_DIRTY = "GIT_IS_DIRTY"
+GIT_BRANCH = "GIT_BRANCH"
+MAIN = "main"
+THDS_USER = "THDS_USER"
+
+META_FILE = "meta.json"
+
+LOGGER = getLogger(__name__)
+
+
+def format_name(name: str, format: NameFormatType = "git") -> str:
+    if format == "git":
+        return name
+    elif format == "docker":
+        return re.sub(DOCKER_EXCLUSION_REGEX, DOCKER_SUB_CHARACTER, name)
+    elif format == "hive":
+        return re.sub(HIVE_EXCLUSION_REGEX, HIVE_SUB_CHARACTER, name).lower()
+    else:
+        raise ValueError(
+            f"'{format}' is not a supported `format`. Supported formats: {ty.get_args(NameFormatType)}"
+        )
+
+
+@ty.overload
+def get_timestamp() -> str:
+    ...  # pragma: no cover
+
+
+@ty.overload
+def get_timestamp(as_datetime: ty.Literal[True]) -> datetime:
+    ...  # pragma: no cover
+
+
+@ty.overload
+def get_timestamp(as_datetime: ty.Literal[False]) -> str:
+    ...  # pragma: no cover
+
+
+def get_timestamp(as_datetime: bool = False):
+    timestamp = datetime.now(timezone.utc)
+    return timestamp.strftime(TIMESTAMP_FORMAT) if not as_datetime else timestamp
+
+
+def make_calgitver() -> str:
+    """An older version of calgitver that allows for non-determinstic datetimes
+    if the repo is dirty.
+
+    See calgitver.calgitver for docs on what this means.
+    """
+    dirty = "" if is_clean() else "dirty"
+    if not dirty:
+        # we only attempt this 'determinstic' datetime if the repo is clean, because if
+        # it's not clean then this isn't deterministic anyway, and so we'd rather just
+        # have an up-to-date timestamp
+        try:
+            return calgitver.calgitver()
+        except git.NO_GIT:
+            pass
+    base_components = (
+        datetime.now(tz=timezone.utc).strftime(git.CALGITVER_NO_SECONDS_FORMAT),
+        get_commit()[: calgitver.SHORT_HASH],
+    )
+    return "-".join((*base_components, dirty)).rstrip("-")
+
+
+def print_calgitver():
+    print(make_calgitver())
+
+
+@ty.overload
+def extract_timestamp(version: str) -> str:
+    """Returns timestamp in full YYYYMMDDHHMMSS format even if the input was a CalGitVer string with no seconds."""
+
+
+@ty.overload
+def extract_timestamp(version: str, as_datetime: ty.Literal[True]) -> datetime:
+    ...  # pragma: no cover
+
+
+@ty.overload
+def extract_timestamp(version: str, as_datetime: ty.Literal[False]) -> str:
+    ...  # pragma: no cover
+
+
+def extract_timestamp(version: str, as_datetime: bool = False):
+    def to_result(dt: datetime):
+        return dt.replace(tzinfo=timezone.utc) if as_datetime else dt.strftime(TIMESTAMP_FORMAT)
+
+    # This is intended to be general-purpose and therefore a bit heuristic.
+    # We attempt to parse the version as CalGitVer first, since it is a
+    # narrow format. Failing that, we'll try SemCalVer.
+    if calgitver.parse_calgitver(version):
+        try:
+            return to_result(datetime.strptime(version[:13], git.CALGITVER_NO_SECONDS_FORMAT))
+        except ValueError:
+            pass
+
+    version = re.sub(VERSION_EXCLUSION_REGEX, VERSION_SUB_CHARACTER, version)
+    version_ = version.split(".")
+    if len(version_) >= 3:
+        try:
+            return to_result(datetime.strptime(version_[2], TIMESTAMP_FORMAT))
+        except ValueError:
+            pass
+
+    raise ValueError(
+        f"`version`: {version} is not a timestamp-containing version string (SemCalVer or CalGitVer)."
+    )
+
+
+def norm_name(pkg: str) -> str:
+    """Apparently poetry creates slightly different dist-info
+    directories and METADATA files than p-i-p-e-n-v did.
+    """
+    return pkg.replace(".", "_")
+
+
+def _get_pkg_root_filename(pkg: Package) -> str:
+    if not isinstance(pkg, str):
+        return pkg.__file__ or ""
+    try:
+        pkg_spec = importlib.util.find_spec(pkg)  # type: ignore
+        return pkg_spec and pkg_spec.origin or ""
+    except ModuleNotFoundError:
+        return ""
+
+
+@lru_cache(None)
+def get_version(pkg: Package, orig: str = "") -> str:
+    # first try direct lookup from the pyproject.toml, if one can be found,
+    # because poetry frequently has outdated info in the venv it creates.
+    pkg_root_file = _get_pkg_root_filename(pkg)
+    if pkg_root_file:
+        version_ = find_pyproject_toml_version(Path(pkg_root_file), str(pkg))
+        if version_:
+            return version_
+    try:
+        version_ = version(norm_name(str(pkg)))
+    except PackageNotFoundError:
+        try:
+            version_ = version(str(pkg))
+        except PackageNotFoundError:
+            # 'recurse' upward, assuming that the package name is overly-specified
+            pkg_ = pkg.split(".")
+            if len(pkg_) <= 1:
+                # Check to see if there's a
+                # meta.json file hanging around, and if so, see if it contains a pyproject_version.
+                metadata = read_metadata(orig or pkg)
+                if metadata and metadata.pyproject_version:
+                    return metadata.pyproject_version
+
+                for env_var in ("CALGITVER", "GIT_COMMIT"):
+                    env_var_version = os.getenv(env_var)
+                    lvl = LOGGER.debug if env_var == "CALGITVER" else LOGGER.info
+                    if env_var_version:
+                        lvl(f"Using {env_var} {env_var_version} as fallback version for {orig or pkg}")
+                        return env_var_version
+
+                LOGGER.warning("Could not find a version for `%s`. Package not found.", orig or pkg)
+                return ""
+            return get_version(".".join(pkg_[:-1]), orig or pkg)
+
+    return version_
+
+
+class NoBasePackageFromMain(ValueError):
+    """
+    `get_base_package` needs a 'real' package or module name, not '__main__',
+    in order to discover a meaningful name for the package.
+    You may be using a dynamic library from within __main__ that calls get_base_package,
+    and inside __main__, Python doesn't let us do any nice introspection on a module's name.
+    So, please call that code from a module that isn't what was passed to `python -m` -
+    that is, split your code into a minimal __main__.py and a separate module.
+    """
+
+
+@lru_cache(None)
+def get_base_package(pkg: Package) -> str:
+    try:
+        str_pkg = str(pkg)
+        if str_pkg == "__main__":
+            raise NoBasePackageFromMain(NoBasePackageFromMain.__doc__)
+        _ = version(norm_name(str_pkg))
+    except PackageNotFoundError:
+        try:
+            _ = version(str(pkg))
+        except PackageNotFoundError:
+            pkg_ = pkg.split(".")
+            if len(pkg_) <= 1:
+                LOGGER.warning("Could not find the base package for `%s`. Package not found.", pkg)
+                return ""
+            else:
+                return get_base_package(".".join(pkg_[:-1]))
+
+    return str(pkg)
+
+
+def get_repo_name() -> str:
+    try:
+        return git.get_repo_name()
+    except git.NO_GIT:
+        LOGGER.debug("`get_repo_name` found no repo name.")
+        return ""
+
+
+def get_commit(pkg: Package = "") -> str:  # should really be named get_commit_hash
+    if GIT_COMMIT in os.environ:
+        LOGGER.debug("`get_commit` reading from env var.")
+        return os.environ[GIT_COMMIT]
+
+    try:
+        return git.get_commit_hash()
+    except git.NO_GIT:
+        pass
+
+    try:
+        if pkg:
+            LOGGER.debug("`get_commit` reading from metadata.")
+            metadata = read_metadata(pkg)
+            if metadata.is_empty:
+                raise EmptyMetadataException
+            return metadata.git_commit
+    except EmptyMetadataException:
+        pass
+
+    LOGGER.warning("`get_commit` found no commit.")
+    return ""
+
+
+def is_clean(pkg: Package = "") -> bool:
+    if GIT_IS_CLEAN in os.environ:
+        LOGGER.debug("`is_clean` reading from env var.")
+        return bool(os.environ[GIT_IS_CLEAN])
+
+    if GIT_IS_DIRTY in os.environ:
+        # compatibility with docker-tools/build_push
+        LOGGER.debug("`is_clean` reading from env var.")
+        return not bool(os.getenv(GIT_IS_DIRTY))
+
+    try:
+        return git.is_clean()
+    except git.NO_GIT:
+        pass
+
+    try:
+        if pkg:
+            LOGGER.debug("`is_clean` reading from metadata.")
+            metadata = read_metadata(pkg)
+            if metadata.is_empty:
+                raise EmptyMetadataException
+            return metadata.git_is_clean
+    except EmptyMetadataException:
+        pass
+
+    LOGGER.warning("`is_clean` found no cleanliness - assume dirty.")
+    return False
+
+
+def get_branch(pkg: Package = "", format: NameFormatType = "git") -> str:
+    def _get_branch(pkg: Package = "") -> str:
+        if GIT_BRANCH in os.environ:
+            LOGGER.debug("`get_branch` reading from env var.")
+            return os.environ[GIT_BRANCH]
+
+        try:
+            return git.get_branch()
+        except git.NO_GIT:
+            pass
+
+        try:
+            if pkg:
+                LOGGER.debug("`get_branch` reading from metadata.")
+                metadata = read_metadata(pkg)
+                if not metadata.git_branch:
+                    raise EmptyMetadataException
+                return metadata.git_branch
+        except EmptyMetadataException:
+            pass
+
+        LOGGER.warning("`get_branch` found no branch.")
+        return ""
+
+    return format_name(_get_branch(pkg), format)
+
+
+def get_user(pkg: Package = "", format: NameFormatType = "git") -> str:
+    def _get_user(pkg: Package = "") -> str:
+        if THDS_USER in os.environ:
+            LOGGER.debug("`get_user` reading from env var.")
+            return os.environ[THDS_USER]
+
+        try:
+            if pkg:
+                LOGGER.debug("`get_user` reading from metadata.")
+                metadata = read_metadata(pkg)
+                if not metadata.thds_user:
+                    raise EmptyMetadataException
+                return metadata.thds_user
+        except EmptyMetadataException:
+            pass
+
+        LOGGER.debug("`get_user` found no user data - getting system user.")
+        return getuser()
+
+    return format_name(_get_user(pkg), format)
+
+
+def is_deployed(pkg: Package) -> bool:
+    meta = read_metadata(pkg)
+    return not meta.is_empty
+
+
+def _hacky_get_pyproject_toml_version(pkg: Package, wdir: Path) -> str:
+    # it will be a good day when Python packages a toml reader by default.
+    ppt = wdir / "pyproject.toml"
+    if ppt.exists():
+        with open(ppt) as f:
+            toml = f.read()
+        # check name for sanity - we don't want to pull a version
+        # out of, say, the root project when that doesn't match our project name.
+        # TODO: extract name and version more nicely.
+        # TODO: normalize the name here more robustly.
+        if not re.search(rf"name\s*=\s*[\"']({pkg.replace('_', '-')})[\"']", toml):
+            LOGGER.warning(f"The package name in pyproject.toml does not match the package name ({pkg})")
+        for line in toml.splitlines():
+            if m := re.match(r"version\s*=\s*[\"'](?P<version>[a-zA-Z0-9.]+)[\"']", line):
+                return m.group("version")
+    return ""
+
+
+def find_pyproject_toml_version(starting_path: Path, pkg: Package) -> str:
+    """A way of looking to see if there's a pyproject.toml that defines our package's
+    version. Only really useful in a monorepo context.
+    """
+    while starting_path != starting_path.parent:
+        directory = starting_path.parent
+        ppt = directory / "pyproject.toml"
+        if ppt.exists():
+            # the first one we find is the only one we'll try.
+            # anything above that can't possibly be the appropriate
+            # pyproject.toml.
+            try:
+                return _hacky_get_pyproject_toml_version(pkg, directory)
+            except ValueError as ve:
+                LOGGER.info(str(ve))
+                return ""
+        starting_path = directory

+    return ""
+
+
+MiscType = ty.Mapping[str, ty.Union[str, int, float, bool]]
+
+
+@attrs.frozen
+class Metadata:
+    git_commit: str = ""
+    git_branch: str = ""
+    git_is_clean: bool = False
+    pyproject_version: str = ""  # only present if the project defines `version` inside pyproject.toml
+    thds_user: str = ""
+    misc: MiscType = attrs.field(factory=lambda: MappingProxyType(dict()))
+
+    @property
+    def docker_branch(self) -> str:
+        return format_name(self.git_branch, "docker")
+
+    @property
+    def hive_branch(self) -> str:
+        return format_name(self.git_branch, "hive")
+
+    @property
+    def docker_user(self) -> str:
+        return format_name(self.thds_user, "docker")
+
+    @property
+    def hive_user(self) -> str:
+        return format_name(self.thds_user, "hive")
+
+    @property
+    def is_empty(self) -> bool:
+        return all(not getattr(self, field.name) for field in attrs.fields(Metadata))
+
+    @property
+    def git_is_dirty(self) -> bool:
+        return not self.git_is_clean
+
+
+meta_converter = Converter(forbid_extra_keys=True)
+meta_converter.register_structure_hook(
+    Metadata, lambda v, _: Metadata(misc=MappingProxyType(v.pop("misc", {})), **v)
+)
+
+
+class EmptyMetadataException(Exception):
+    pass
+
+
+def init_metadata(misc: ty.Optional[MiscType] = None, pyproject_toml_version: str = "") -> Metadata:
+    return Metadata(
+        git_commit=get_commit(),
+        git_branch=get_branch(),
+        git_is_clean=is_clean(),
+        pyproject_version=pyproject_toml_version,
+        thds_user=os.getenv(THDS_USER, getuser()),
+        misc=MappingProxyType(misc) if misc else MappingProxyType(dict()),
+    )
+
+
+def _sanitize_metadata_for_docker_tools(d: dict):
+    """We want our Docker builds to be able to take advantage of
+    caching based on the contents of the sources copied over into
+    them. If we embed a meta.json into each library where the commit
+    hash changes every time a commit happens, then we've blown away
+    our entire cache.
+
+    The Docker builds already inject this metadata as environment
+    variables after the source copies happen, so there's no need for
+    us to embed it this way.
+    """
+    d["git_commit"] = ""
+    d["git_branch"] = ""
+    d["git_is_clean"] = ""
+    d["thds_user"] = THDS_USER
+
+
+def write_metadata(
+    pkg: str,
+    *,
+    misc: ty.Optional[MiscType] = None,
+    namespace: str = "thds",
+    layout: LayoutType = "src",
+    wdir: ty.Optional[StrOrPath] = None,
+    deploying: bool = False,
+    for_docker_tools_build: bool = False,
+) -> None:
+    wdir_ = Path(wdir) if wdir else Path(".")
+    assert wdir_
+    if os.getenv(DEPLOYING) or deploying:
+        LOGGER.debug("Writing metadata.")
+        metadata = init_metadata(
+            misc=misc, pyproject_toml_version=_hacky_get_pyproject_toml_version(pkg, wdir_)
+        )
+        metadata_path = os.path.join(
+            "src" if layout == "src" else "",
+            namespace.replace("-", "/").replace(".", "/"),
+            pkg.replace("-", "_").replace(".", "/"),
+            META_FILE,
+        )
+
+        LOGGER.info(f"Writing metadata for {pkg} to {wdir_ / metadata_path}")
+        with open(wdir_ / metadata_path, "w") as f:
+            metadata_dict = meta_converter.unstructure(metadata)
+            if for_docker_tools_build:
+                _sanitize_metadata_for_docker_tools(metadata_dict)
+            json.dump(metadata_dict, f, indent=2)
+            f.write("\n")  # Add newline because Py JSON does not
+
+
+@lru_cache(None)
+def read_metadata(pkg: Package) -> Metadata:
+    LOGGER.debug("Reading metadata.")
+
+    if pkg == "__main__":
+        raise ValueError("`read_meta` expects a package or module name, not '__main__'.")
+
+    if not pkg:
+        raise ValueError(
+            "`read_meta` is missing a package or module name. "
+            "If using `__package__` make sure an __init__.py is present."
+        )
+
+    try:
+        with open_text(pkg, META_FILE) as f:
+            return meta_converter.structure(json.load(f), Metadata)
+    # pkg=__name__ will raise a TypeError unless it is called in an __init__.py
+    except (ModuleNotFoundError, FileNotFoundError, TypeError):
+        pkg_ = pkg.split(".")
+        if len(pkg_) <= 1:
+            return Metadata()
+        return read_metadata(".".join(pkg_[:-1]))