thds.tabularasa 0.13.0__py3-none-any.whl → 0.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thds/tabularasa/__main__.py +5 -3
- thds/tabularasa/data_dependencies/adls.py +2 -0
- thds/tabularasa/testing/__init__.py +5 -0
- thds/tabularasa/testing/mock_sqlite.py +114 -0
- {thds_tabularasa-0.13.0.dist-info → thds_tabularasa-0.14.0.dist-info}/METADATA +1 -1
- {thds_tabularasa-0.13.0.dist-info → thds_tabularasa-0.14.0.dist-info}/RECORD +9 -7
- {thds_tabularasa-0.13.0.dist-info → thds_tabularasa-0.14.0.dist-info}/WHEEL +0 -0
- {thds_tabularasa-0.13.0.dist-info → thds_tabularasa-0.14.0.dist-info}/entry_points.txt +0 -0
- {thds_tabularasa-0.13.0.dist-info → thds_tabularasa-0.14.0.dist-info}/top_level.txt +0 -0
thds/tabularasa/__main__.py
CHANGED
|
@@ -943,16 +943,18 @@ class ReferenceDataManager:
|
|
|
943
943
|
return None
|
|
944
944
|
raise IOError(table.name)
|
|
945
945
|
|
|
946
|
-
failed: List[str] = []
|
|
946
|
+
failed: list[tuple[str, Exception]] = []
|
|
947
947
|
synced: List[str] = []
|
|
948
948
|
for table_name, res in parallel.yield_all([(t.name, partial(inner, t)) for t in tables_to_sync]):
|
|
949
949
|
if isinstance(res, parallel.Error):
|
|
950
|
-
failed.append(table_name)
|
|
950
|
+
failed.append((table_name, res.error))
|
|
951
951
|
elif res is not None:
|
|
952
952
|
synced.append(table_name)
|
|
953
953
|
|
|
954
954
|
if failed:
|
|
955
|
-
|
|
955
|
+
first_exc = failed[0][1]
|
|
956
|
+
table_names = [name for name, _ in failed]
|
|
957
|
+
raise RuntimeError(f"Sync failed for tables {', '.join(table_names)}") from first_exc
|
|
956
958
|
|
|
957
959
|
down_ = (
|
|
958
960
|
f"to local build directory {pkg_resources.resource_filename(self.package, self.package_data_dir)}"
|
|
@@ -6,6 +6,7 @@ from typing import List, Optional, Protocol, Union
|
|
|
6
6
|
import attr
|
|
7
7
|
|
|
8
8
|
from thds.adls import ADLSFileSystem, fqn
|
|
9
|
+
from thds.core import fretry
|
|
9
10
|
from thds.tabularasa.schema.files import ADLSDataSpec, RemoteBlobStoreSpec
|
|
10
11
|
|
|
11
12
|
CACHE_DIR = ".cache/"
|
|
@@ -40,6 +41,7 @@ def adls_filesystem(account: str, filesystem: str, cache_dir: Optional[Union[Pat
|
|
|
40
41
|
return ADLSFileSystem(account, filesystem, cache_dir=cache_dir)
|
|
41
42
|
|
|
42
43
|
|
|
44
|
+
@fretry.retry_regular(fretry.is_exc(Exception), fretry.n_times(3))
|
|
43
45
|
def sync_adls_data(
|
|
44
46
|
adls_spec: ADLSDataSpec, cache_dir: Optional[Union[Path, str]] = CACHE_DIR
|
|
45
47
|
) -> List[ADLSDownloadResult]:
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import contextlib
|
|
2
|
+
import inspect
|
|
3
|
+
import sqlite3
|
|
4
|
+
import tempfile
|
|
5
|
+
import typing as ty
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import attrs
|
|
9
|
+
import pyarrow as pa
|
|
10
|
+
import pyarrow.parquet
|
|
11
|
+
|
|
12
|
+
from thds.core import scope
|
|
13
|
+
from thds.core.types import StrOrPath
|
|
14
|
+
from thds.tabularasa.data_dependencies import sqlite, util
|
|
15
|
+
from thds.tabularasa.schema import load_schema
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class _GeneratedSqliteLoader(ty.Protocol):
    """Structural type describing how a generated sqlite loader class is constructed.

    Only the constructor shape is declared here: a loader is built from an optional package name and
    the path of a sqlite database file, which is how `mock_sqlite_loader` instantiates it.
    """

    def __init__(self, package: ty.Optional[str], db_path: str) -> None:
        ...
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
_UNTIL_EXIT_SCOPE = scope.Scope("tabularasa.testing.mock_sqlite_loader")
|
|
27
|
+
# this scope is for creating temporary sqlite database files that persist until program exit, in case the caller of
|
|
28
|
+
# mock_sqlite_loader doesn't want to manage the database file themselves
|
|
29
|
+
|
|
30
|
+
L = ty.TypeVar("L", bound=_GeneratedSqliteLoader)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _schema_package_candidates(loader_cls: type, package: ty.Optional[str]) -> ty.List[str]:
    """Return the package names to try, in order, when looking for the schema file."""
    if package is not None:
        return [package]

    # If the loader's `package` parameter has a string default, use that as the only candidate.
    # `inspect.Parameter.empty` (the "no default" sentinel) is a truthy class object, so a bare
    # truthiness test would mistake a missing default for a package name; require a real string.
    # `.get` also tolerates loader classes whose constructor has no `package` parameter at all.
    package_param = inspect.signature(loader_cls).parameters.get("package")
    default = None if package_param is None else package_param.default
    if isinstance(default, str) and default:
        return [default]

    # Otherwise climb from the loader's own module towards the root package: "a.b.c", "a.b", "a".
    module_path = loader_cls.__module__.split(".")
    return [".".join(module_path[:i]) for i in range(len(module_path), 0, -1)]


def mock_sqlite_loader(
    loader_cls: ty.Type[L],
    data: ty.Mapping[str, ty.Collection[attrs.AttrsInstance]],
    tmp_db_path: ty.Optional[StrOrPath] = None,
    *,
    package: ty.Optional[str] = None,
    schema_path: str = "schema.yaml",
    validate: bool = False,
) -> L:
    """Construct an instance of your custom generated sqlite loader from mocked data. Note that this is guaranteed
    typesafe because regardless of how you define your mock records, the resulting sqlite loader will be a true instance
    of your generated loader class, and will have all the same lookup methods and will use all the same deserialization
    logic for reading rows from the database and returning actual instances from your library's data model.

    :param loader_cls: The generated sqlite loader class to instantiate.
    :param data: A mapping from table names to collections of attrs records representing rows.
    :param tmp_db_path: Optional path to a file to use for the sqlite database. If None, a temporary file is created.
      Note that in this case the temporary file will not be cleaned up until program exit.
    :param package: The root package name containing the schema and generated loader(s). If omitted, the string
      default of the loader class's `package` parameter is used when there is one; otherwise it is inferred from the
      loader class's `__module__` attribute by climbing up until a schema file is found.
    :param schema_path: The path to the schema file within the package.
    :param validate: Whether to validate data against the schema when inserting data into the database.
    :return: An instance of the specified sqlite loader class populated with the provided mocked data, with empty
      tables for any table names that were not included in the `data` mapping.
    :raises ValueError: If no candidate package yields a loadable schema, or if `data` contains table names that
      are not present in the schema.
    """
    for candidate in _schema_package_candidates(loader_cls, package):
        try:
            schema = load_schema(candidate, schema_path)
        except (ModuleNotFoundError, FileNotFoundError):
            continue
        else:
            break
    else:
        raise ValueError(
            f"Could not infer package containing schema from loader class {loader_cls.__qualname__}; "
            "please specify the 'package' argument explicitly."
        )

    # Reject bad input before allocating the until-exit temporary file, so that an invalid call does not
    # leave a temporary database file registered until program exit.
    unknown_tables = set(data).difference(schema.tables)
    if unknown_tables:
        raise ValueError(f"Data provided for unknown tables: {sorted(unknown_tables)}")

    if tmp_db_path is None:
        tmp_db_path = _UNTIL_EXIT_SCOPE.enter(tempfile.NamedTemporaryFile(suffix=".sqlite")).name

    with (
        tempfile.TemporaryDirectory() as tmpdir,
        contextlib.closing(sqlite3.connect(str(tmp_db_path))) as con,
    ):
        # this tmpdir is only for staging parquet files before loading into sqlite; it's fine that they get deleted
        # immediately after the database is populated
        for name, table in schema.tables.items():
            # tables missing from `data` are still written, as empty tables
            rows = data.get(name, [])
            pa_table = pa.Table.from_pylist(
                [attrs.asdict(row, recurse=True) for row in rows], schema=table.parquet_schema
            )
            filename = name + ".parquet"
            pyarrow.parquet.write_table(
                pa_table, Path(tmpdir) / filename, version=util.PARQUET_FORMAT_VERSION
            )
            sqlite.insert_table(
                con,
                table,
                package=None,
                data_dir=tmpdir,
                filename=filename,
                validate=validate,
                cast=not validate,  # `cast` is always the inverse of `validate`
            )

    return loader_cls(package=None, db_path=str(tmp_db_path))
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: thds.tabularasa
|
|
3
|
-
Version: 0.13.0
|
|
3
|
+
Version: 0.14.0
|
|
4
4
|
Summary: Trilliant Health reference data build system.
|
|
5
5
|
Author-email: Trilliant Health <info@trillianthealth.com>
|
|
6
6
|
Project-URL: Repository, https://github.com/TrilliantHealth/ds-monorepo
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
thds/tabularasa/__init__.py,sha256=jc6w1WD868MQ2t4wkRNYvRssojwXvPDcNyC8V5gwbl0,169
|
|
2
|
-
thds/tabularasa/__main__.py,sha256=
|
|
2
|
+
thds/tabularasa/__main__.py,sha256=Ryfd7YogTE_qFjp8IJA-KTeTXXD9INS5GJGmdPVvWBw,47791
|
|
3
3
|
thds/tabularasa/compat.py,sha256=j0313TPIXtkbfvRI0AH4if8GLrjQSrDJ9heayCIl9w8,1037
|
|
4
4
|
thds/tabularasa/git_util.py,sha256=fBFhaCPi_5W2BpG2B3WiPcAWJvuVI_pG47rt73wLO6E,1388
|
|
5
5
|
thds/tabularasa/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -7,7 +7,7 @@ thds/tabularasa/sqlite3_compat.py,sha256=67hldmiLFTjG0qL2TPX0USV7XNfTjEN3j8MneqN
|
|
|
7
7
|
thds/tabularasa/sqlite_from_parquet.py,sha256=yJatUIAbgErHUOL5dhchWJwzKZCrDrx93SP0nGm7It8,1115
|
|
8
8
|
thds/tabularasa/to_sqlite.py,sha256=5lcEUh38MNebxAJdLp2XGWOP_WQDIADtL1fyhOvi9UU,1715
|
|
9
9
|
thds/tabularasa/data_dependencies/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
-
thds/tabularasa/data_dependencies/adls.py,sha256=
|
|
10
|
+
thds/tabularasa/data_dependencies/adls.py,sha256=vJAuc5Key-vO1N6DGo5dj9fIx_4hMALAVC17qhvkT7Y,3257
|
|
11
11
|
thds/tabularasa/data_dependencies/build.py,sha256=6iYgw93sOF2Nlnb6WSmA9NvPNwOf_Yyi2wXUQpRVkJM,23382
|
|
12
12
|
thds/tabularasa/data_dependencies/sqlite.py,sha256=eweuLdoxyGlG-PvQUANarlMe_mmZgA5cxuMbOYxcpsQ,12576
|
|
13
13
|
thds/tabularasa/data_dependencies/tabular.py,sha256=oq9wFse235ikLEv8Zvol59ptRRojZbkbzXJyQeFfC9o,6529
|
|
@@ -39,8 +39,10 @@ thds/tabularasa/schema/compilation/pyarrow.py,sha256=pcNQ3a6UPJT1PBj6xHOl99UvZft
|
|
|
39
39
|
thds/tabularasa/schema/compilation/sphinx.py,sha256=we5X-ZpCk6WH-8KCXAv6Nklg1JZmnkGPT3V2EHa2_rg,17491
|
|
40
40
|
thds/tabularasa/schema/compilation/sqlite.py,sha256=wSrSlVCYeuTpOf9AOHAnp6gJHkjHZhx8UkgkYgfoQVw,2368
|
|
41
41
|
thds/tabularasa/schema/compilation/util.py,sha256=YXFe1_yoBobED010hstKIoq-dwLHo6SBv1v1IAw6AYU,3886
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
thds_tabularasa-0.
|
|
45
|
-
thds_tabularasa-0.
|
|
46
|
-
thds_tabularasa-0.
|
|
42
|
+
thds/tabularasa/testing/__init__.py,sha256=XoLzB-DotxFw9KHt2vfH72k7pyAAFI2bW-qqq6nww1g,85
|
|
43
|
+
thds/tabularasa/testing/mock_sqlite.py,sha256=xoV4w_GaDgtZf17iUux2-LA6Va1XRJdC2FU34dysh0o,4769
|
|
44
|
+
thds_tabularasa-0.14.0.dist-info/METADATA,sha256=fzbOzf8zgv-IBEcUN_6stkhIkhccgbrmJBi_jbGdkS4,26786
|
|
45
|
+
thds_tabularasa-0.14.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
46
|
+
thds_tabularasa-0.14.0.dist-info/entry_points.txt,sha256=PX4ShRonjv6lMsVjrGu8RkFzpyyvgM9EnZlNfMomd9k,61
|
|
47
|
+
thds_tabularasa-0.14.0.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
|
|
48
|
+
thds_tabularasa-0.14.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|