thds.tabularasa 0.13.1__py3-none-any.whl → 0.14.0__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -943,16 +943,18 @@ class ReferenceDataManager:
943
943
  return None
944
944
  raise IOError(table.name)
945
945
 
946
- failed: List[str] = []
946
+ failed: list[tuple[str, Exception]] = []
947
947
  synced: List[str] = []
948
948
  for table_name, res in parallel.yield_all([(t.name, partial(inner, t)) for t in tables_to_sync]):
949
949
  if isinstance(res, parallel.Error):
950
- failed.append(table_name)
950
+ failed.append((table_name, res.error))
951
951
  elif res is not None:
952
952
  synced.append(table_name)
953
953
 
954
954
  if failed:
955
- raise RuntimeError(f"Sync failed for tables {', '.join(failed)}")
955
+ first_exc = failed[0][1]
956
+ table_names = [name for name, _ in failed]
957
+ raise RuntimeError(f"Sync failed for tables {', '.join(table_names)}") from first_exc
956
958
 
957
959
  down_ = (
958
960
  f"to local build directory {pkg_resources.resource_filename(self.package, self.package_data_dir)}"
@@ -6,6 +6,7 @@ from typing import List, Optional, Protocol, Union
6
6
  import attr
7
7
 
8
8
  from thds.adls import ADLSFileSystem, fqn
9
+ from thds.core import fretry
9
10
  from thds.tabularasa.schema.files import ADLSDataSpec, RemoteBlobStoreSpec
10
11
 
11
12
  CACHE_DIR = ".cache/"
@@ -40,6 +41,7 @@ def adls_filesystem(account: str, filesystem: str, cache_dir: Optional[Union[Pat
40
41
  return ADLSFileSystem(account, filesystem, cache_dir=cache_dir)
41
42
 
42
43
 
44
+ @fretry.retry_regular(fretry.is_exc(Exception), fretry.n_times(3))
43
45
  def sync_adls_data(
44
46
  adls_spec: ADLSDataSpec, cache_dir: Optional[Union[Path, str]] = CACHE_DIR
45
47
  ) -> List[ADLSDownloadResult]:
@@ -0,0 +1,5 @@
1
+ __all__ = [
2
+ "mock_sqlite_loader",
3
+ ]
4
+
5
+ from .mock_sqlite import mock_sqlite_loader
@@ -0,0 +1,114 @@
1
+ import contextlib
2
+ import inspect
3
+ import sqlite3
4
+ import tempfile
5
+ import typing as ty
6
+ from pathlib import Path
7
+
8
+ import attrs
9
+ import pyarrow as pa
10
+ import pyarrow.parquet
11
+
12
+ from thds.core import scope
13
+ from thds.core.types import StrOrPath
14
+ from thds.tabularasa.data_dependencies import sqlite, util
15
+ from thds.tabularasa.schema import load_schema
16
+
17
+
18
class _GeneratedSqliteLoader(ty.Protocol):
    """Structural type for the loader classes accepted by ``mock_sqlite_loader``.

    Only the constructor shape is specified: a loader must be constructible from a ``package`` (possibly None)
    and a ``db_path`` string.
    """

    def __init__(self, package: ty.Optional[str], db_path: str) -> None:
        ...
24
+
25
+
26
+ _UNTIL_EXIT_SCOPE = scope.Scope("tabularasa.testing.mock_sqlite_loader")
27
+ # this scope is for creating temporary sqlite database files that persist until program exit, in case the caller of
28
+ # mock_sqlite_loader doesn't want to manage the database file themselves
29
+
30
+ L = ty.TypeVar("L", bound=_GeneratedSqliteLoader)
31
+
32
+
33
def mock_sqlite_loader(
    loader_cls: ty.Type[L],
    data: ty.Mapping[str, ty.Collection[attrs.AttrsInstance]],
    tmp_db_path: ty.Optional[StrOrPath] = None,
    *,
    package: ty.Optional[str] = None,
    schema_path: str = "schema.yaml",
    validate: bool = False,
) -> L:
    """Construct an instance of your custom generated sqlite loader from mocked data. Note that this is guaranteed
    typesafe because regardless of how you define your mock records, the resulting sqlite loader will be a true instance
    of your generated loader class, and will have all the same lookup methods and will use all the same deserialization
    logic for reading rows from the database and returning actual instances from your library's data model.

    :param loader_cls: The generated sqlite loader class to instantiate.
    :param data: A mapping from table names to collections of attrs records representing rows.
    :param tmp_db_path: Optional path to a file to use for the sqlite database. If None, a temporary file is created.
      Note that in this case the temporary file will not be cleaned up until program exit.
    :param package: The root package name containing the schema and generated loader(s). If omitted, the default
      of the loader class's `package` constructor parameter is used when it has a non-empty one; otherwise the
      package is inferred from the loader class's `__module__` attribute by climbing up until a schema file is
      found.
    :param schema_path: The path to the schema file within the package.
    :param validate: Whether to validate data against the schema when inserting data into the database.
    :return: An instance of the specified sqlite loader class populated with the provided mocked data, with empty
      tables for any table names that were not included in the `data` mapping.
    :raises ValueError: If no candidate package yields a loadable schema, or if `data` contains table names
      that are not present in the schema.
    """
    if package is not None:
        package_candidates = [package]
    else:
        default_package = inspect.signature(loader_cls).parameters["package"].default
        # `Parameter.empty` (the "no default" sentinel) is a class and therefore truthy, so it has to be ruled
        # out explicitly; otherwise it would be handed to `load_schema` as if it were a package name.
        if default_package is not inspect.Parameter.empty and default_package:
            package_candidates = [default_package]
        else:
            # Try the loader's own module first, then each enclosing package up to the top-level one.
            module_parts = loader_cls.__module__.split(".")
            package_candidates = [".".join(module_parts[:depth]) for depth in range(len(module_parts), 0, -1)]

    for candidate in package_candidates:
        try:
            schema = load_schema(candidate, schema_path)
        except (ModuleNotFoundError, FileNotFoundError):
            continue
        else:
            break
    else:
        raise ValueError(
            f"Could not infer package containing schema from loader class {loader_cls.__qualname__}; "
            "please specify the 'package' argument explicitly."
        )

    # Reject bad input before any file is created, so a failed call does not leave a temporary database behind.
    unknown_tables = set(data.keys()).difference(schema.tables.keys())
    if unknown_tables:
        raise ValueError(f"Data provided for unknown tables: {sorted(unknown_tables)}")

    if tmp_db_path is None:
        # Entered into the module-level until-exit scope so the file outlives this call.
        tmp_db_path = _UNTIL_EXIT_SCOPE.enter(tempfile.NamedTemporaryFile(suffix=".sqlite")).name

    with (
        tempfile.TemporaryDirectory() as tmpdir,
        contextlib.closing(sqlite3.connect(str(tmp_db_path))) as con,
    ):
        # this tmpdir is only for staging parquet files before loading into sqlite; it's fine that they get deleted
        # immediately after the database is populated
        for name, table in schema.tables.items():
            # Every schema table is written, so tables missing from `data` end up present but empty.
            rows = data.get(name, [])
            pa_table = pa.Table.from_pylist(
                [attrs.asdict(row, recurse=True) for row in rows], schema=table.parquet_schema
            )
            filename = name + ".parquet"
            pyarrow.parquet.write_table(pa_table, Path(tmpdir) / filename, version=util.PARQUET_FORMAT_VERSION)
            sqlite.insert_table(
                con,
                table,
                package=None,
                data_dir=tmpdir,
                filename=filename,
                validate=validate,
                # NOTE(review): casting is enabled exactly when validation is off - confirm against insert_table.
                cast=not validate,
            )

    return loader_cls(package=None, db_path=str(tmp_db_path))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: thds.tabularasa
3
- Version: 0.13.1
3
+ Version: 0.14.0
4
4
  Summary: Trilliant Health reference data build system.
5
5
  Author-email: Trilliant Health <info@trillianthealth.com>
6
6
  Project-URL: Repository, https://github.com/TrilliantHealth/ds-monorepo
@@ -1,5 +1,5 @@
1
1
  thds/tabularasa/__init__.py,sha256=jc6w1WD868MQ2t4wkRNYvRssojwXvPDcNyC8V5gwbl0,169
2
- thds/tabularasa/__main__.py,sha256=w10WQRwQmer4Hn3JmgHqjtVJ2WEjs9MtTiDvsAS9gog,47648
2
+ thds/tabularasa/__main__.py,sha256=Ryfd7YogTE_qFjp8IJA-KTeTXXD9INS5GJGmdPVvWBw,47791
3
3
  thds/tabularasa/compat.py,sha256=j0313TPIXtkbfvRI0AH4if8GLrjQSrDJ9heayCIl9w8,1037
4
4
  thds/tabularasa/git_util.py,sha256=fBFhaCPi_5W2BpG2B3WiPcAWJvuVI_pG47rt73wLO6E,1388
5
5
  thds/tabularasa/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -7,7 +7,7 @@ thds/tabularasa/sqlite3_compat.py,sha256=67hldmiLFTjG0qL2TPX0USV7XNfTjEN3j8MneqN
7
7
  thds/tabularasa/sqlite_from_parquet.py,sha256=yJatUIAbgErHUOL5dhchWJwzKZCrDrx93SP0nGm7It8,1115
8
8
  thds/tabularasa/to_sqlite.py,sha256=5lcEUh38MNebxAJdLp2XGWOP_WQDIADtL1fyhOvi9UU,1715
9
9
  thds/tabularasa/data_dependencies/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- thds/tabularasa/data_dependencies/adls.py,sha256=smL-iRYr9aXFFpW4inQBQmB-ieBKXcBVe9AdwV2zisw,3161
10
+ thds/tabularasa/data_dependencies/adls.py,sha256=vJAuc5Key-vO1N6DGo5dj9fIx_4hMALAVC17qhvkT7Y,3257
11
11
  thds/tabularasa/data_dependencies/build.py,sha256=6iYgw93sOF2Nlnb6WSmA9NvPNwOf_Yyi2wXUQpRVkJM,23382
12
12
  thds/tabularasa/data_dependencies/sqlite.py,sha256=eweuLdoxyGlG-PvQUANarlMe_mmZgA5cxuMbOYxcpsQ,12576
13
13
  thds/tabularasa/data_dependencies/tabular.py,sha256=oq9wFse235ikLEv8Zvol59ptRRojZbkbzXJyQeFfC9o,6529
@@ -39,8 +39,10 @@ thds/tabularasa/schema/compilation/pyarrow.py,sha256=pcNQ3a6UPJT1PBj6xHOl99UvZft
39
39
  thds/tabularasa/schema/compilation/sphinx.py,sha256=we5X-ZpCk6WH-8KCXAv6Nklg1JZmnkGPT3V2EHa2_rg,17491
40
40
  thds/tabularasa/schema/compilation/sqlite.py,sha256=wSrSlVCYeuTpOf9AOHAnp6gJHkjHZhx8UkgkYgfoQVw,2368
41
41
  thds/tabularasa/schema/compilation/util.py,sha256=YXFe1_yoBobED010hstKIoq-dwLHo6SBv1v1IAw6AYU,3886
42
- thds_tabularasa-0.13.1.dist-info/METADATA,sha256=flLUSZeccW-NUJgBBPwUrBeB94h1-TsjwdIUaNkdB8c,26786
43
- thds_tabularasa-0.13.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
44
- thds_tabularasa-0.13.1.dist-info/entry_points.txt,sha256=PX4ShRonjv6lMsVjrGu8RkFzpyyvgM9EnZlNfMomd9k,61
45
- thds_tabularasa-0.13.1.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
46
- thds_tabularasa-0.13.1.dist-info/RECORD,,
42
+ thds/tabularasa/testing/__init__.py,sha256=XoLzB-DotxFw9KHt2vfH72k7pyAAFI2bW-qqq6nww1g,85
43
+ thds/tabularasa/testing/mock_sqlite.py,sha256=xoV4w_GaDgtZf17iUux2-LA6Va1XRJdC2FU34dysh0o,4769
44
+ thds_tabularasa-0.14.0.dist-info/METADATA,sha256=fzbOzf8zgv-IBEcUN_6stkhIkhccgbrmJBi_jbGdkS4,26786
45
+ thds_tabularasa-0.14.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
46
+ thds_tabularasa-0.14.0.dist-info/entry_points.txt,sha256=PX4ShRonjv6lMsVjrGu8RkFzpyyvgM9EnZlNfMomd9k,61
47
+ thds_tabularasa-0.14.0.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
48
+ thds_tabularasa-0.14.0.dist-info/RECORD,,