thds.tabularasa 0.14.0__py3-none-any.whl → 0.14.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thds/tabularasa/__main__.py +5 -6
- thds/tabularasa/data_dependencies/sqlite.py +6 -1
- thds/tabularasa/loaders/sqlite_util.py +4 -18
- {thds_tabularasa-0.14.0.dist-info → thds_tabularasa-0.14.2.dist-info}/METADATA +1 -1
- {thds_tabularasa-0.14.0.dist-info → thds_tabularasa-0.14.2.dist-info}/RECORD +8 -8
- {thds_tabularasa-0.14.0.dist-info → thds_tabularasa-0.14.2.dist-info}/WHEEL +0 -0
- {thds_tabularasa-0.14.0.dist-info → thds_tabularasa-0.14.2.dist-info}/entry_points.txt +0 -0
- {thds_tabularasa-0.14.0.dist-info → thds_tabularasa-0.14.2.dist-info}/top_level.txt +0 -0
thds/tabularasa/__main__.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
import os
|
|
3
2
|
import shutil
|
|
4
3
|
import subprocess
|
|
5
4
|
import sys
|
|
@@ -14,7 +13,7 @@ from typing import Dict, Iterable, Iterator, List, NamedTuple, Optional, Set, Tu
|
|
|
14
13
|
import networkx as nx
|
|
15
14
|
import pkg_resources
|
|
16
15
|
|
|
17
|
-
from thds.core import parallel
|
|
16
|
+
from thds.core import link, parallel
|
|
18
17
|
from thds.tabularasa.data_dependencies.adls import (
|
|
19
18
|
ADLSFileIntegrityError,
|
|
20
19
|
ADLSFileSystem,
|
|
@@ -730,7 +729,7 @@ class ReferenceDataManager:
|
|
|
730
729
|
file_path = self.data_path_for(table)
|
|
731
730
|
if file_path.exists():
|
|
732
731
|
self.logger.warning(f"Removing built file for table {table.name} at {file_path}")
|
|
733
|
-
|
|
732
|
+
file_path.unlink()
|
|
734
733
|
else:
|
|
735
734
|
self.logger.info(f"No file found for table {table.name}; nothing to remove")
|
|
736
735
|
try:
|
|
@@ -763,7 +762,7 @@ class ReferenceDataManager:
|
|
|
763
762
|
for table in tables_to_update:
|
|
764
763
|
table_name = table.name
|
|
765
764
|
table_path = self.data_path_for(table)
|
|
766
|
-
if
|
|
765
|
+
if table_path.exists():
|
|
767
766
|
md5 = hash_file(table_path)
|
|
768
767
|
old_md5 = table.md5
|
|
769
768
|
if old_md5 is None:
|
|
@@ -847,10 +846,10 @@ class ReferenceDataManager:
|
|
|
847
846
|
local_cache_path = paths[0].local_path
|
|
848
847
|
if sync_data.local_file_exists:
|
|
849
848
|
self.logger.warning(f"Removing existing file {sync_data.local_path}")
|
|
850
|
-
|
|
849
|
+
sync_data.local_path.unlink()
|
|
851
850
|
self.logger.info(f"Linking downloaded file to local build file {sync_data.local_path}")
|
|
852
851
|
sync_data.local_path.parent.mkdir(parents=True, exist_ok=True)
|
|
853
|
-
|
|
852
|
+
link.link(local_cache_path, sync_data.local_path)
|
|
854
853
|
return True
|
|
855
854
|
|
|
856
855
|
def sync_blob_store(
|
|
@@ -222,7 +222,9 @@ def populate_sqlite_db(
|
|
|
222
222
|
table_predicate: Callable[[Table], bool] = is_build_time_package_table,
|
|
223
223
|
data_path_overrides: Optional[Mapping[str, Path]] = None,
|
|
224
224
|
):
|
|
225
|
-
"""Populate a sqlite database with data for a set of tables from a `reference_data.schema.Schema
|
|
225
|
+
"""Populate a sqlite database with data for a set of tables from a `reference_data.schema.Schema`.
|
|
226
|
+
Note that this can safely be called concurrently in multiple processes on the same database file; a file lock
|
|
227
|
+
is acquired on the database file and only released when the data insertion is complete.
|
|
226
228
|
|
|
227
229
|
:param schema: the `reference_data.schema.Schema` object defining the data to be inserted
|
|
228
230
|
:param db_package: name of the package where the database file is stored, if any. In case `None` is
|
|
@@ -259,6 +261,9 @@ def populate_sqlite_db(
|
|
|
259
261
|
# gather all tables before executing any I/O
|
|
260
262
|
insert_tables = [table for table in schema.filter_tables(table_predicate) if table.has_indexes]
|
|
261
263
|
|
|
264
|
+
if not insert_tables:
|
|
265
|
+
return
|
|
266
|
+
|
|
262
267
|
with bulk_write_connection(db_path, db_package, close=True) as con:
|
|
263
268
|
for table in insert_tables:
|
|
264
269
|
table_filename: Optional[str]
|
|
@@ -22,7 +22,6 @@ from thds.tabularasa.sqlite3_compat import sqlite3
|
|
|
22
22
|
|
|
23
23
|
DEFAULT_ATTR_SQLITE_CACHE_SIZE = 100_000
|
|
24
24
|
DEFAULT_MMAP_BYTES = int(os.environ.get("TABULA_RASA_DEFAULT_MMAP_BYTES", 8_589_934_592)) # 8 GB
|
|
25
|
-
DISABLE_WAL_MODE = bool(os.environ.get("REF_D_DISABLE_SQLITE_WAL_MODE", False))
|
|
26
25
|
|
|
27
26
|
PARAMETERIZABLE_BUILTINS = sys.version_info >= (3, 9)
|
|
28
27
|
|
|
@@ -159,11 +158,6 @@ def set_bulk_write_mode(con: sqlite3.Connection) -> sqlite3.Connection:
|
|
|
159
158
|
logger.debug("Setting pragmas for bulk write optimization")
|
|
160
159
|
# https://www.sqlite.org/pragma.html#pragma_synchronous
|
|
161
160
|
_log_exec_sql(logger, con, "PRAGMA synchronous = 0") # OFF
|
|
162
|
-
# https://www.sqlite.org/pragma.html#pragma_journal_mode
|
|
163
|
-
if not DISABLE_WAL_MODE:
|
|
164
|
-
_log_exec_sql(logger, con, "PRAGMA journal_mode = WAL")
|
|
165
|
-
# https://www.sqlite.org/pragma.html#pragma_locking_mode
|
|
166
|
-
_log_exec_sql(logger, con, "PRAGMA locking_mode = EXCLUSIVE")
|
|
167
161
|
|
|
168
162
|
return con
|
|
169
163
|
|
|
@@ -171,16 +165,7 @@ def set_bulk_write_mode(con: sqlite3.Connection) -> sqlite3.Connection:
|
|
|
171
165
|
def unset_bulk_write_mode(con: sqlite3.Connection) -> sqlite3.Connection:
|
|
172
166
|
logger = logging.getLogger(__name__)
|
|
173
167
|
logger.debug("Setting pragmas for bulk write optimization")
|
|
174
|
-
# https://www.sqlite.org/pragma.html#pragma_journal_mode
|
|
175
|
-
# resetting this to the default. This is a property of the database, rather than the connection.
|
|
176
|
-
# the other settings are connection-specific.
|
|
177
|
-
# according to the docs, the WAL journal mode should be disabled before the locking mode is restored,
|
|
178
|
-
# else any attempt to do so is a no-op.
|
|
179
|
-
_log_exec_sql(logger, con, "PRAGMA journal_mode = DELETE")
|
|
180
|
-
# https://www.sqlite.org/pragma.html#pragma_synchronous
|
|
181
168
|
_log_exec_sql(logger, con, "PRAGMA synchronous = 2") # FULL (default)
|
|
182
|
-
# https://www.sqlite.org/pragma.html#pragma_locking_mode
|
|
183
|
-
_log_exec_sql(logger, con, "PRAGMA locking_mode = NORMAL")
|
|
184
169
|
|
|
185
170
|
return con
|
|
186
171
|
|
|
@@ -191,9 +176,10 @@ def bulk_write_connection(
|
|
|
191
176
|
) -> ty.Generator[sqlite3.Connection, None, None]:
|
|
192
177
|
"""Context manager to set/unset bulk write mode on a sqlite connection. Sets pragmas for efficient bulk writes,
|
|
193
178
|
such as loosening synchronous and locking modes. If `close` is True, the connection will be closed on exit.
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
179
|
+
To avoid bulk insert routines being run by other processes concurrently, we also acquire a file lock on the
|
|
180
|
+
database file on entry and release it on exit. Other processes attempting to perform bulk writes to the same file
|
|
181
|
+
will block until the lock is released. In the case of tabularasa init-sqlite, the semantics then imply that those
|
|
182
|
+
workers will perform no writes at all, since metadata will indicate that the data in the file is up-to-date.
|
|
197
183
|
"""
|
|
198
184
|
db_path_ = to_local_path(db_path, db_package).absolute()
|
|
199
185
|
lock_path = db_path_.with_suffix(".lock")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: thds.tabularasa
|
|
3
|
-
Version: 0.14.0
|
|
3
|
+
Version: 0.14.2
|
|
4
4
|
Summary: Trilliant Health reference data build system.
|
|
5
5
|
Author-email: Trilliant Health <info@trillianthealth.com>
|
|
6
6
|
Project-URL: Repository, https://github.com/TrilliantHealth/ds-monorepo
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
thds/tabularasa/__init__.py,sha256=jc6w1WD868MQ2t4wkRNYvRssojwXvPDcNyC8V5gwbl0,169
|
|
2
|
-
thds/tabularasa/__main__.py,sha256=
|
|
2
|
+
thds/tabularasa/__main__.py,sha256=DlaUfXu03tbBVucRuMw7354LeBs8d5tRCuAprZs0XYs,47778
|
|
3
3
|
thds/tabularasa/compat.py,sha256=j0313TPIXtkbfvRI0AH4if8GLrjQSrDJ9heayCIl9w8,1037
|
|
4
4
|
thds/tabularasa/git_util.py,sha256=fBFhaCPi_5W2BpG2B3WiPcAWJvuVI_pG47rt73wLO6E,1388
|
|
5
5
|
thds/tabularasa/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -9,7 +9,7 @@ thds/tabularasa/to_sqlite.py,sha256=5lcEUh38MNebxAJdLp2XGWOP_WQDIADtL1fyhOvi9UU,
|
|
|
9
9
|
thds/tabularasa/data_dependencies/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
10
|
thds/tabularasa/data_dependencies/adls.py,sha256=vJAuc5Key-vO1N6DGo5dj9fIx_4hMALAVC17qhvkT7Y,3257
|
|
11
11
|
thds/tabularasa/data_dependencies/build.py,sha256=6iYgw93sOF2Nlnb6WSmA9NvPNwOf_Yyi2wXUQpRVkJM,23382
|
|
12
|
-
thds/tabularasa/data_dependencies/sqlite.py,sha256=
|
|
12
|
+
thds/tabularasa/data_dependencies/sqlite.py,sha256=sMP_NInBEDoH5SScIRYxtOvcPUi9WXfE3_jCoOBduGo,12825
|
|
13
13
|
thds/tabularasa/data_dependencies/tabular.py,sha256=oq9wFse235ikLEv8Zvol59ptRRojZbkbzXJyQeFfC9o,6529
|
|
14
14
|
thds/tabularasa/data_dependencies/util.py,sha256=FQ9G1nIpqKh00z2lXOt0Y2R1mLQsEb-BC6Tka1z2egc,8489
|
|
15
15
|
thds/tabularasa/diff/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -19,7 +19,7 @@ thds/tabularasa/diff/summary.py,sha256=gENtDwhSrDYeN-8fWr6Ug2zgdp584b0pZF9UBYzKF
|
|
|
19
19
|
thds/tabularasa/loaders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
20
20
|
thds/tabularasa/loaders/lazy_adls.py,sha256=jrWy5tTKDQfWEv6aHQ3UJhFzLrDPOlSGsArv9zcl1g8,1375
|
|
21
21
|
thds/tabularasa/loaders/parquet_util.py,sha256=u75j3PkMSakO2zfq4zksWzXLYnaO--WizAgXTcSpXRY,13354
|
|
22
|
-
thds/tabularasa/loaders/sqlite_util.py,sha256=
|
|
22
|
+
thds/tabularasa/loaders/sqlite_util.py,sha256=3Gi1Y4iTVCD9FXqylQw1eyFwVuplQUrjY1J0SC5FFWg,11099
|
|
23
23
|
thds/tabularasa/loaders/util.py,sha256=XmsGkDdL6O8R6B4667Iqi5HoRgq0YMs6LP3VvPIqPVU,21369
|
|
24
24
|
thds/tabularasa/schema/__init__.py,sha256=bowvNXrrDrWB3TAmwDxCeEAvVEe9z7iRfqRaNg1Qmo4,440
|
|
25
25
|
thds/tabularasa/schema/constraints.py,sha256=V2vh01BhYR8OVQvgdujqSi0l_fMJvFKYSlBvWExZFG0,9744
|
|
@@ -41,8 +41,8 @@ thds/tabularasa/schema/compilation/sqlite.py,sha256=wSrSlVCYeuTpOf9AOHAnp6gJHkjH
|
|
|
41
41
|
thds/tabularasa/schema/compilation/util.py,sha256=YXFe1_yoBobED010hstKIoq-dwLHo6SBv1v1IAw6AYU,3886
|
|
42
42
|
thds/tabularasa/testing/__init__.py,sha256=XoLzB-DotxFw9KHt2vfH72k7pyAAFI2bW-qqq6nww1g,85
|
|
43
43
|
thds/tabularasa/testing/mock_sqlite.py,sha256=xoV4w_GaDgtZf17iUux2-LA6Va1XRJdC2FU34dysh0o,4769
|
|
44
|
-
thds_tabularasa-0.14.
|
|
45
|
-
thds_tabularasa-0.14.
|
|
46
|
-
thds_tabularasa-0.14.
|
|
47
|
-
thds_tabularasa-0.14.
|
|
48
|
-
thds_tabularasa-0.14.
|
|
44
|
+
thds_tabularasa-0.14.2.dist-info/METADATA,sha256=H48Ilbbjrag4tb1s1IhIlh5tNbUnwjXdaQ3zSC9rejg,26786
|
|
45
|
+
thds_tabularasa-0.14.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
46
|
+
thds_tabularasa-0.14.2.dist-info/entry_points.txt,sha256=PX4ShRonjv6lMsVjrGu8RkFzpyyvgM9EnZlNfMomd9k,61
|
|
47
|
+
thds_tabularasa-0.14.2.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
|
|
48
|
+
thds_tabularasa-0.14.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|