thds.tabularasa 0.14.0__py3-none-any.whl → 0.14.1__py3-none-any.whl
This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- thds/tabularasa/data_dependencies/sqlite.py +6 -1
- thds/tabularasa/loaders/sqlite_util.py +4 -18
- {thds_tabularasa-0.14.0.dist-info → thds_tabularasa-0.14.1.dist-info}/METADATA +1 -1
- {thds_tabularasa-0.14.0.dist-info → thds_tabularasa-0.14.1.dist-info}/RECORD +7 -7
- {thds_tabularasa-0.14.0.dist-info → thds_tabularasa-0.14.1.dist-info}/WHEEL +0 -0
- {thds_tabularasa-0.14.0.dist-info → thds_tabularasa-0.14.1.dist-info}/entry_points.txt +0 -0
- {thds_tabularasa-0.14.0.dist-info → thds_tabularasa-0.14.1.dist-info}/top_level.txt +0 -0
|
@@ -222,7 +222,9 @@ def populate_sqlite_db(
|
|
|
222
222
|
table_predicate: Callable[[Table], bool] = is_build_time_package_table,
|
|
223
223
|
data_path_overrides: Optional[Mapping[str, Path]] = None,
|
|
224
224
|
):
|
|
225
|
-
"""Populate a sqlite database with data for a set of tables from a `reference_data.schema.Schema
|
|
225
|
+
"""Populate a sqlite database with data for a set of tables from a `reference_data.schema.Schema`.
|
|
226
|
+
Note that this can safely be called concurrently in multiple processes on the same database file; a file lock
|
|
227
|
+
is acquired on the database file and only released when the data insertion is complete.
|
|
226
228
|
|
|
227
229
|
:param schema: the `reference_data.schema.Schema` object defining the data to be inserted
|
|
228
230
|
:param db_package: name of the package where the database file is stored, if any. In case `None` is
|
|
@@ -259,6 +261,9 @@ def populate_sqlite_db(
|
|
|
259
261
|
# gather all tables before executing any I/O
|
|
260
262
|
insert_tables = [table for table in schema.filter_tables(table_predicate) if table.has_indexes]
|
|
261
263
|
|
|
264
|
+
if not insert_tables:
|
|
265
|
+
return
|
|
266
|
+
|
|
262
267
|
with bulk_write_connection(db_path, db_package, close=True) as con:
|
|
263
268
|
for table in insert_tables:
|
|
264
269
|
table_filename: Optional[str]
|
|
@@ -22,7 +22,6 @@ from thds.tabularasa.sqlite3_compat import sqlite3
|
|
|
22
22
|
|
|
23
23
|
DEFAULT_ATTR_SQLITE_CACHE_SIZE = 100_000
|
|
24
24
|
DEFAULT_MMAP_BYTES = int(os.environ.get("TABULA_RASA_DEFAULT_MMAP_BYTES", 8_589_934_592)) # 8 GB
|
|
25
|
-
DISABLE_WAL_MODE = bool(os.environ.get("REF_D_DISABLE_SQLITE_WAL_MODE", False))
|
|
26
25
|
|
|
27
26
|
PARAMETERIZABLE_BUILTINS = sys.version_info >= (3, 9)
|
|
28
27
|
|
|
@@ -159,11 +158,6 @@ def set_bulk_write_mode(con: sqlite3.Connection) -> sqlite3.Connection:
|
|
|
159
158
|
logger.debug("Setting pragmas for bulk write optimization")
|
|
160
159
|
# https://www.sqlite.org/pragma.html#pragma_synchronous
|
|
161
160
|
_log_exec_sql(logger, con, "PRAGMA synchronous = 0") # OFF
|
|
162
|
-
# https://www.sqlite.org/pragma.html#pragma_journal_mode
|
|
163
|
-
if not DISABLE_WAL_MODE:
|
|
164
|
-
_log_exec_sql(logger, con, "PRAGMA journal_mode = WAL")
|
|
165
|
-
# https://www.sqlite.org/pragma.html#pragma_locking_mode
|
|
166
|
-
_log_exec_sql(logger, con, "PRAGMA locking_mode = EXCLUSIVE")
|
|
167
161
|
|
|
168
162
|
return con
|
|
169
163
|
|
|
@@ -171,16 +165,7 @@ def set_bulk_write_mode(con: sqlite3.Connection) -> sqlite3.Connection:
|
|
|
171
165
|
def unset_bulk_write_mode(con: sqlite3.Connection) -> sqlite3.Connection:
|
|
172
166
|
logger = logging.getLogger(__name__)
|
|
173
167
|
logger.debug("Setting pragmas for bulk write optimization")
|
|
174
|
-
# https://www.sqlite.org/pragma.html#pragma_journal_mode
|
|
175
|
-
# resetting this to the default. This is a property of the database, rather than the connection.
|
|
176
|
-
# the other settings are connection-specific.
|
|
177
|
-
# according to the docs, the WAL journal mode should be disabled before the locking mode is restored,
|
|
178
|
-
# else any attempt to do so is a no-op.
|
|
179
|
-
_log_exec_sql(logger, con, "PRAGMA journal_mode = DELETE")
|
|
180
|
-
# https://www.sqlite.org/pragma.html#pragma_synchronous
|
|
181
168
|
_log_exec_sql(logger, con, "PRAGMA synchronous = 2") # FULL (default)
|
|
182
|
-
# https://www.sqlite.org/pragma.html#pragma_locking_mode
|
|
183
|
-
_log_exec_sql(logger, con, "PRAGMA locking_mode = NORMAL")
|
|
184
169
|
|
|
185
170
|
return con
|
|
186
171
|
|
|
@@ -191,9 +176,10 @@ def bulk_write_connection(
|
|
|
191
176
|
) -> ty.Generator[sqlite3.Connection, None, None]:
|
|
192
177
|
"""Context manager to set/unset bulk write mode on a sqlite connection. Sets pragmas for efficient bulk writes,
|
|
193
178
|
such as loosening synchronous and locking modes. If `close` is True, the connection will be closed on exit.
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
179
|
+
To avoid bulk insert routines being run by other processes concurrently, we also acquire a file lock on the
|
|
180
|
+
database file on entry and release it on exit. Other processes attempting to perform bulk writes to the same file
|
|
181
|
+
will block until the lock is released. In the case of tabularasa init-sqlite, the semantics then imply that those
|
|
182
|
+
workers will perform no writes at all, since metadata will indicate that the data in the file is up-to-date.
|
|
197
183
|
"""
|
|
198
184
|
db_path_ = to_local_path(db_path, db_package).absolute()
|
|
199
185
|
lock_path = db_path_.with_suffix(".lock")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: thds.tabularasa
|
|
3
|
-
Version: 0.14.0
|
|
3
|
+
Version: 0.14.1
|
|
4
4
|
Summary: Trilliant Health reference data build system.
|
|
5
5
|
Author-email: Trilliant Health <info@trillianthealth.com>
|
|
6
6
|
Project-URL: Repository, https://github.com/TrilliantHealth/ds-monorepo
|
|
@@ -9,7 +9,7 @@ thds/tabularasa/to_sqlite.py,sha256=5lcEUh38MNebxAJdLp2XGWOP_WQDIADtL1fyhOvi9UU,
|
|
|
9
9
|
thds/tabularasa/data_dependencies/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
10
|
thds/tabularasa/data_dependencies/adls.py,sha256=vJAuc5Key-vO1N6DGo5dj9fIx_4hMALAVC17qhvkT7Y,3257
|
|
11
11
|
thds/tabularasa/data_dependencies/build.py,sha256=6iYgw93sOF2Nlnb6WSmA9NvPNwOf_Yyi2wXUQpRVkJM,23382
|
|
12
|
-
thds/tabularasa/data_dependencies/sqlite.py,sha256=
|
|
12
|
+
thds/tabularasa/data_dependencies/sqlite.py,sha256=sMP_NInBEDoH5SScIRYxtOvcPUi9WXfE3_jCoOBduGo,12825
|
|
13
13
|
thds/tabularasa/data_dependencies/tabular.py,sha256=oq9wFse235ikLEv8Zvol59ptRRojZbkbzXJyQeFfC9o,6529
|
|
14
14
|
thds/tabularasa/data_dependencies/util.py,sha256=FQ9G1nIpqKh00z2lXOt0Y2R1mLQsEb-BC6Tka1z2egc,8489
|
|
15
15
|
thds/tabularasa/diff/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -19,7 +19,7 @@ thds/tabularasa/diff/summary.py,sha256=gENtDwhSrDYeN-8fWr6Ug2zgdp584b0pZF9UBYzKF
|
|
|
19
19
|
thds/tabularasa/loaders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
20
20
|
thds/tabularasa/loaders/lazy_adls.py,sha256=jrWy5tTKDQfWEv6aHQ3UJhFzLrDPOlSGsArv9zcl1g8,1375
|
|
21
21
|
thds/tabularasa/loaders/parquet_util.py,sha256=u75j3PkMSakO2zfq4zksWzXLYnaO--WizAgXTcSpXRY,13354
|
|
22
|
-
thds/tabularasa/loaders/sqlite_util.py,sha256=
|
|
22
|
+
thds/tabularasa/loaders/sqlite_util.py,sha256=3Gi1Y4iTVCD9FXqylQw1eyFwVuplQUrjY1J0SC5FFWg,11099
|
|
23
23
|
thds/tabularasa/loaders/util.py,sha256=XmsGkDdL6O8R6B4667Iqi5HoRgq0YMs6LP3VvPIqPVU,21369
|
|
24
24
|
thds/tabularasa/schema/__init__.py,sha256=bowvNXrrDrWB3TAmwDxCeEAvVEe9z7iRfqRaNg1Qmo4,440
|
|
25
25
|
thds/tabularasa/schema/constraints.py,sha256=V2vh01BhYR8OVQvgdujqSi0l_fMJvFKYSlBvWExZFG0,9744
|
|
@@ -41,8 +41,8 @@ thds/tabularasa/schema/compilation/sqlite.py,sha256=wSrSlVCYeuTpOf9AOHAnp6gJHkjH
|
|
|
41
41
|
thds/tabularasa/schema/compilation/util.py,sha256=YXFe1_yoBobED010hstKIoq-dwLHo6SBv1v1IAw6AYU,3886
|
|
42
42
|
thds/tabularasa/testing/__init__.py,sha256=XoLzB-DotxFw9KHt2vfH72k7pyAAFI2bW-qqq6nww1g,85
|
|
43
43
|
thds/tabularasa/testing/mock_sqlite.py,sha256=xoV4w_GaDgtZf17iUux2-LA6Va1XRJdC2FU34dysh0o,4769
|
|
44
|
-
thds_tabularasa-0.14.
|
|
45
|
-
thds_tabularasa-0.14.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
46
|
-
thds_tabularasa-0.14.0.dist-info/entry_points.txt,sha256=PX4ShRonjv6lMsVjrGu8RkFzpyyvgM9EnZlNfMomd9k,61
|
|
47
|
-
thds_tabularasa-0.14.0.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
|
|
48
|
-
thds_tabularasa-0.14.0.dist-info/RECORD,,
|
|
44
|
+
thds_tabularasa-0.14.1.dist-info/METADATA,sha256=rqg7l_iBlrh7E8-iXCeZyLcnOxJIuFDBw_5QNe1A9V0,26786
|
|
45
|
+
thds_tabularasa-0.14.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
46
|
+
thds_tabularasa-0.14.1.dist-info/entry_points.txt,sha256=PX4ShRonjv6lMsVjrGu8RkFzpyyvgM9EnZlNfMomd9k,61
|
|
47
|
+
thds_tabularasa-0.14.1.dist-info/top_level.txt,sha256=LTZaE5SkWJwv9bwOlMbIhiS-JWQEEIcjVYnJrt-CriY,5
|
|
48
|
+
thds_tabularasa-0.14.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|