tablassert 7.2.2__tar.gz → 7.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. {tablassert-7.2.2 → tablassert-7.3.1}/CHANGELOG.md +17 -0
  2. tablassert-7.3.1/Dockerfile +8 -0
  3. {tablassert-7.2.2 → tablassert-7.3.1}/PKG-INFO +15 -1
  4. {tablassert-7.2.2 → tablassert-7.3.1}/README.md +14 -0
  5. tablassert-7.3.1/docs/api/lib.md +239 -0
  6. {tablassert-7.2.2 → tablassert-7.3.1}/mkdocs.yml +1 -0
  7. {tablassert-7.2.2 → tablassert-7.3.1}/pyproject.toml +1 -1
  8. {tablassert-7.2.2 → tablassert-7.3.1}/src/tablassert/lib.py +33 -3
  9. {tablassert-7.2.2 → tablassert-7.3.1}/uv.lock +1 -1
  10. tablassert-7.2.2/Dockerfile +0 -6
  11. {tablassert-7.2.2 → tablassert-7.3.1}/.github/workflows/autotag.yml +0 -0
  12. {tablassert-7.2.2 → tablassert-7.3.1}/.github/workflows/docker.yml +0 -0
  13. {tablassert-7.2.2 → tablassert-7.3.1}/.github/workflows/docs.yml +0 -0
  14. {tablassert-7.2.2 → tablassert-7.3.1}/.github/workflows/pipy.yml +0 -0
  15. {tablassert-7.2.2 → tablassert-7.3.1}/.gitignore +0 -0
  16. {tablassert-7.2.2 → tablassert-7.3.1}/.pre-commit-config.yaml +0 -0
  17. {tablassert-7.2.2 → tablassert-7.3.1}/AGENTS.md +0 -0
  18. {tablassert-7.2.2 → tablassert-7.3.1}/CITATION.cff +0 -0
  19. {tablassert-7.2.2 → tablassert-7.3.1}/CONTRIBUTING.md +0 -0
  20. {tablassert-7.2.2 → tablassert-7.3.1}/LICENSE +0 -0
  21. {tablassert-7.2.2 → tablassert-7.3.1}/docs/api/fullmap.md +0 -0
  22. {tablassert-7.2.2 → tablassert-7.3.1}/docs/api/qc.md +0 -0
  23. {tablassert-7.2.2 → tablassert-7.3.1}/docs/api/utils.md +0 -0
  24. {tablassert-7.2.2 → tablassert-7.3.1}/docs/cli.md +0 -0
  25. {tablassert-7.2.2 → tablassert-7.3.1}/docs/configuration/advanced-example.md +0 -0
  26. {tablassert-7.2.2 → tablassert-7.3.1}/docs/configuration/graph.md +0 -0
  27. {tablassert-7.2.2 → tablassert-7.3.1}/docs/configuration/table.md +0 -0
  28. {tablassert-7.2.2 → tablassert-7.3.1}/docs/datassert.md +0 -0
  29. {tablassert-7.2.2 → tablassert-7.3.1}/docs/docker.md +0 -0
  30. {tablassert-7.2.2 → tablassert-7.3.1}/docs/examples/tutorial-data.csv +0 -0
  31. {tablassert-7.2.2 → tablassert-7.3.1}/docs/examples/tutorial-graph.yaml +0 -0
  32. {tablassert-7.2.2 → tablassert-7.3.1}/docs/examples/tutorial-table.yaml +0 -0
  33. {tablassert-7.2.2 → tablassert-7.3.1}/docs/examples.md +0 -0
  34. {tablassert-7.2.2 → tablassert-7.3.1}/docs/index.md +0 -0
  35. {tablassert-7.2.2 → tablassert-7.3.1}/docs/installation.md +0 -0
  36. {tablassert-7.2.2 → tablassert-7.3.1}/docs/tutorial.md +0 -0
  37. {tablassert-7.2.2 → tablassert-7.3.1}/llms.txt +0 -0
  38. {tablassert-7.2.2 → tablassert-7.3.1}/src/tablassert/__init__.py +0 -0
  39. {tablassert-7.2.2 → tablassert-7.3.1}/src/tablassert/cli.py +0 -0
  40. {tablassert-7.2.2 → tablassert-7.3.1}/src/tablassert/downloader.py +0 -0
  41. {tablassert-7.2.2 → tablassert-7.3.1}/src/tablassert/enums.py +0 -0
  42. {tablassert-7.2.2 → tablassert-7.3.1}/src/tablassert/fullmap.py +0 -0
  43. {tablassert-7.2.2 → tablassert-7.3.1}/src/tablassert/ingests.py +0 -0
  44. {tablassert-7.2.2 → tablassert-7.3.1}/src/tablassert/log.py +0 -0
  45. {tablassert-7.2.2 → tablassert-7.3.1}/src/tablassert/models.py +0 -0
  46. {tablassert-7.2.2 → tablassert-7.3.1}/src/tablassert/nlp.py +0 -0
  47. {tablassert-7.2.2 → tablassert-7.3.1}/src/tablassert/qc.py +0 -0
  48. {tablassert-7.2.2 → tablassert-7.3.1}/src/tablassert/utils.py +0 -0
  49. {tablassert-7.2.2 → tablassert-7.3.1}/tests/__init__.py +0 -0
  50. {tablassert-7.2.2 → tablassert-7.3.1}/tests/conftest.py +0 -0
  51. {tablassert-7.2.2 → tablassert-7.3.1}/tests/fixtures/invalid_section_missing_source.yaml +0 -0
  52. {tablassert-7.2.2 → tablassert-7.3.1}/tests/fixtures/minimal_section.yaml +0 -0
  53. {tablassert-7.2.2 → tablassert-7.3.1}/tests/fixtures/minimal_section_with_sections.yaml +0 -0
  54. {tablassert-7.2.2 → tablassert-7.3.1}/tests/test_enums.py +0 -0
  55. {tablassert-7.2.2 → tablassert-7.3.1}/tests/test_fullmap.py +0 -0
  56. {tablassert-7.2.2 → tablassert-7.3.1}/tests/test_ingests.py +0 -0
  57. {tablassert-7.2.2 → tablassert-7.3.1}/tests/test_lib.py +0 -0
  58. {tablassert-7.2.2 → tablassert-7.3.1}/tests/test_models.py +0 -0
  59. {tablassert-7.2.2 → tablassert-7.3.1}/tests/test_nlp.py +0 -0
  60. {tablassert-7.2.2 → tablassert-7.3.1}/tests/test_utils.py +0 -0
@@ -2,6 +2,23 @@
2
2
 
3
3
  All notable changes to this project are documented in this file.
4
4
 
5
+ ## 7.3.1 - 2026-04-03
6
+
7
+ ### Changes
8
+ - Changed `resolve_many()` return type from `dict[str, list[str]]` to `list[dict[str, Any]]` — each resolved entity is now a row dictionary, produced via `to_dicts()`.
9
+ - `resolve_many()` now preserves the original input text in an `original {col}` key on each result row.
10
+
11
+ ### Documentation
12
+ - Updated `resolve_many()` API reference to match the current function signature, return type, and output format.
13
+
14
+ ## 7.3.0 - 2026-04-03
15
+
16
+ ### New Features
17
+ - Added `resolve_many()` to `lib` module — a standalone batch entity resolution function that resolves an iterable of text strings to CURIEs without requiring manual LazyFrame setup, NLP preprocessing, or DuckDB connection management.
18
+
19
+ ### Documentation
20
+ - Added detailed API reference page for `resolve_many()` covering function signature, parameters, return value, usage examples, and integration notes.
21
+
5
22
  ## 7.2.2 - 2026-04-01
6
23
 
7
24
  ### Bug Fixes
@@ -0,0 +1,8 @@
1
+ FROM python:3.14-slim
2
+
3
+ RUN pip install --no-cache-dir "tablassert"
4
+
5
+ VOLUME ["/data", "/datassert"]
6
+
7
+ ENTRYPOINT ["tablassert"]
8
+ CMD ["--help"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tablassert
3
- Version: 7.2.2
3
+ Version: 7.3.1
4
4
  Summary: Extract knowledge assertions from tabular data into NCATS Translator-compliant KGX NDJSON — declaratively, with entity resolution and quality control built in.
5
5
  Project-URL: Homepage, https://github.com/SkyeAv/Tablassert
6
6
  Project-URL: Source, https://github.com/SkyeAv/Tablassert
@@ -93,6 +93,20 @@ docker run --rm \
93
93
 
94
94
  </details>
95
95
 
96
+ ## Quick Demo
97
+
98
+ ```bash
99
+ # Build a knowledge graph from a YAML configuration
100
+ $ tablassert build-knowledge-graph graph-config.yaml
101
+ ⠋ Loading table configurations...
102
+ ⠋ Resolving entities across 16 DuckDB shards...
103
+ ⠋ Compiling subgraphs...
104
+ ⠋ Deduplicating nodes and edges...
105
+ ✓ Done — wrote nodes.ndjson and edges.ndjson to .storassert/
106
+ ```
107
+
108
+ Define your entities and relationships in YAML, point tablassert at your data, and get NCATS Translator-compliant KGX NDJSON out the other side — no code required.
109
+
96
110
  ## Key Features
97
111
 
98
112
  - **Declarative Configuration** — YAML-based, no code required
@@ -41,6 +41,20 @@ docker run --rm \
41
41
 
42
42
  </details>
43
43
 
44
+ ## Quick Demo
45
+
46
+ ```bash
47
+ # Build a knowledge graph from a YAML configuration
48
+ $ tablassert build-knowledge-graph graph-config.yaml
49
+ ⠋ Loading table configurations...
50
+ ⠋ Resolving entities across 16 DuckDB shards...
51
+ ⠋ Compiling subgraphs...
52
+ ⠋ Deduplicating nodes and edges...
53
+ ✓ Done — wrote nodes.ndjson and edges.ndjson to .storassert/
54
+ ```
55
+
56
+ Define your entities and relationships in YAML, point tablassert at your data, and get NCATS Translator-compliant KGX NDJSON out the other side — no code required.
57
+
44
58
  ## Key Features
45
59
 
46
60
  - **Declarative Configuration** — YAML-based, no code required
@@ -0,0 +1,239 @@
1
+ # Batch Resolution (lib)
2
+
3
+ The `lib` module exposes `resolve_many()`, a high-level convenience function for resolving an iterable of entity strings to CURIEs without requiring manual LazyFrame construction, NLP preprocessing, or DuckDB shard management.
4
+
5
+ It wraps the lower-level [`resolve()`](fullmap.md) pipeline — applying `level_one` and `level_two` normalization, opening all 16 DuckDB shard connections, executing entity resolution, and returning results as a list of plain Python dictionaries, one per resolved entity.
6
+
7
+ ## resolve_many()
8
+
9
+ Standalone batch entity resolution function. Accepts a column name, an iterable of text strings, and a path to the datassert database, then returns resolved CURIEs and metadata as a list of row dictionaries (one per resolved entity).
10
+
11
+ ### Function Signature
12
+
13
+ ```python
14
+ def resolve_many(
15
+ col: str,
16
+ entities: Iterable[str],
17
+ datassert: Path,
18
+ taxon: Optional[str] = None,
19
+ prioritize: Optional[list[Categories]] = None,
20
+ avoid: Optional[list[Categories]] = None,
21
+ column_context: bool = True,
22
+ ) -> list[dict[str, Any]]
23
+ ```
24
+
25
+ ### Parameters
26
+
27
+ **`col: str`**
28
+
29
+ Column name used internally to label the Polars Series and DataFrame columns during resolution. This name propagates through the NLP and resolution pipeline and determines the keys in each returned row dictionary.
30
+
31
+ For example, if `col="gene"`, each returned row dictionary will contain keys like `"gene"`, `"gene name"`, `"gene category"`, etc.
32
+
33
+ **`entities: Iterable[str]`**
34
+
35
+ An iterable of text strings to resolve. Each string is treated as a candidate entity name that will be normalized and matched against the datassert synonym database. Accepts any iterable — lists, tuples, generators, sets, etc.
36
+
37
+ Examples: `["TP53", "BRCA1", "EGFR"]`, `("aspirin", "ibuprofen")`, or a generator expression.
38
+
39
+ **`datassert: Path`**
40
+
41
+ Filesystem path to the root of the datassert database directory. The function expects a `data/` subdirectory containing 16 DuckDB shard files (`0.duckdb` through `15.duckdb`).
42
+
43
+ Each shard contains:
44
+ - Synonym mappings (text → CURIE)
45
+ - Preferred entity names
46
+ - Biolink categories
47
+ - NCBI Taxon IDs
48
+ - Source databases and versions
49
+
50
+ **`taxon: Optional[str]` (default: `None`)**
51
+
52
+ Optional NCBI Taxon ID for filtering results to a specific organism.
53
+
54
+ Example: `"9606"` restricts matches to human-specific entities. When `None`, no taxon filtering is applied and matches from all organisms are returned.
55
+
56
+ **`prioritize: Optional[list[Categories]]` (default: `None`)**
57
+
58
+ Optional list of Biolink categories to prefer when multiple matches exist for the same input term. Categories listed here receive higher ranking scores during resolution.
59
+
60
+ Example: `[Categories.Gene, Categories.Protein]` prefers gene and protein mappings over other categories like diseases or chemicals.
61
+
62
+ **`avoid: Optional[list[Categories]]` (default: `None`)**
63
+
64
+ Optional list of Biolink categories to exclude from results entirely. Any match belonging to an avoided category is filtered out before ranking.
65
+
66
+ Example: `[Categories.Gene]` prevents gene mappings from appearing in the output, even if they would otherwise be the best match.
67
+
68
+ **`column_context: bool` (default: `True`)**
69
+
70
+ Controls category-frequency tie-breaking when multiple matches exist for a term. When `True`, the resolution query adds a category frequency score and prefers the category that appears most frequently across all terms in the batch. When `False`, frequency-based tie-breaking is disabled.
71
+
72
+ This is useful when resolving a column of related entities (e.g., all genes) — the shared context helps disambiguate terms that map to multiple categories.
73
+
74
+ ### Return Value
75
+
76
+ Returns a `list[dict[str, Any]]` — one dictionary per resolved entity. The list is produced by calling `polars.DataFrame.to_dicts()` on the collected resolution output.
77
+
78
+ Each dictionary contains the following keys (where `{col}` is the value of the `col` parameter):
79
+
80
+ | Key | Description | Example Value |
81
+ |-----|-------------|---------------|
82
+ | `original {col}` | Original input text before normalization | `"TP53"` |
83
+ | `{col}` | CURIE identifier | `"HGNC:11998"` |
84
+ | `{col} name` | Preferred entity name | `"TP53"` |
85
+ | `{col} category` | Biolink category (prefixed) | `"biolink:Gene"` |
86
+ | `{col} taxon` | NCBI Taxon ID (prefixed) | `"NCBITaxon:9606"` |
87
+ | `{col} source` | Source database | `"HGNC"` |
88
+ | `{col} source version` | Database version | `"2025-01"` |
89
+ | `{col} nlp level` | NLP processing level used for match | `0` or `1` |
90
+
91
+ **Important:** Only entities that successfully resolve to a CURIE are included in the output. Unresolved entities are filtered out by `resolve()`. The returned list may therefore be shorter than the input iterable.
92
+
93
+ ### Pipeline Internals
94
+
95
+ `resolve_many()` executes the following steps internally:
96
+
97
+ 1. **Series construction** — Wraps the input iterable in a `pl.Series` with the given column name, then converts to a single-column `pl.LazyFrame`.
98
+
99
+ 2. **NLP normalization** — Applies `level_one()` (whitespace stripping + lowercasing) and `level_two()` (non-word character removal via `\W+`) to produce the two normalized columns required by `resolve()`.
100
+
101
+ 3. **DuckDB connection management** — Opens all 16 shard connections inside a `contextlib.ExitStack`, ensuring every connection is properly closed when resolution completes or if an error occurs.
102
+
103
+ 4. **Entity resolution** — Delegates to `fullmap.resolve()` which queries the sharded DuckDB database, ranks matches by category priority, preferred-name exactness, NLP level, and category frequency, then deduplicates to one CURIE per input string.
104
+
105
+ 5. **Collection and conversion** — Collects the lazy result into an eager `pl.DataFrame` and converts to a list of row dictionaries via `to_dicts()`.
106
+
107
+ ### Example Usage
108
+
109
+ #### Basic Gene Resolution
110
+
111
+ ```python
112
+ from pathlib import Path
113
+ from typing import Any
114
+ from tablassert.lib import resolve_many
115
+ from tablassert.enums import Categories
116
+
117
+ datassert: Path = Path("/path/to/datassert")
118
+
119
+ result: list[dict[str, Any]] = resolve_many(
120
+ col="gene",
121
+ entities=["TP53", "BRCA1", "EGFR", "KRAS"],
122
+ datassert=datassert,
123
+ taxon="9606",
124
+ prioritize=[Categories.Gene],
125
+ )
126
+
127
+ # result[0] → {"original gene": "TP53", "gene": "HGNC:11998", "gene name": "TP53", ...}
128
+ # result[1] → {"original gene": "BRCA1", "gene": "HGNC:1100", "gene name": "BRCA1", ...}
129
+ ```
130
+
131
+ #### Disease Resolution With Category Avoidance
132
+
133
+ ```python
134
+ from pathlib import Path
135
+ from typing import Any
136
+ from tablassert.lib import resolve_many
137
+ from tablassert.enums import Categories
138
+
139
+ datassert: Path = Path("/path/to/datassert")
140
+
141
+ result: list[dict[str, Any]] = resolve_many(
142
+ col="disease",
143
+ entities=["diabetes mellitus", "breast cancer", "alzheimer disease"],
144
+ datassert=datassert,
145
+ avoid=[Categories.Gene, Categories.Protein],
146
+ )
147
+
148
+ # result[0] → {"original disease": "diabetes mellitus", "disease": "MONDO:0005015", ...}
149
+ # result[1] → {"original disease": "breast cancer", "disease": "MONDO:0007254", "disease name": "breast cancer", ...}
150
+ ```
151
+
152
+ #### Chemical Resolution Without Column Context
153
+
154
+ ```python
155
+ from pathlib import Path
156
+ from typing import Any
157
+ from tablassert.lib import resolve_many
158
+
159
+ datassert: Path = Path("/path/to/datassert")
160
+
161
+ result: list[dict[str, Any]] = resolve_many(
162
+ col="chemical",
163
+ entities=["aspirin", "metformin", "ibuprofen"],
164
+ datassert=datassert,
165
+ column_context=False,
166
+ )
167
+ ```
168
+
169
+ #### Consuming Results
170
+
171
+ ```python
172
+ import polars as pl
173
+ from pathlib import Path
174
+ from typing import Any
175
+ from tablassert.lib import resolve_many
176
+
177
+ datassert: Path = Path("/path/to/datassert")
178
+
179
+ result: list[dict[str, Any]] = resolve_many(
180
+ col="gene",
181
+ entities=["TP53", "BRCA1"],
182
+ datassert=datassert,
183
+ taxon="9606",
184
+ )
185
+
186
+ # Convert back to a Polars DataFrame
187
+ df: pl.DataFrame = pl.DataFrame(result)
188
+
189
+ # Or iterate over resolved rows
190
+ for row in result:
191
+ print(f"{row['gene name']} → {row['gene']}")
192
+ ```
193
+
194
+ ### Comparison With resolve()
195
+
196
+ | Aspect | `resolve_many()` | `resolve()` |
197
+ |--------|-------------------|-------------|
198
+ | **Module** | `tablassert.lib` | `tablassert.fullmap` |
199
+ | **Input** | Plain iterable of strings | Pre-normalized `pl.LazyFrame` |
200
+ | **NLP** | Applied automatically | Must be applied upstream |
201
+ | **Connections** | Managed internally via `ExitStack` | Must be opened externally |
202
+ | **Output** | `list[dict[str, Any]]` | `pl.LazyFrame` |
203
+ | **Logging** | Uses default (`log=True`) | Configurable |
204
+ | **Context params** | Not exposed (`section_hash`, `config_file`, `tag`) | Fully configurable |
205
+ | **Use case** | Standalone batch lookups, scripting, notebooks | Internal pipeline integration |
206
+
207
+ `resolve_many()` is designed for ad-hoc and programmatic use — scripts, notebooks, and one-off lookups. For pipeline integration where you need full control over logging, context metadata, and lazy evaluation, use `resolve()` directly.
208
+
209
+ ### NLP Processing
210
+
211
+ `resolve_many()` applies both NLP normalization levels before resolution:
212
+
213
+ **Level one** — `level_one(lf, col)`:
214
+ - Strips leading/trailing whitespace
215
+ - Converts to lowercase
216
+ - Output column: `{col}` (overwrites the original)
217
+
218
+ **Level two** — `level_two(lf, col)`:
219
+ - Removes all non-word characters (`\W+` → `""`) from the level-one result
220
+ - Output column: `{col} two`
221
+
222
+ Both levels are queried during resolution. Level one (exact case-insensitive match) is preferred; level two is used as a fallback for terms with punctuation or special characters.
223
+
224
+ ### Error Handling
225
+
226
+ - If the `datassert` path does not contain the expected shard files, `duckdb.connect()` will raise an `IOException`.
227
+ - If `entities` is empty, the function returns an empty list.
228
+ - The `ExitStack` ensures all 16 DuckDB connections are closed even if resolution raises an exception.
229
+ - Unresolved entities are silently filtered from the output (logged at INFO level by default via `resolve()`).
230
+
231
+ ## Integration
232
+
233
+ `resolve_many()` is a self-contained entry point. It does not require any prior setup beyond having a datassert database available. For full pipeline builds, use the CLI (`tablassert build-knowledge-graph`) which orchestrates resolution through the `Tcode` class.
234
+
235
+ ## Next Steps
236
+
237
+ - **[Entity Resolution](fullmap.md)** — Lower-level `resolve()` function details
238
+ - **[Quality Control](qc.md)** — Multi-stage validation of resolved entities
239
+ - **[Configuration](../configuration/table.md)** — YAML-driven entity resolution settings
@@ -14,6 +14,7 @@ nav:
14
14
  - Advanced Example: configuration/advanced-example.md
15
15
  - API Reference:
16
16
  - Entity Resolution: api/fullmap.md
17
+ - Batch Resolution: api/lib.md
17
18
  - Quality Control: api/qc.md
18
19
  - Utilities: api/utils.md
19
20
  - Changelog: ../CHANGELOG.md
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "tablassert"
3
- version = "7.2.2"
3
+ version = "7.3.1"
4
4
  description = "Extract knowledge assertions from tabular data into NCATS Translator-compliant KGX NDJSON — declaratively, with entity resolution and quality control built in."
5
5
  authors = [
6
6
  { name = "Skye Lane Goetz", email = "sgoetz@isbscience.org" }
@@ -2,6 +2,8 @@ from __future__ import annotations
2
2
 
3
3
  import math
4
4
  import operator
5
+ from collections.abc import Iterable
6
+ from contextlib import ExitStack
5
7
  from functools import reduce
6
8
  from operator import add, eq, le
7
9
  from os.path import basename
@@ -13,11 +15,11 @@ from pydantic import Field, NonNegativeInt, PositiveInt
13
15
  from sqlite_utils import Database
14
16
 
15
17
  from tablassert.downloader import from_url
16
- from tablassert.enums import EncodingMethods, Files, Tokens
17
- from tablassert.fullmap import resolve
18
+ from tablassert.enums import Categories, EncodingMethods, Files, Tokens
19
+ from tablassert.fullmap import SHARDS, resolve
18
20
  from tablassert.log import logger
19
- from tablassert.nlp import level_one, level_two
20
21
  from tablassert.models import Encoding, NodeEncoding, Section
22
+ from tablassert.nlp import level_one, level_two
21
23
  from tablassert.qc import fullmap_audit
22
24
  from tablassert.utils import namespace_uuid
23
25
 
@@ -475,3 +477,31 @@ def compile_graph(subgraphs: list[Path], name: str, version: str, fmt: str = "mi
475
477
 
476
478
  dedup_stream(e, is_edges=True)
477
479
  dedup_stream(n, is_edges=False)
480
+
481
+
482
+ def resolve_many(
483
+ col: str,
484
+ entities: Iterable[str],
485
+ datassert: Path,
486
+ taxon: Optional[str] = None,
487
+ prioritize: Optional[list[Categories]] = None,
488
+ avoid: Optional[list[Categories]] = None,
489
+ column_context: bool = True,
490
+ ) -> list[dict[str, Any]]:
491
+ series: pl.Series = pl.Series(col, entities)
492
+ lf: pl.LazyFrame = series.to_frame().lazy()
493
+
494
+ lf = column(lf, add("original ", col), col)
495
+ lf = level_one(lf, col)
496
+ lf = level_two(lf, col)
497
+
498
+ with ExitStack() as stack:
499
+ conns: list[object] = [
500
+ stack.enter_context(duckdb.connect(datassert / "data" / f"{x}.duckdb", read_only=True))
501
+ for x in range(SHARDS)
502
+ ]
503
+
504
+ lf = resolve(lf, col, conns, taxon=taxon, prioritize=prioritize, avoid=avoid, column_context=column_context)
505
+
506
+ df: pl.DataFrame = lf.collect()
507
+ return df.to_dicts()
@@ -2211,7 +2211,7 @@ wheels = [
2211
2211
 
2212
2212
  [[package]]
2213
2213
  name = "tablassert"
2214
- version = "7.2.2"
2214
+ version = "7.3.1"
2215
2215
  source = { editable = "." }
2216
2216
  dependencies = [
2217
2217
  { name = "duckdb" },
@@ -1,6 +0,0 @@
1
- FROM python:3.14-slim
2
-
3
- RUN pip install --no-cache-dir "tablassert[full]"
4
-
5
- ENTRYPOINT ["tablassert"]
6
- CMD ["--help"]
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes