tablassert 7.3.3.tar.gz → 7.3.5.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. {tablassert-7.3.3 → tablassert-7.3.5}/CHANGELOG.md +17 -2
  2. {tablassert-7.3.3 → tablassert-7.3.5}/PKG-INFO +9 -7
  3. {tablassert-7.3.3 → tablassert-7.3.5}/README.md +8 -6
  4. {tablassert-7.3.3 → tablassert-7.3.5}/docs/api/fullmap.md +6 -6
  5. {tablassert-7.3.3 → tablassert-7.3.5}/docs/api/lib.md +9 -9
  6. {tablassert-7.3.3 → tablassert-7.3.5}/docs/api/qc.md +3 -3
  7. tablassert-7.3.5/docs/changelog.md +13 -0
  8. {tablassert-7.3.3 → tablassert-7.3.5}/docs/configuration/advanced-example.md +10 -4
  9. {tablassert-7.3.3 → tablassert-7.3.5}/docs/configuration/graph.md +1 -1
  10. {tablassert-7.3.3 → tablassert-7.3.5}/docs/configuration/table.md +70 -58
  11. {tablassert-7.3.3 → tablassert-7.3.5}/docs/datassert.md +7 -7
  12. {tablassert-7.3.3 → tablassert-7.3.5}/docs/docker.md +2 -2
  13. {tablassert-7.3.3 → tablassert-7.3.5}/docs/examples.md +11 -7
  14. {tablassert-7.3.3 → tablassert-7.3.5}/docs/tutorial.md +1 -1
  15. {tablassert-7.3.3 → tablassert-7.3.5}/mkdocs.yml +1 -1
  16. {tablassert-7.3.3 → tablassert-7.3.5}/pyproject.toml +1 -1
  17. {tablassert-7.3.3 → tablassert-7.3.5}/src/tablassert/downloader.py +15 -4
  18. {tablassert-7.3.3 → tablassert-7.3.5}/src/tablassert/models.py +3 -1
  19. {tablassert-7.3.3 → tablassert-7.3.5}/uv.lock +1 -1
  20. {tablassert-7.3.3 → tablassert-7.3.5}/.github/workflows/autotag.yml +0 -0
  21. {tablassert-7.3.3 → tablassert-7.3.5}/.github/workflows/docker.yml +0 -0
  22. {tablassert-7.3.3 → tablassert-7.3.5}/.github/workflows/docs.yml +0 -0
  23. {tablassert-7.3.3 → tablassert-7.3.5}/.github/workflows/pipy.yml +0 -0
  24. {tablassert-7.3.3 → tablassert-7.3.5}/.gitignore +0 -0
  25. {tablassert-7.3.3 → tablassert-7.3.5}/.pre-commit-config.yaml +0 -0
  26. {tablassert-7.3.3 → tablassert-7.3.5}/AGENTS.md +0 -0
  27. {tablassert-7.3.3 → tablassert-7.3.5}/CITATION.cff +0 -0
  28. {tablassert-7.3.3 → tablassert-7.3.5}/CONTRIBUTING.md +0 -0
  29. {tablassert-7.3.3 → tablassert-7.3.5}/Dockerfile +0 -0
  30. {tablassert-7.3.3 → tablassert-7.3.5}/LICENSE +0 -0
  31. {tablassert-7.3.3 → tablassert-7.3.5}/docs/api/utils.md +0 -0
  32. {tablassert-7.3.3 → tablassert-7.3.5}/docs/cli.md +0 -0
  33. {tablassert-7.3.3 → tablassert-7.3.5}/docs/examples/tutorial-data.csv +0 -0
  34. {tablassert-7.3.3 → tablassert-7.3.5}/docs/examples/tutorial-graph.yaml +0 -0
  35. {tablassert-7.3.3 → tablassert-7.3.5}/docs/examples/tutorial-table.yaml +0 -0
  36. {tablassert-7.3.3 → tablassert-7.3.5}/docs/index.md +0 -0
  37. {tablassert-7.3.3 → tablassert-7.3.5}/docs/installation.md +0 -0
  38. {tablassert-7.3.3 → tablassert-7.3.5}/llms.txt +0 -0
  39. {tablassert-7.3.3 → tablassert-7.3.5}/src/tablassert/__init__.py +0 -0
  40. {tablassert-7.3.3 → tablassert-7.3.5}/src/tablassert/cli.py +0 -0
  41. {tablassert-7.3.3 → tablassert-7.3.5}/src/tablassert/enums.py +0 -0
  42. {tablassert-7.3.3 → tablassert-7.3.5}/src/tablassert/fullmap.py +0 -0
  43. {tablassert-7.3.3 → tablassert-7.3.5}/src/tablassert/ingests.py +0 -0
  44. {tablassert-7.3.3 → tablassert-7.3.5}/src/tablassert/lib.py +0 -0
  45. {tablassert-7.3.3 → tablassert-7.3.5}/src/tablassert/log.py +0 -0
  46. {tablassert-7.3.3 → tablassert-7.3.5}/src/tablassert/nlp.py +0 -0
  47. {tablassert-7.3.3 → tablassert-7.3.5}/src/tablassert/qc.py +0 -0
  48. {tablassert-7.3.3 → tablassert-7.3.5}/src/tablassert/utils.py +0 -0
  49. {tablassert-7.3.3 → tablassert-7.3.5}/tests/__init__.py +0 -0
  50. {tablassert-7.3.3 → tablassert-7.3.5}/tests/conftest.py +0 -0
  51. {tablassert-7.3.3 → tablassert-7.3.5}/tests/fixtures/invalid_section_missing_source.yaml +0 -0
  52. {tablassert-7.3.3 → tablassert-7.3.5}/tests/fixtures/minimal_section.yaml +0 -0
  53. {tablassert-7.3.3 → tablassert-7.3.5}/tests/fixtures/minimal_section_with_sections.yaml +0 -0
  54. {tablassert-7.3.3 → tablassert-7.3.5}/tests/test_enums.py +0 -0
  55. {tablassert-7.3.3 → tablassert-7.3.5}/tests/test_fullmap.py +0 -0
  56. {tablassert-7.3.3 → tablassert-7.3.5}/tests/test_ingests.py +0 -0
  57. {tablassert-7.3.3 → tablassert-7.3.5}/tests/test_lib.py +0 -0
  58. {tablassert-7.3.3 → tablassert-7.3.5}/tests/test_models.py +0 -0
  59. {tablassert-7.3.3 → tablassert-7.3.5}/tests/test_nlp.py +0 -0
  60. {tablassert-7.3.3 → tablassert-7.3.5}/tests/test_utils.py +0 -0
CHANGELOG.md
@@ -2,13 +2,28 @@
  
  All notable changes to this project are documented in this file.
  
+ ## 7.3.5 - 2026-04-29
+ 
+ ### Documentation
+ - Tightened the table-configuration reference so field requirements, defaults, accepted enum values, row indexing, and column-reference examples match the strict `Section` schema and section-merging behavior implemented in `models.py`, `ingests.py`, and the runtime loader.
+ 
+ ## 7.3.4 - 2026-04-28
+ 
+ ### Bug Fixes
+ - Fixed `downloader.from_url()` failing on URLs that trigger an immediate download. The Playwright session now opens a browser context with `accept_downloads=True`, wraps `page.goto()` inside `page.expect_download()`, and tolerates the expected `net::ERR_ABORTED` navigation error that fires when the response is a download rather than a page.
+ 
+ ### Documentation
+ - Documented `miscellaneous notes` as a freetext catch-all annotation in the table configuration and advanced-example pages — used for assay caveats, non-standard units, and qualitative observations that don't map cleanly to a structured field. Supports both `method: value` (constant) and `method: column` (per-row).
+ - Documented Polars regex constraints for the `regex` and `remove` transforms: patterns are passed to Polars `str.replace_all()` (Rust `regex` crate), so capturing groups (`(...)` / `\1`) and lookarounds (`(?=...)`, `(?<=...)`, `(?!...)`, `(?<!...)`) are not supported and will raise at parse time. Chain simple substitutions instead, or capture residual context in a `miscellaneous notes` annotation.
+ 
  ## 7.3.3 - 2026-04-08
  
  ### Bug Fixes
- - Changed datassert shard count from 16 to 12 (`SHARDS` constant in `fullmap.py`) to correspond to the updated datassert database layout.
+ - Changed datassert shard count to 10 (`SHARDS` constant in `fullmap.py`) to correspond to the current datassert database layout.
  
  ### Documentation
- - Updated all shard count references across documentation and examples to reflect the new 12-shard datassert layout.
+ - Updated shard count references across documentation and examples to reflect the current 10-shard datassert layout.
+ - Corrected provenance examples so `repo` carries the namespace prefix and `publication` carries the repository-local identifier.
  
  ## 7.3.2 - 2026-04-03
  
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: tablassert
- Version: 7.3.3
+ Version: 7.3.5
  Summary: Extract knowledge assertions from tabular data into NCATS Translator-compliant KGX NDJSON — declaratively, with entity resolution and quality control built in.
  Project-URL: Homepage, https://github.com/SkyeAv/Tablassert
  Project-URL: Source, https://github.com/SkyeAv/Tablassert
@@ -98,14 +98,16 @@ docker run --rm \
  ```bash
  # Build a knowledge graph from a YAML configuration
  $ tablassert build-knowledge-graph graph-config.yaml
- ⠋ Loading table configurations...
- Resolving entities across 12 DuckDB shards...
- Compiling subgraphs...
- Deduplicating nodes and edges...
- Done — wrote nodes.ndjson and edges.ndjson to .storassert/
+ ⠋ Loading Tables...
+ Extracting Sections...
+ Building TCode...
+ Collecting Instructions...
+ Building Subgraphs...
+ ⠋ Compiling Graph...
+ ✓ Finished!
  ```
  
- Define your entities and relationships in YAML, point tablassert at your data, and get NCATS Translator-compliant KGX NDJSON out the other side — no code required.
+ Define your entities and relationships in YAML, point tablassert at your data, and get NCATS Translator-compliant KGX NDJSON out the other side — no code required. Intermediate section artifacts are staged in `.storassert/` during the build.
  
  ## Key Features
  
README.md
@@ -46,14 +46,16 @@ docker run --rm \
  ```bash
  # Build a knowledge graph from a YAML configuration
  $ tablassert build-knowledge-graph graph-config.yaml
- ⠋ Loading table configurations...
- Resolving entities across 12 DuckDB shards...
- Compiling subgraphs...
- Deduplicating nodes and edges...
- Done — wrote nodes.ndjson and edges.ndjson to .storassert/
+ ⠋ Loading Tables...
+ Extracting Sections...
+ Building TCode...
+ Collecting Instructions...
+ Building Subgraphs...
+ ⠋ Compiling Graph...
+ ✓ Finished!
  ```
  
- Define your entities and relationships in YAML, point tablassert at your data, and get NCATS Translator-compliant KGX NDJSON out the other side — no code required.
+ Define your entities and relationships in YAML, point tablassert at your data, and get NCATS Translator-compliant KGX NDJSON out the other side — no code required. Intermediate section artifacts are staged in `.storassert/` during the build.
  
  ## Key Features
  
docs/api/fullmap.md
@@ -36,7 +36,7 @@ Column name containing text strings to resolve.
  
  **`conns: list[object]`**
  
- List of 12 DuckDB shard connections to the datassert database.
+ List of 10 DuckDB shard connections to the datassert database.
  
  Each shard contains:
  - Synonym mappings (text → CURIE)
@@ -97,7 +97,7 @@ Returns a Polars LazyFrame with these columns added:
  | `{col} taxon` | NCBI Taxon ID | `"NCBITaxon:9606"` |
  | `{col} source` | Source database | `"HGNC"` |
  | `{col} source version` | Database version | `"2025-01"` |
- | `{col} nlp level` | NLP processing level | `0` or `1` |
+ | `{col} nlp level` | NLP processing level | `1` or `2` |
  
  ### DuckDB Query
  
@@ -125,11 +125,11 @@ from tablassert.enums import Categories
  import duckdb
  import polars as pl
  
- # Open all 12 shard connections
+ # Open all 10 shard connections
  datassert_dir = "/path/to/datassert"
  conns = [
  duckdb.connect(f"{datassert_dir}/data/{i}.duckdb", read_only=True)
- for i in range(12)
+ for i in range(10)
  ]
  
  # LazyFrame with data to resolve
@@ -167,11 +167,11 @@ from tablassert.fullmap import resolve
  from tablassert.nlp import level_one, level_two
  from tablassert.enums import Categories
  
- # Open all 12 shard connections
+ # Open all 10 shard connections
  datassert_dir = "/path/to/datassert"
  conns = [
  duckdb.connect(f"{datassert_dir}/data/{i}.duckdb", read_only=True)
- for i in range(12)
+ for i in range(10)
  ]
  
  # Map a list of gene symbols to CURIEs
docs/api/lib.md
@@ -2,11 +2,11 @@
  
  The `lib` module exposes `resolve_many()`, a high-level convenience function for resolving an iterable of entity strings to CURIEs without requiring manual LazyFrame construction, NLP preprocessing, or DuckDB shard management.
  
- It wraps the lower-level [`resolve()`](fullmap.md) pipeline — applying `level_one` and `level_two` normalization, opening all 12 DuckDB shard connections, executing entity resolution, and returning results as a plain Python dictionary.
+ It wraps the lower-level [`resolve()`](fullmap.md) pipeline — applying `level_one` and `level_two` normalization, opening all 10 DuckDB shard connections, executing entity resolution, and returning results as a plain Python list of row dictionaries.
  
  ## resolve_many()
  
- Standalone batch entity resolution function. Accepts a column name, an iterable of text strings, and a path to the datassert database, then returns resolved CURIEs and metadata as a dictionary of lists.
+ Standalone batch entity resolution function. Accepts a column name, an iterable of text strings, and a path to the datassert database, then returns resolved CURIEs and metadata as a list of row dictionaries.
  
  ### Function Signature
  
@@ -26,9 +26,9 @@ def resolve_many(
  
  **`col: str`**
  
- Column name used internally to label the Polars Series and DataFrame columns during resolution. This name propagates through the NLP and resolution pipeline and determines the keys in the returned dictionary.
+ Column name used internally to label the Polars Series and DataFrame columns during resolution. This name propagates through the NLP and resolution pipeline and determines the keys in each returned row dictionary.
  
- For example, if `col="gene"`, the returned dictionary will contain keys like `"gene"`, `"gene name"`, `"gene category"`, etc.
+ For example, if `col="gene"`, each returned row dictionary will contain keys like `"gene"`, `"gene name"`, `"gene category"`, etc.
  
  **`entities: Iterable[str]`**
  
@@ -38,7 +38,7 @@ Examples: `["TP53", "BRCA1", "EGFR"]`, `("aspirin", "ibuprofen")`, or a generato
  
  **`datassert: Path`**
  
- Filesystem path to the root of the datassert database directory. The function expects a `data/` subdirectory containing 12 DuckDB shard files (`0.duckdb` through `11.duckdb`).
+ Filesystem path to the root of the datassert database directory. The function expects a `data/` subdirectory containing 10 DuckDB shard files (`0.duckdb` through `9.duckdb`).
  
  Each shard contains:
  - Synonym mappings (text → CURIE)
@@ -86,7 +86,7 @@ Each dictionary contains the following keys (where `{col}` is the value of the `
  | `{col} taxon` | NCBI Taxon ID (prefixed) | `"NCBITaxon:9606"` |
  | `{col} source` | Source database | `"HGNC"` |
  | `{col} source version` | Database version | `"2025-01"` |
- | `{col} nlp level` | NLP processing level used for match | `0` or `1` |
+ | `{col} nlp level` | NLP processing level used for match | `1` or `2` |
  
  **Important:** Only entities that successfully resolve to a CURIE are included in the output. Unresolved entities are filtered out by `resolve()`. The returned list may therefore be shorter than the input iterable.
  
@@ -98,7 +98,7 @@ Each dictionary contains the following keys (where `{col}` is the value of the `
  
  2. **NLP normalization** — Applies `level_one()` (whitespace stripping + lowercasing) and `level_two()` (non-word character removal via `\W+`) to produce the two normalized columns required by `resolve()`.
  
- 3. **DuckDB connection management** — Opens all 12 shard connections inside a `contextlib.ExitStack`, ensuring every connection is properly closed when resolution completes or if an error occurs.
+ 3. **DuckDB connection management** — Opens all 10 shard connections inside a `contextlib.ExitStack`, ensuring every connection is properly closed when resolution completes or if an error occurs.
  
  4. **Entity resolution** — Delegates to `fullmap.resolve()` which queries the sharded DuckDB database, ranks matches by category priority, preferred-name exactness, NLP level, and category frequency, then deduplicates to one CURIE per input string.
  
@@ -224,8 +224,8 @@ Both levels are queried during resolution. Level one (exact case-insensitive mat
  ### Error Handling
  
  - If the `datassert` path does not contain the expected shard files, `duckdb.connect()` will raise an `IOException`.
- - If `entities` is empty, the function returns a dictionary with empty lists for all output columns.
- - The `ExitStack` ensures all 12 DuckDB connections are closed even if resolution raises an exception.
+ - If `entities` is empty, the function returns `[]`.
+ - The `ExitStack` ensures all 10 DuckDB connections are closed even if resolution raises an exception.
  - Unresolved entities are silently filtered from the output (logged at INFO level by default via `resolve()`).
  
  ## Integration
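
The lib.md hunks above all describe one contract change: `resolve_many()` now returns a list of per-entity row dictionaries instead of a dictionary of lists. A minimal usage sketch of that documented contract — the datassert path is a placeholder, and the `from tablassert.lib import ...` path is an assumption based on the module name:

```python
# Minimal sketch of the documented resolve_many() contract — not the
# package's own example. Assumes a datassert build exists at the path below
# and that resolve_many is importable from tablassert.lib.
from pathlib import Path

from tablassert.lib import resolve_many

rows = resolve_many(
    col="gene",
    entities=["TP53", "BRCA1", "definitely-not-a-gene"],
    datassert=Path("/path/to/datassert"),  # placeholder path
)

# Unresolved inputs are filtered out, so len(rows) may be < 3 here.
for row in rows:
    print(row["gene"], row["gene name"], row["gene nlp level"])
```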
docs/api/qc.md
@@ -82,7 +82,7 @@ Two fuzzy matching algorithms:
  1. **Ratio:** Overall string similarity
  2. **Partial token sort ratio:** Combined token/subsequence matching
  
- **Threshold:** Default 20% similarity (configurable)
+ **Threshold:** 20% similarity
  
  ```python
  fuzz.ratio(original, preferred) >= 20
@@ -125,7 +125,7 @@ return similarity >= 0.2
  - Graph optimization level: ALL
  - ONNX session caching
  
- Lazy-loaded on first `BERT_audit()` call, then reused for subsequent calls.
+ Lazy-loaded on first `fullmap_audit()` call that reaches the embedding stage, then reused for subsequent calls.
  
  ### Model Caching
  
@@ -135,7 +135,7 @@ BioBERT is lazy-loaded on first use and cached globally for the lifetime of the
  # ? Lazy-loads BioBERT once on first batch audit call, then caches globally
  ```
  
- **Cache location:** In-memory (global model cache)
+ **Cache location:** Downloaded model files are cached on disk in `.onnxassert/`, and the loaded model object is cached in memory for the lifetime of the process.
  
  **Cache strategy:** BioBERT model loaded once on first batch audit, then reused globally
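
For context on the qc.md hunks: the fuzzy gate they describe is easy to reproduce standalone. A sketch under stated assumptions — rapidfuzz as the `fuzz` provider and accept-on-either-metric are guesses, since the page only shows the `fuzz.ratio(...)` comparison:

```python
# Illustrative re-creation of the documented 20%-similarity QC gate.
# Assumptions: `fuzz` comes from rapidfuzz, and clearing either metric is
# enough — qc.md shows only the ratio comparison, not the combinator.
from rapidfuzz import fuzz

def passes_name_audit(original: str, preferred: str, threshold: float = 20.0) -> bool:
    """Accept an original -> preferred-name mapping if either metric clears the bar."""
    return (
        fuzz.ratio(original, preferred) >= threshold
        or fuzz.partial_token_sort_ratio(original, preferred) >= threshold
    )

print(passes_name_audit("tamoxifen", "Tamoxifen"))  # True — near-identical strings
```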
docs/changelog.md (new file)
@@ -0,0 +1,13 @@
+ # Changelog
+ 
+ The canonical release history lives in the repository root at [`CHANGELOG.md`](https://github.com/SkyeAv/Tablassert/blob/main/CHANGELOG.md).
+ 
+ ## Current Release Notes
+ 
+ ## 7.3.5 - 2026-04-29
+ 
+ ### Documentation
+ 
+ - The table-configuration reference now matches the strict runtime schema and merge behavior, including field defaults, requiredness, accepted enum values, zero-based row indexing, and valid column-reference examples.
+ 
+ For older releases and the full project history, open the root `CHANGELOG.md` in the repository.
docs/configuration/advanced-example.md
@@ -66,7 +66,7 @@ template:
  # Provenance: Publication and curation info
  provenance:
  repo: PMC
- publication: PMC11708054
+ publication: 11708054
  contributors:
  - kind: curation
  name: Skye Lane Goetz
@@ -103,12 +103,16 @@ template:
  method: value
  encoding: Spearman correlation
  
- # Descriptive note
+ # Freetext catch-all — anything that doesn't map cleanly to a structured
+ # annotation (study design caveats, non-standard units, qualitative
+ # observations) belongs here rather than being dropped.
  - annotation: miscellaneous notes
  method: value
  encoding: Correlation analysis between microbial composition and 13C-tamoxifen abundance after FDR correction
  ```
  
+ > **`miscellaneous notes` is a freetext escape hatch.** Use it whenever the source carries context you can't otherwise cleanly encode — assay variants, post-hoc qualifiers, "values are log-transformed", etc. It accepts `method: value` for a constant note across the whole table or `method: column` to pull per-row notes from the source.
+ 
  ## Key Techniques
  
  ### Excel Column References
@@ -143,6 +147,8 @@ The subject field uses three regex transformations in sequence:
  ```
  `"Lactobacillus sp"` → `"Lactobacillus sp. "`
  
+ > **Regex constraint:** Each `pattern` is handed to Polars `str.replace_all()` (Rust `regex` crate). **Capturing groups (`(...)` / `\1`) and lookarounds (`(?=...)`, `(?<=...)`, `(?!...)`, `(?<!...)`) are not allowed** and will fail validation. Express transformations as a sequence of simple anchored / character-class substitutions instead — the pipeline above is a deliberate three-step chain because no single capturing-group pattern is permitted. If the transformation can't be expressed without those features, capture the leftover context in a `miscellaneous notes` annotation rather than fighting the regex engine.
+ 
  ### Taxonomic Filtering
  
  Prevent incorrect entity resolution:
@@ -297,7 +303,7 @@ template:
  
  provenance:
  repo: PMC
- publication: PMC12345678
+ publication: 12345678
  contributors:
  - kind: curation
  name: Skye Lane Goetz
@@ -358,7 +364,7 @@ template:
  
  provenance:
  repo: PMC
- publication: PMC87654321
+ publication: 87654321
  contributors:
  - kind: curation
  name: Skye Lane Goetz
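
The regex-constraint notes added in this file are straightforward to check against Polars itself. A minimal sketch, with an invented DataFrame and column name — each pattern sticks to the Rust-regex-safe subset (anchors and character classes, no groups or lookarounds):

```python
# Standalone illustration of the "chain simple substitutions" advice from
# the docs above; the DataFrame and column name are invented for the demo.
import polars as pl

df = pl.DataFrame({"organism": ["d__Bacteria;p__Firmicutes;g__Lactobacillus"]})

cleaned = df.with_columns(
    pl.col("organism")
    .str.replace_all(r"^.*;g__", "")  # drop everything up to the genus tag
    .str.replace_all(r"_", " ")       # underscores -> spaces
    .str.replace_all(r"\s+$", "")     # trim trailing whitespace
)
print(cleaned["organism"][0])  # -> "Lactobacillus"
```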
docs/configuration/graph.md
@@ -60,7 +60,7 @@ See [Table Configuration](table.md) for details.
  
  **`datassert: path`**
  
- Path to the [datassert](../datassert.md) directory for entity resolution. Tablassert opens 12 shard files from `datassert/data/{0..11}.duckdb`. This database contains:
+ Path to the [datassert](../datassert.md) directory for entity resolution. Tablassert opens 10 shard files from `datassert/data/{0..9}.duckdb`. This database contains:
  - Synonym mappings (text → CURIE)
  - Biolink categories
  - Taxonomic information
docs/configuration/table.md
@@ -39,14 +39,14 @@ template:
  
  sections:
  - statement: # Section 1: Gene-Disease
- subject: {encoding: gene_column}
+ subject: {method: column, encoding: A}
  predicate: associated_with
- object: {encoding: disease_column}
+ object: {method: column, encoding: B}
  
  - statement: # Section 2: Gene-Pathway
- subject: {encoding: gene_column}
+ subject: {method: column, encoding: A}
  predicate: participates_in
- object: {encoding: pathway_column}
+ object: {method: column, encoding: C}
  ```
  
  ### Merge Behavior (fastmerge)
@@ -92,15 +92,15 @@ sections:
  **Single output:** Template only
  ```yaml
  template:
- source: {kind: text, local: data.csv}
+ source: {kind: text, local: data.csv, url: https://example.com/data.csv}
  statement: {...}
  ```
  
  **Multiple predicates, same source:**
  ```yaml
  template:
- source: {kind: excel, local: data.xlsx}
- provenance: {publication: PMC123}
+ source: {kind: excel, local: data.xlsx, url: https://example.com/data.xlsx}
+ provenance: {repo: PMC, publication: 123, contributors: [{name: Example User, date: 27 JAN 2026}]}
  
  sections:
  - statement: {predicate: treats}
@@ -110,14 +110,14 @@ sections:
  **Multiple columns, shared provenance:**
  ```yaml
  template:
- source: {kind: text, local: data.csv}
- provenance: {publication: PMID456}
+ source: {kind: text, local: data.csv, url: https://example.com/data.csv}
+ provenance: {repo: PMID, publication: 456, contributors: [{name: Example User, date: 27 JAN 2026}]}
  statement:
- subject: {encoding: gene_symbol}
+ subject: {method: column, encoding: A}
  
  sections:
- - statement: {object: {encoding: column_A}}
- - statement: {object: {encoding: column_B}}
+ - statement: {object: {method: column, encoding: B}}
+ - statement: {object: {method: column, encoding: C}}
  ```
  
  ## Configuration Schema
@@ -126,8 +126,8 @@ sections:
  
  | Field | Type | Required | Description |
  |-------|------|----------|-------------|
- | `syntax` | String | Yes | Configuration version (must be `"TC3"`) |
- | `status` | String | No | Development status: `"alpha"`, `"beta"`, `"primetime"` |
+ | `syntax` | String | No | Configuration version. Defaults to `"TC3"`. |
+ | `status` | String | No | Development status. Defaults to `"alpha"`; allowed values are `"alpha"`, `"beta"`, `"primetime"`. |
  
  ### Source
  
@@ -137,12 +137,12 @@ Defines the data file location and format.
  
  | Field | Type | Required | Description |
  |-------|------|----------|-------------|
- | `kind` | String | Yes | Must be `"excel"` |
+ | `kind` | String | No | Source kind. Model default is `"excel"`, but specify it explicitly in configs. |
  | `local` | Path | Yes | Local file path for caching |
  | `url` | URL | Yes | Download URL (HTTP/HTTPS) |
- | `sheet` | String | No | Sheet name (default: `"Sheet1"`) |
- | `row_slice` | List[Int\|"auto"] | No | Row range: `[start, end]` or `[start, "auto"]` |
- | `rows` | List[Int] | No | Specific rows to include |
+ | `sheet` | String | No | Sheet name. Defaults to `"Sheet1"`. |
+ | `row_slice` | List[Int\|"auto"] | No | Two-value zero-based crop bounds: `[start, stop]`. Each value may be an integer or `"auto"`. |
+ | `rows` | List[Int] | No | Zero-based row indices to keep after any `row_slice` crop. |
  | `reindex` | List[Reindex] | No | Conditional row filtering |
  
  **Example:**
@@ -153,7 +153,7 @@ source:
  url: https://example.com/data.xlsx
  sheet: "Sheet1"
  row_slice:
- - 2 # Start at row 2 (skip header)
+ - 1 # Start at the second physical row
  - auto # Read to end
  ```
  
@@ -161,12 +161,12 @@ source:
  
  | Field | Type | Required | Description |
  |-------|------|----------|-------------|
- | `kind` | String | Yes | Must be `"text"` |
+ | `kind` | String | No | Source kind. Model default is `"text"`, but specify it explicitly in configs. |
  | `local` | Path | Yes | Local file path for caching |
  | `url` | URL | Yes | Download URL |
- | `delimiter` | String | No | Column delimiter (default: `","`) |
- | `row_slice` | List[Int\|"auto"] | No | Row range |
- | `rows` | List[Int] | No | Specific rows |
+ | `delimiter` | String | No | Field delimiter. Defaults to `","`. |
+ | `row_slice` | List[Int\|"auto"] | No | Two-value zero-based crop bounds: `[start, stop]`. Each value may be an integer or `"auto"`. |
+ | `rows` | List[Int] | No | Zero-based row indices to keep after any `row_slice` crop. |
  | `reindex` | List[Reindex] | No | Conditional filtering |
  
  **Example:**
@@ -187,16 +187,16 @@ Filter rows based on column values.
  
  | Field | Type | Description |
  |-------|------|-------------|
- | `column` | String | Column name to evaluate |
- | `comparison` | String | Operator: `"eq"`, `"ne"`, `"lt"`, `"le"`, `"gt"`, `"ge"` |
+ | `column` | String | Source column letters to evaluate (`A`-`ZZZ`) |
+ | `comparison` | String | Operator. Defaults to `"ne"`; allowed values are `"eq"`, `"ne"`, `"lt"`, `"le"`, `"gt"`, `"ge"`. |
  | `comparator` | String\|Int\|Float | Value to compare against |
  
  **Example:**
  ```yaml
  reindex:
- - column: p_value
+ - column: C
  comparison: lt
- comparator: 0.05 # Keep rows where p_value < 0.05
+ comparator: 0.05 # Keep rows where column C < 0.05
  ```
  
  ### Statement (Triple Definition)
@@ -206,7 +206,7 @@ Defines subject-predicate-object relationships.
  | Field | Type | Required | Description |
  |-------|------|----------|-------------|
  | `subject` | NodeEncoding | Yes | Subject entity configuration |
- | `predicate` | String | Yes | Biolink predicate (e.g., `"associated_with"`) |
+ | `predicate` | String | No | Biolink predicate. Defaults to `"related_to"`. |
  | `object` | NodeEncoding | Yes | Object entity configuration |
  | `qualifiers` | List[Qualifier] | No | Edge qualifiers (context) |
  
@@ -215,12 +215,12 @@ Defines subject-predicate-object relationships.
  statement:
  subject:
  method: column
- encoding: gene_symbol
+ encoding: A
  prioritize: [Gene]
  predicate: treats
  object:
  method: column
- encoding: disease_name
+ encoding: B
  prioritize: [Disease]
  ```
  
@@ -230,11 +230,11 @@ Defines how to extract and resolve entities.
  
  | Field | Type | Required | Description |
  |-------|------|----------|-------------|
- | `method` | String | Yes | `"value"` (literal) or `"column"` (column reference) |
- | `encoding` | String\|Int\|Float | Yes | Literal value or column name |
+ | `method` | String | No | `"value"` (literal) or `"column"` (source column letters). Defaults to `"value"`. |
+ | `encoding` | String\|Int\|Float | Yes | Literal value or source column letters, depending on `method` |
  | `taxon` | Int | No | NCBI Taxon ID for filtering (e.g., `9606` for human) |
- | `prioritize` | List[String] | No | Preferred Biolink categories |
- | `avoid` | List[String] | No | Excluded Biolink categories |
+ | `prioritize` | List[String] | No | Preferred Biolink categories (must be valid `Categories` enum values such as `Gene`, `Protein`) |
+ | `avoid` | List[String] | No | Excluded Biolink categories (must be valid `Categories` enum values) |
  | `regex` | List[Regex] | No | Pattern replacements |
  | `fill` | String | No | Null-filling strategy: `"forward"`, `"backward"`, `"min"`, `"max"`, `"mean"`, `"zero"`, `"one"` |
  | `remove` | List[String] | No | Strings to filter out |
@@ -253,11 +253,12 @@ subject:
  encoding: CHEBI:41774 # All rows get this CURIE
  ```
  
- **`method: column`** - Reference a column
+ **`method: column`** - Reference a source column
  
- Excel columns use letters converted to `column_N`:
- - Column A `column_1` or just `"A"`
- - Column B `column_2` or just `"B"`
+ Source files are read without headers, so column references are Excel-style letters:
+ - Column A -> `"A"`
+ - Column B -> `"B"`
+ - Column AA -> `"AA"`
  
  ```yaml
  subject:
@@ -265,12 +266,7 @@ subject:
  encoding: A # Read from column A
  ```
  
- CSV/TSV columns use header names:
- ```yaml
- subject:
- method: column
- encoding: gene_symbol # Read from "gene_symbol" column
- ```
+ At runtime those letters are converted internally to Polars column names such as `column_1`, but those internal names are not valid configuration values.
  
  #### Taxonomic Filtering
  
@@ -278,7 +274,8 @@ subject:
  
  ```yaml
  subject:
- encoding: gene_column
+ method: column
+ encoding: A
  taxon: 9606 # Only human genes (Homo sapiens)
  ```
  
@@ -305,7 +302,8 @@ If "TP53" maps to both Gene and Protein, prefer Gene.
  
  ```yaml
  subject:
- encoding: organism_name
+ method: column
+ encoding: A
  prioritize:
  - OrganismTaxon
  avoid:
@@ -330,6 +328,8 @@ subject:
  
  Executed in order.
  
+ > **Regex dialect:** Patterns are passed directly to Polars `str.replace_all()`, which uses the Rust [`regex`](https://docs.rs/regex/) crate. Only features supported by that engine work — in particular, **capturing groups (`(...)`, `\1`) and lookarounds (`(?=...)`, `(?<=...)`, `(?!...)`, `(?<!...)`) are not supported** and will raise an error at parse time. Stick to character classes, anchors (`^`, `$`), quantifiers, alternation (`a|b`), and non-capturing groups (`(?:...)`) if grouping is needed. If a transformation is too complex to express, prefer chaining several simple substitutions or capturing the residual context in a `miscellaneous notes` annotation instead.
+ 
  **`remove: list[string]`** - Filter out specific strings
  
  ```yaml
@@ -339,6 +339,8 @@ subject:
  - "^NA " # Remove rows starting with "NA "
  ```
  
+ Same regex constraints apply as the `regex` field — Polars-compatible patterns only, no capturing groups or lookarounds.
+ 
  **`prefix` / `suffix`** - Add text
  
  ```yaml
@@ -362,7 +364,8 @@ Available strategies:
  
  ```yaml
  subject:
- encoding: gene_symbol
+ method: column
+ encoding: A
  fill: forward # Propagate values down through null rows
  ```
  
@@ -370,7 +373,7 @@ subject:
  annotations:
  - annotation: expression_level
  method: column
- encoding: expression
+ encoding: C
  fill: mean # Replace nulls with column average
  ```
  
@@ -380,7 +383,8 @@ annotations:
  
  ```yaml
  object:
- encoding: pathway_list
+ method: column
+ encoding: B
  explode_by: ";" # "P1;P2;P3" → 3 separate edges
  ```
  
@@ -398,7 +402,7 @@ Add context to edges (anatomical location, species, etc.).
  
  | Field | Type | Description |
  |-------|------|-------------|
- | `qualifier` | String | Biolink qualifier (e.g., `"species_context_qualifier"`) |
+ | `qualifier` | String | Biolink qualifier from the `Qualifiers` enum (e.g., `"species_context_qualifier"`) |
  | (inherits NodeEncoding) | | All NodeEncoding fields available |
  
  **Example:**
@@ -415,15 +419,15 @@ Required metadata about data source.
  
  | Field | Type | Required | Description |
  |-------|------|----------|-------------|
- | `repo` | String | Yes | Repository: `"PMC"`, `"PMID"` |
- | `publication` | String | Yes | Identifier (e.g., `"PMC11708054"`, `"PMID123"`) |
+ | `repo` | String | No | Repository. Defaults to `"PMC"`; allowed values are `"PMC"`, `"PMID"`. |
+ | `publication` | String | Yes | Repository-local identifier appended to `repo:` (e.g., `"11708054"`, `"123"`) |
  | `contributors` | List[Contributor] | Yes | Curation information |
  
  **Contributor fields:**
  
  | Field | Type | Required | Description |
  |-------|------|----------|-------------|
- | `kind` | String | Yes | `"curation"`, `"validation"`, `"tool"` |
+ | `kind` | String | No | Contributor role. Defaults to `"curation"`; allowed values are `"curation"`, `"validation"`, `"tool"`. |
  | `name` | String | Yes | Contributor name |
  | `date` | String | Yes | Date (free format) |
  | `organizations` | List[String] | No | Affiliations |
@@ -433,7 +437,7 @@ Required metadata about data source.
  ```yaml
  provenance:
  repo: PMC
- publication: PMC11708054
+ publication: 11708054
  contributors:
  - kind: curation
  name: Skye Lane Goetz
@@ -467,8 +471,16 @@ annotations:
  - annotation: multiple testing correction method
  method: value
  encoding: "Benjamini Hochberg"
+ 
+ # Freetext catch-all for context that doesn't fit a structured field —
+ # study caveats, units, post-hoc notes, anything you'd otherwise lose.
+ - annotation: miscellaneous notes
+ method: value
+ encoding: "Values are log2 fold-change relative to vehicle control; n=3 biological replicates per arm"
  ```
  
+ > **Tip:** When source data carries information that can't be cleanly mapped to a structured annotation (assay-specific caveats, non-standard units, qualitative observations), add a `miscellaneous notes` annotation rather than forcing it into another field or dropping it. It accepts both `method: value` (one note for the whole table) and `method: column` (per-row notes from the source).
+ 
  ## Complete Example
  
  Minimal table configuration:
@@ -488,17 +500,17 @@ template:
  statement:
  subject:
  method: column
- encoding: gene
+ encoding: A
  prioritize: [Gene]
  predicate: associated_with
  object:
  method: column
- encoding: disease
+ encoding: B
  prioritize: [Disease]
  
  provenance:
  repo: PMID
- publication: PMID12345678
+ publication: 12345678
  contributors:
  - kind: curation
  name: Example User
@@ -507,7 +519,7 @@ template:
  annotations:
  - annotation: p value
  method: column
- encoding: p_val
+ encoding: C
  ```
  
  ## Next Steps
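
A side note on the column-reference changes running through these table.md hunks: the docs state that letter references like `A` map internally to Polars names like `column_1`. A hypothetical helper showing that mapping — only `A` → `column_1` is confirmed by the docs; the base-26 rule for multi-letter references is an assumption, and the real conversion inside tablassert may differ in detail:

```python
# Hypothetical illustration of the documented letters -> column_N mapping.
def letters_to_polars_name(letters: str) -> str:
    """'A' -> 'column_1', 'Z' -> 'column_26', 'AA' -> 'column_27' (assumed base-26)."""
    n = 0
    for ch in letters:
        n = n * 26 + (ord(ch) - ord("A") + 1)
    return f"column_{n}"

assert letters_to_polars_name("A") == "column_1"
assert letters_to_polars_name("AA") == "column_27"
```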
docs/datassert.md
@@ -33,7 +33,7 @@ The build command automatically downloads BABEL exports from RENCI (`https://sta
  1. **Download** — BABEL class and synonym files are downloaded from RENCI and split into LZ4-compressed NDJSON chunks under `./datassert/downloads/`.
  2. **Lookup** — Class files (`*.ndjson.lz4`) are read to build an in-memory equivalent-identifier lookup.
  3. **Parquet Staging** — Synonym files are processed with the lookup, quality-controlled, and written as sharded Parquet files to `./datassert/parquets/`.
- 4. **DuckDB Generation** — Parquet files are loaded into 12 sharded DuckDB databases under `./datassert/data/`.
+ 4. **DuckDB Generation** — Parquet files are loaded into 10 sharded DuckDB databases under `./datassert/data/`.
  
  ### Examples
  
@@ -57,11 +57,11 @@ datassert build --use-existing-parquets
  
  ## Output Artifacts
  
- - 12 sharded DuckDB databases are written to `./datassert/data/{0..11}.duckdb`.
+ - 10 sharded DuckDB databases are written to `./datassert/data/{0..9}.duckdb`.
  - Each shard contains `SOURCES`, `CATEGORIES`, `CURIES`, and `SYNONYMS` tables, deduplicated, sorted, and indexed for query performance.
- - Staging Parquet files are written to `./datassert/parquets/{0..11}/`.
+ - Staging Parquet files are written to `./datassert/parquets/{0..9}/`.
  
- Terms are routed to shards deterministically via `xxhash64(term) % 12`, so a given string always hits the same shard.
+ Terms are routed to shards deterministically via `xxhash64(term) % 10`, so a given string always hits the same shard.
  
  ### Schema
  
@@ -76,14 +76,14 @@ Each shard contains four tables:
  
  ## Usage in Graph Config
  
- The `datassert:` field in a GC2 graph configuration points to the directory containing the shards. Tablassert opens all 12 shards at startup and passes the connections to `resolve()`.
+ The `datassert:` field in a GC2 graph configuration points to the directory containing the shards. Tablassert opens all 10 shards at startup and passes the connections to `resolve()`.
  
  ```yaml
  # graph-config.yaml (GC2)
  syntax: GC2
  name: my-graph
  version: "1.0"
- datassert: /path/to/datassert/ # directory containing data/0..11.duckdb
+ datassert: /path/to/datassert/ # directory containing data/0..9.duckdb
  tables:
  - ./TABLE/my-table.yaml
  ```
@@ -99,7 +99,7 @@ from tablassert.fullmap import resolve
  datassert_dir = "/path/to/datassert"
  conns = [
  duckdb.connect(f"{datassert_dir}/data/{i}.duckdb", read_only=True)
- for i in range(12)
+ for i in range(10)
  ]
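
The routing rule quoted above (`xxhash64(term) % 10`) is simple enough to sketch standalone. The UTF-8 encoding and default (zero) seed below are assumptions about datassert's exact hashing:

```python
# Sketch of the documented deterministic shard routing, not datassert's code.
import xxhash

SHARDS = 10

def shard_for(term: str) -> int:
    """Route a synonym string to one of the 10 DuckDB shards."""
    return xxhash.xxh64(term.encode("utf-8")).intdigest() % SHARDS

print(shard_for("aspirin"))  # same input -> same shard, every run
```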
docs/docker.md
@@ -81,8 +81,8 @@ docker run --rm \
  
  - **Datassert path** — The graph configuration YAML specifies the `datassert` path for the entity-resolution database. Ensure it is accessible inside the container.
  - **Multiprocessing** — `src/tablassert/cli.py:63` uses `multiprocessing.Pool` for parallel table loading and section extraction.
- - **DuckDB connections** — An `ExitStack` at `src/tablassert/cli.py:81` opens read-only connections to all 12 Datassert DuckDB shards concurrently.
- - **Entity resolution** — The `fullmap` module (`src/tablassert/fullmap.py`) shards terms across 12 DuckDB shards (`SHARDS = 12`) using xxhash64.
+ - **DuckDB connections** — An `ExitStack` at `src/tablassert/cli.py:81` opens read-only connections to all 10 Datassert DuckDB shards concurrently.
+ - **Entity resolution** — The `fullmap` module (`src/tablassert/fullmap.py`) shards terms across 10 DuckDB shards (`SHARDS = 10`) using xxhash64.
  - **Text normalization** — `src/tablassert/nlp.py` provides `level_one` (strip + lowercase) and `level_two` (regex-based cleanup).
  
  ## CI/CD Integration
docs/examples.md
@@ -33,7 +33,7 @@ template:
  - Disease
  provenance:
  repo: PMID
- publication: PMID12345678
+ publication: 12345678
  contributors:
  - kind: curation
  name: Your Name
@@ -85,7 +85,7 @@ template:
  taxon: 9606
  provenance:
  repo: PMID
- publication: PMID98765432
+ publication: 98765432
  contributors:
  - kind: curation
  name: Your Name
@@ -146,7 +146,7 @@ template:
  encoding: CHEBI:41774
  provenance:
  repo: PMC
- publication: PMC11708054
+ publication: 11708054
  contributors:
  - kind: curation
  name: Your Name
@@ -161,11 +161,15 @@ template:
  - annotation: assertion method
  method: value
  encoding: "Spearman correlation"
+ # Freetext catch-all for context that doesn't fit a structured field.
+ - annotation: miscellaneous notes
+ method: value
+ encoding: "FDR-corrected; samples pooled across two cohorts"
  ```
  
  **Key techniques:**
  
- - **Regex pipeline** cleans raw taxonomic strings (e.g., `d__Bacteria;p__Firmicutes;g__Lactobacillus` → `Lactobacillus`)
+ - **Regex pipeline** cleans raw taxonomic strings (e.g., `d__Bacteria;p__Firmicutes;g__Lactobacillus` → `Lactobacillus`). Patterns must be Polars `str.replace_all()`-compatible — no capturing groups (`(...)` / `\1`) and no lookarounds (`(?=...)`, `(?<=...)`, `(?!...)`, `(?<!...)`). Chain several simple substitutions instead.
  - **Avoid list** (`avoid: [Gene]`) prevents organism names from resolving to gene entities
  - **Fixed-value object** (`method: value`) assigns the same metabolite CURIE to all rows
  - **Excel source** with sheet name and row slicing
@@ -199,7 +203,7 @@ template:
  encoding: PLACEHOLDER
  provenance:
  repo: PMID
- publication: PMID11223344
+ publication: 11223344
  contributors:
  - kind: curation
  name: Your Name
@@ -277,7 +281,7 @@ template:
  - Disease
  provenance:
  repo: PMID
- publication: PMID55667788
+ publication: 55667788
  contributors:
  - kind: curation
  name: Your Name
@@ -330,7 +334,7 @@ template:
  - ChemicalEntity
  provenance:
  repo: PMID
- publication: PMID99887766
+ publication: 99887766
  contributors:
  - kind: curation
  name: Your Name
docs/tutorial.md
@@ -68,7 +68,7 @@ template:
  - Disease
  provenance:
  repo: PMID
- publication: PMID12345678
+ publication: 12345678
  contributors:
  - kind: curation
  name: Tutorial Example
mkdocs.yml
@@ -17,4 +17,4 @@ nav:
  - Batch Resolution: api/lib.md
  - Quality Control: api/qc.md
  - Utilities: api/utils.md
- - Changelog: ../CHANGELOG.md
+ - Changelog: changelog.md
pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "tablassert"
- version = "7.3.3"
+ version = "7.3.5"
  description = "Extract knowledge assertions from tabular data into NCATS Translator-compliant KGX NDJSON — declaratively, with entity resolution and quality control built in."
  authors = [
  { name = "Skye Lane Goetz", email = "sgoetz@isbscience.org" }
src/tablassert/downloader.py
@@ -29,13 +29,24 @@ def from_url(website: str, p: Path, timeout: int = 60_000, retries: int = 3) ->
  try:
  with sync_playwright() as pw:
  browser = pw.chromium.launch(headless=True)
- page = browser.new_page()
- page.goto(website, wait_until="networkidle", timeout=timeout)
+ context = browser.new_context(accept_downloads=True)
+ 
+ page = context.new_page()
  with page.expect_download(timeout=timeout) as info:
- download = info.value
- download.save_as(p)
+ try:
+ page.goto(website, wait_until="load", timeout=timeout)
+ except Exception as e:
+ if "net::ERR_ABORTED" not in str(e):
+ raise
+ 
+ download = info.value
+ download.save_as(p)
+ 
+ context.close()
  browser.close()
+ 
  return p
+ 
  except Exception as e:
  last = e
  if attempt < retries - 1:
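
Since the hunk above loses the code's indentation, here is the same download-capture pattern as a self-contained, properly indented sketch (the retry wrapper from `from_url()` is omitted, and Playwright's Chromium is assumed to be installed):

```python
# Re-statement of the 7.3.4 from_url() fix as runnable code; the retry
# loop and logging from downloader.py are omitted for brevity.
from pathlib import Path

from playwright.sync_api import sync_playwright

def fetch(url: str, dest: Path, timeout: int = 60_000) -> Path:
    with sync_playwright() as pw:
        browser = pw.chromium.launch(headless=True)
        context = browser.new_context(accept_downloads=True)  # allow downloads
        page = context.new_page()
        with page.expect_download(timeout=timeout) as info:
            try:
                page.goto(url, wait_until="load", timeout=timeout)
            except Exception as e:
                # A direct-download URL aborts navigation; that's expected.
                if "net::ERR_ABORTED" not in str(e):
                    raise
        info.value.save_as(dest)
        context.close()
        browser.close()
    return dest
```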
src/tablassert/models.py
@@ -33,7 +33,9 @@ class TablaBase(BaseModel):
  
  
  class Reindex(TablaBase):
- column: str = Field(..., description="Source column letters used for row filtering.", examples=["A", "AA"])
+ column: str = Field(
+ ..., pattern=r"^[A-Z]{1,3}$", description="Source column letters used for row filtering.", examples=["A", "AA"]
+ )
  comparison: Comparisons = Field(
  Comparisons.NE,
  description="Comparison operator used in reindex filtering.",
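
What the new `pattern` constraint buys, as a standalone sketch — a stripped-down stand-in for the real model, which inherits from `TablaBase` and carries the other fields:

```python
# Stand-in model demonstrating the new pattern constraint; the real Reindex
# also has comparison/comparator fields and inherits TablaBase.
from pydantic import BaseModel, Field, ValidationError

class Reindex(BaseModel):
    column: str = Field(..., pattern=r"^[A-Z]{1,3}$")

Reindex(column="AA")  # accepted — Excel-style column letters
try:
    Reindex(column="p_value")  # header-style names now fail at parse time
except ValidationError as e:
    print(e.errors()[0]["type"])  # string_pattern_mismatch
```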
uv.lock
@@ -2211,7 +2211,7 @@ wheels = [
  
  [[package]]
  name = "tablassert"
- version = "7.3.3"
+ version = "7.3.5"
  source = { editable = "." }
  dependencies = [
  { name = "duckdb" },