tablassist 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tablassist-0.1.0/.gitignore +10 -0
- tablassist-0.1.0/PKG-INFO +159 -0
- tablassist-0.1.0/README.md +121 -0
- tablassist-0.1.0/pyproject.toml +99 -0
- tablassist-0.1.0/src/tablassist/__init__.py +0 -0
- tablassist-0.1.0/src/tablassist/cli.py +268 -0
- tablassist-0.1.0/src/tablassist/utils.py +49 -0
- tablassist-0.1.0/test/fixtures/invalid-config.yaml +10 -0
- tablassist-0.1.0/test/fixtures/preview.csv +3 -0
- tablassist-0.1.0/test/fixtures/valid-config.yaml +15 -0
- tablassist-0.1.0/test/test_cli.py +36 -0
- tablassist-0.1.0/uv.lock +2673 -0
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tablassist
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: AI-assisted table configuration generation for Tablassert — entity resolution, YAML validation, and Biolink documentation lookup.
|
|
5
|
+
Project-URL: Homepage, https://github.com/SkyeAv/Tablassist
|
|
6
|
+
Project-URL: Source, https://github.com/SkyeAv/Tablassist
|
|
7
|
+
Author-email: Skye Lane Goetz <sgoetz@isbscience.org>
|
|
8
|
+
License-Expression: Apache-2.0
|
|
9
|
+
Keywords: bioinformatics,biolink,data quality control,entity resolution,knowledge graph,ncats translator,tablassert,tablassist,table mining,yaml configuration
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Environment :: Console
|
|
12
|
+
Classifier: Framework :: Pydantic
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Healthcare Industry
|
|
15
|
+
Classifier: Intended Audience :: Science/Research
|
|
16
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
17
|
+
Classifier: Operating System :: OS Independent
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
21
|
+
Requires-Python: >=3.13
|
|
22
|
+
Requires-Dist: cyclopts>=4.10.1
|
|
23
|
+
Requires-Dist: fastexcel>=0.19.0
|
|
24
|
+
Requires-Dist: httpx>=0.28.1
|
|
25
|
+
Requires-Dist: polars>=1.39.3
|
|
26
|
+
Requires-Dist: pydantic>=2.12.5
|
|
27
|
+
Requires-Dist: pyyaml>=6.0.3
|
|
28
|
+
Requires-Dist: tablassert>=7.2.1
|
|
29
|
+
Requires-Dist: textract>=1.6.5
|
|
30
|
+
Requires-Dist: trafilatura>=2.0.0
|
|
31
|
+
Provides-Extra: rt
|
|
32
|
+
Requires-Dist: polars[rtcompat]>=1.39.0; extra == 'rt'
|
|
33
|
+
Requires-Dist: tablassert[rtcompat]>=7.2.1; extra == 'rt'
|
|
34
|
+
Provides-Extra: rtcompat
|
|
35
|
+
Requires-Dist: polars[rtcompat]>=1.39.0; extra == 'rtcompat'
|
|
36
|
+
Requires-Dist: tablassert[rtcompat]>=7.2.1; extra == 'rtcompat'
|
|
37
|
+
Description-Content-Type: text/markdown
|
|
38
|
+
|
|
39
|
+
# Tablassist CLI
|
|
40
|
+
|
|
41
|
+
[](https://pypi.org/project/tablassist/)
|
|
42
|
+
[](https://pypi.org/project/tablassist/)
|
|
43
|
+
[](https://github.com/SkyeAv/Tablassist/blob/master/LICENSE)
|
|
44
|
+
|
|
45
|
+
Python CLI tool for AI-assisted [Tablassert](https://github.com/SkyeAv/Tablassert) table configuration generation — entity resolution, YAML validation, and Biolink documentation lookup.
|
|
46
|
+
|
|
47
|
+
## Installation
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
pip install tablassist
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
An optional extra is available for CPU compatibility:
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
pip install "tablassist[rtcompat]" # Polars build for CPUs without required instructions
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### Requirements
|
|
60
|
+
|
|
61
|
+
- Python >= 3.13
|
|
62
|
+
- Environment variables `TABLASSIST_USERNAME` and `TABLASSIST_API_KEY` for API-accessing commands
|
|
63
|
+
|
|
64
|
+
## Usage
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
# Fetch table configuration documentation
|
|
68
|
+
tablassist docs-table-config
|
|
69
|
+
|
|
70
|
+
# Fetch advanced configuration examples
|
|
71
|
+
tablassist docs-advanced-examples
|
|
72
|
+
|
|
73
|
+
# Fetch the CLI tutorial
|
|
74
|
+
tablassist docs-tutorial
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### Entity Resolution
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
# Search for entity CURIEs by term
|
|
81
|
+
tablassist search-curies "breast cancer"
|
|
82
|
+
|
|
83
|
+
# Get canonical info for a specific CURIE
|
|
84
|
+
tablassist get-curie-info "MONDO:0007254"
|
|
85
|
+
|
|
86
|
+
# Search gene CURIEs within an NCBI taxon
|
|
87
|
+
tablassist search-gene-curies "BRCA1" --ncbi-taxon 9606
|
|
88
|
+
|
|
89
|
+
# Resolve an NCBI Taxon ID from an organism name
|
|
90
|
+
tablassist resolve-taxon-id "Homo sapiens"
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### Biolink Reference
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
# List all supported categories, predicates, or qualifiers
|
|
97
|
+
tablassist list-categories
|
|
98
|
+
tablassist list-predicates
|
|
99
|
+
tablassist list-qualifiers
|
|
100
|
+
|
|
101
|
+
# Fetch documentation for a specific Biolink element
|
|
102
|
+
tablassist docs-category "Gene"
|
|
103
|
+
tablassist docs-predicate "interacts_with"
|
|
104
|
+
tablassist docs-qualifier "qualified_predicate"
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### YAML Validation
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
# Validate a full config file
|
|
111
|
+
tablassist validate-config-file config.yaml
|
|
112
|
+
|
|
113
|
+
# Validate a single section from a YAML string
|
|
114
|
+
tablassist validate-section-str '<yaml>'
|
|
115
|
+
|
|
116
|
+
# Validate a full config from a YAML string
|
|
117
|
+
tablassist validate-config-str '<yaml>'
|
|
118
|
+
|
|
119
|
+
# Get the Section JSON schema
|
|
120
|
+
tablassist section-schema
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### Data Preview
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
# List sheets in an Excel file
|
|
127
|
+
tablassist excel-sheets data.xlsx
|
|
128
|
+
|
|
129
|
+
# Preview rows from an Excel sheet
|
|
130
|
+
tablassist preview-excel data.xlsx "Sheet1" 10
|
|
131
|
+
|
|
132
|
+
# Preview rows from a CSV file
|
|
133
|
+
tablassist preview-csv data.csv 10
|
|
134
|
+
|
|
135
|
+
# Extract text from a document (PDF, DOCX, etc.)
|
|
136
|
+
tablassist extract-text document.pdf
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### PMC Archive Download
|
|
140
|
+
|
|
141
|
+
```bash
|
|
142
|
+
# Download and extract a PMC tar archive
|
|
143
|
+
tablassist download-pmc-tar 12345 --dest-dir ./output
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## Development
|
|
147
|
+
|
|
148
|
+
```bash
|
|
149
|
+
uv sync # install dependencies
|
|
150
|
+
uv run ruff check . # lint
|
|
151
|
+
uv run ruff check --fix . # lint with auto-fix
|
|
152
|
+
uv run ruff format . # format
|
|
153
|
+
uv run pyright # type check
|
|
154
|
+
uv run pytest # run all tests
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
## License
|
|
158
|
+
|
|
159
|
+
[Apache License 2.0](https://github.com/SkyeAv/Tablassist/blob/master/LICENSE)
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# Tablassist CLI
|
|
2
|
+
|
|
3
|
+
[](https://pypi.org/project/tablassist/)
|
|
4
|
+
[](https://pypi.org/project/tablassist/)
|
|
5
|
+
[](https://github.com/SkyeAv/Tablassist/blob/master/LICENSE)
|
|
6
|
+
|
|
7
|
+
Python CLI tool for AI-assisted [Tablassert](https://github.com/SkyeAv/Tablassert) table configuration generation — entity resolution, YAML validation, and Biolink documentation lookup.
|
|
8
|
+
|
|
9
|
+
## Installation
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
pip install tablassist
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
An optional extra is available for CPU compatibility:
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
pip install "tablassist[rtcompat]" # Polars build for CPUs without required instructions
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
### Requirements
|
|
22
|
+
|
|
23
|
+
- Python >= 3.13
|
|
24
|
+
- Environment variables `TABLASSIST_USERNAME` and `TABLASSIST_API_KEY` for API-accessing commands
|
|
25
|
+
|
|
26
|
+
## Usage
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
# Fetch table configuration documentation
|
|
30
|
+
tablassist docs-table-config
|
|
31
|
+
|
|
32
|
+
# Fetch advanced configuration examples
|
|
33
|
+
tablassist docs-advanced-examples
|
|
34
|
+
|
|
35
|
+
# Fetch the CLI tutorial
|
|
36
|
+
tablassist docs-tutorial
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### Entity Resolution
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
# Search for entity CURIEs by term
|
|
43
|
+
tablassist search-curies "breast cancer"
|
|
44
|
+
|
|
45
|
+
# Get canonical info for a specific CURIE
|
|
46
|
+
tablassist get-curie-info "MONDO:0007254"
|
|
47
|
+
|
|
48
|
+
# Search gene CURIEs within an NCBI taxon
|
|
49
|
+
tablassist search-gene-curies "BRCA1" --ncbi-taxon 9606
|
|
50
|
+
|
|
51
|
+
# Resolve an NCBI Taxon ID from an organism name
|
|
52
|
+
tablassist resolve-taxon-id "Homo sapiens"
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### Biolink Reference
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
# List all supported categories, predicates, or qualifiers
|
|
59
|
+
tablassist list-categories
|
|
60
|
+
tablassist list-predicates
|
|
61
|
+
tablassist list-qualifiers
|
|
62
|
+
|
|
63
|
+
# Fetch documentation for a specific Biolink element
|
|
64
|
+
tablassist docs-category "Gene"
|
|
65
|
+
tablassist docs-predicate "interacts_with"
|
|
66
|
+
tablassist docs-qualifier "qualified_predicate"
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### YAML Validation
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
# Validate a full config file
|
|
73
|
+
tablassist validate-config-file config.yaml
|
|
74
|
+
|
|
75
|
+
# Validate a single section from a YAML string
|
|
76
|
+
tablassist validate-section-str '<yaml>'
|
|
77
|
+
|
|
78
|
+
# Validate a full config from a YAML string
|
|
79
|
+
tablassist validate-config-str '<yaml>'
|
|
80
|
+
|
|
81
|
+
# Get the Section JSON schema
|
|
82
|
+
tablassist section-schema
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### Data Preview
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
# List sheets in an Excel file
|
|
89
|
+
tablassist excel-sheets data.xlsx
|
|
90
|
+
|
|
91
|
+
# Preview rows from an Excel sheet
|
|
92
|
+
tablassist preview-excel data.xlsx "Sheet1" 10
|
|
93
|
+
|
|
94
|
+
# Preview rows from a CSV file
|
|
95
|
+
tablassist preview-csv data.csv 10
|
|
96
|
+
|
|
97
|
+
# Extract text from a document (PDF, DOCX, etc.)
|
|
98
|
+
tablassist extract-text document.pdf
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### PMC Archive Download
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
# Download and extract a PMC tar archive
|
|
105
|
+
tablassist download-pmc-tar 12345 --dest-dir ./output
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## Development
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
uv sync # install dependencies
|
|
112
|
+
uv run ruff check . # lint
|
|
113
|
+
uv run ruff check --fix . # lint with auto-fix
|
|
114
|
+
uv run ruff format . # format
|
|
115
|
+
uv run pyright # type check
|
|
116
|
+
uv run pytest # run all tests
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## License
|
|
120
|
+
|
|
121
|
+
[Apache License 2.0](https://github.com/SkyeAv/Tablassist/blob/master/LICENSE)
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "tablassist"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "AI-assisted table configuration generation for Tablassert — entity resolution, YAML validation, and Biolink documentation lookup."
|
|
5
|
+
authors = [
|
|
6
|
+
{ name = "Skye Lane Goetz", email = "sgoetz@isbscience.org" },
|
|
7
|
+
]
|
|
8
|
+
keywords = [
|
|
9
|
+
"knowledge graph",
|
|
10
|
+
"bioinformatics",
|
|
11
|
+
"entity resolution",
|
|
12
|
+
"ncats translator",
|
|
13
|
+
"yaml configuration",
|
|
14
|
+
"table mining",
|
|
15
|
+
"tablassert",
|
|
16
|
+
"tablassist",
|
|
17
|
+
"biolink",
|
|
18
|
+
"data quality control",
|
|
19
|
+
]
|
|
20
|
+
readme = "README.md"
|
|
21
|
+
license = "Apache-2.0"
|
|
22
|
+
classifiers = [
|
|
23
|
+
"License :: OSI Approved :: Apache Software License",
|
|
24
|
+
"Development Status :: 4 - Beta",
|
|
25
|
+
"Intended Audience :: Science/Research",
|
|
26
|
+
"Intended Audience :: Healthcare Industry",
|
|
27
|
+
"Intended Audience :: Developers",
|
|
28
|
+
"Topic :: Scientific/Engineering :: Bio-Informatics",
|
|
29
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
30
|
+
"Programming Language :: Python :: 3.13",
|
|
31
|
+
"Framework :: Pydantic",
|
|
32
|
+
"Operating System :: OS Independent",
|
|
33
|
+
"Environment :: Console",
|
|
34
|
+
]
|
|
35
|
+
requires-python = ">=3.13"
|
|
36
|
+
dependencies = [
|
|
37
|
+
"cyclopts>=4.10.1",
|
|
38
|
+
"fastexcel>=0.19.0",
|
|
39
|
+
"httpx>=0.28.1",
|
|
40
|
+
"polars>=1.39.3",
|
|
41
|
+
"pydantic>=2.12.5",
|
|
42
|
+
"pyyaml>=6.0.3",
|
|
43
|
+
"tablassert>=7.2.1",
|
|
44
|
+
"textract>=1.6.5",
|
|
45
|
+
"trafilatura>=2.0.0",
|
|
46
|
+
]
|
|
47
|
+
|
|
48
|
+
[project.urls]
|
|
49
|
+
Homepage = "https://github.com/SkyeAv/Tablassist"
|
|
50
|
+
Source = "https://github.com/SkyeAv/Tablassist"
|
|
51
|
+
|
|
52
|
+
[build-system]
|
|
53
|
+
requires = [
|
|
54
|
+
"hatchling",
|
|
55
|
+
]
|
|
56
|
+
build-backend = "hatchling.build"
|
|
57
|
+
|
|
58
|
+
[tool.hatch.build.targets.wheel]
|
|
59
|
+
packages = [
|
|
60
|
+
"./src/tablassist",
|
|
61
|
+
]
|
|
62
|
+
|
|
63
|
+
[project.scripts]
|
|
64
|
+
tablassist = "tablassist.cli:serve"
|
|
65
|
+
|
|
66
|
+
[project.optional-dependencies]
|
|
67
|
+
rtcompat = [
|
|
68
|
+
"tablassert[rtcompat]>=7.2.1",
|
|
69
|
+
"polars[rtcompat]>=1.39.0",
|
|
70
|
+
]
|
|
71
|
+
rt = [
|
|
72
|
+
"tablassist[rtcompat]",
|
|
73
|
+
]
|
|
74
|
+
|
|
75
|
+
[tool.uv]
|
|
76
|
+
override-dependencies = [
|
|
77
|
+
"six>=1.16.0",
|
|
78
|
+
]
|
|
79
|
+
|
|
80
|
+
[dependency-groups]
|
|
81
|
+
dev = [
|
|
82
|
+
"mkdocs>=1.6.1",
|
|
83
|
+
"pyright>=1.1.408",
|
|
84
|
+
"pytest>=9.0.2",
|
|
85
|
+
"ruff>=0.15.6",
|
|
86
|
+
]
|
|
87
|
+
|
|
88
|
+
[tool.pytest.ini_options]
|
|
89
|
+
testpaths = ["test"]
|
|
90
|
+
|
|
91
|
+
[tool.ruff]
|
|
92
|
+
line-length = 120
|
|
93
|
+
indent-width = 4
|
|
94
|
+
target-version = "py313"
|
|
95
|
+
|
|
96
|
+
[tool.ruff.format]
|
|
97
|
+
quote-style = "double"
|
|
98
|
+
indent-style = "space"
|
|
99
|
+
skip-magic-trailing-comma = true
|
|
File without changes
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import re
|
|
3
|
+
import subprocess
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any, Literal, Optional, Union
|
|
6
|
+
|
|
7
|
+
import fastexcel
|
|
8
|
+
import httpx
|
|
9
|
+
import polars as pl
|
|
10
|
+
import textract
|
|
11
|
+
import yaml
|
|
12
|
+
from cyclopts import App
|
|
13
|
+
from tablassert.enums import Categories, Predicates, Qualifiers
|
|
14
|
+
from tablassert.ingests import from_yaml
|
|
15
|
+
from tablassert.models import Section
|
|
16
|
+
|
|
17
|
+
from tablassist.utils import (
|
|
18
|
+
get_biolink_html_documentation,
|
|
19
|
+
get_json_response,
|
|
20
|
+
get_static_content,
|
|
21
|
+
parse_yaml_string,
|
|
22
|
+
validate_section,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
# Root cyclopts application; the @CLI.command decorators in this module register each function on it.
CLI: App = App()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@CLI.command
def docs_table_config() -> str:
    """Fetch Tablassert table configuration spec documentation."""
    # The page is the raw Markdown file on the Tablassert `main` branch; the download
    # itself is delegated to get_static_content.
    return get_static_content("https://raw.githubusercontent.com/SkyeAv/Tablassert/main/docs/configuration/table.md")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@CLI.command
def docs_advanced_examples() -> str:
    """Fetch advanced table configuration examples documentation."""
    # Raw Markdown for the advanced-example page on the Tablassert `main` branch.
    return get_static_content(
        "https://raw.githubusercontent.com/SkyeAv/Tablassert/main/docs/configuration/advanced-example.md"
    )
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@CLI.command
def docs_tutorial() -> str:
    """Fetch Tablassert CLI tutorial documentation."""
    # raw.githubusercontent.com paths are <owner>/<repo>/<ref>/<path>. The "blob/" segment
    # only belongs to github.com HTML URLs; here it would be read as the ref and the fetch
    # would fail, so the URL follows the same `main/docs/...` layout as the sibling commands.
    url: str = "https://raw.githubusercontent.com/SkyeAv/Tablassert/main/docs/tutorial.md"
    return get_static_content(url)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@CLI.command
def example_no_sections() -> str:
    """Fetch a production YAML config example without sections."""
    # ALAM1.yaml from the MOKGConfiguration repository serves as the section-free sample.
    return get_static_content(
        "https://raw.githubusercontent.com/glusman-team/MOKGConfiguration/refs/heads/master/TABLE/MBKG/ALAM1.yaml"
    )
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@CLI.command
def example_with_sections() -> str:
    """Fetch a production YAML config example with sections."""
    # BLANTON1.yaml from the MOKGConfiguration repository serves as the sectioned sample.
    return get_static_content(
        "https://raw.githubusercontent.com/glusman-team/MOKGConfiguration/refs/heads/master/TABLE/MBKG/BLANTON1.yaml"
    )
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# Configurator API username, read once at import time; falls back to "" when the variable is unset.
TABLASSIST_USERNAME: str = os.environ.get("TABLASSIST_USERNAME", "")
|
|
68
|
+
# Configurator API key, read once at import time; falls back to "" when the variable is unset.
TABLASSIST_API_KEY: str = os.environ.get("TABLASSIST_API_KEY", "")
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@CLI.command
def search_curies(term: str) -> Union[list[Any], dict[str, Any]]:
    """Search CURIE candidates by term via Configurator API."""
    endpoint = "https://hypatia.systemsbiology.net/configurator-api/search-for-curies"
    # Credentials are the module-level TABLASSIST_* values; the search term rides along with them.
    query: dict[str, Any] = {
        "username": TABLASSIST_USERNAME,
        "api-key": TABLASSIST_API_KEY,
        "term": term,
    }
    return get_json_response(endpoint, query)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@CLI.command
def get_curie_info(curie: str) -> Union[list[Any], dict[str, Any]]:
    """Resolve a single canonical CURIE record."""
    # Start from the shared credentials, then add the CURIE being looked up.
    query: dict[str, Any] = {"username": TABLASSIST_USERNAME, "api-key": TABLASSIST_API_KEY}
    query["curie"] = curie
    return get_json_response("https://hypatia.systemsbiology.net/configurator-api/get-canonical-curie-info", query)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@CLI.command
def download_pmc_tar(pmc_id: int, dest_dir: Path = Path(".")) -> dict[str, Any]:
    """Download and extract a PMC tar archive by PMC ID.

    The archive is streamed from the Configurator API into ``dest_dir``, unpacked there
    with ``tar``, and the directory is then listed with ``ls -lh``.

    Args:
        pmc_id: PMC identifier sent to the API as the ``pmc-id`` query parameter.
        dest_dir: Directory that receives the archive and its extracted contents;
            created when it does not exist.

    Returns:
        For an HTTP 404, the JSON body returned by the API. Otherwise a dict with
        ``status`` (``"ok"`` or ``"error"``), ``stdout`` and ``stderr``.
    """
    url: str = "https://hypatia.systemsbiology.net/configurator-api/download-from-pmc-tars"
    params: dict[str, Any] = {"username": TABLASSIST_USERNAME, "api-key": TABLASSIST_API_KEY, "pmc-id": pmc_id}

    with httpx.stream("GET", url, params=params) as response:
        if response.status_code == 404:
            # A streamed body has to be read explicitly before .json() may be used.
            response.read()
            error: dict[str, Any] = response.json()
            return error

        if response.status_code >= 400:
            # Any other failure would otherwise be saved to disk as if it were an archive.
            response.read()
            return {"status": "error", "stdout": "", "stderr": f"HTTP {response.status_code}: {response.text}"}

        # The header may be absent, quoted, or followed by further parameters.
        disposition: str = response.headers.get("content-disposition", "")
        matches = re.search(r'filename="?([^";]+)', disposition)

        # Keep only the final path component so a server-supplied name cannot escape dest_dir.
        filename: str = Path(matches.group(1).strip()).name if matches else ""
        if filename in ("", ".", ".."):
            filename = "download.tar.xz"

        dest_dir.mkdir(parents=True, exist_ok=True)
        archive: Path = dest_dir / filename
        with archive.open("wb") as f:
            for chunk in response.iter_bytes():
                f.write(chunk)

    # Argument lists are executed without a shell: combining a list with shell=True runs
    # only the first element, and "&&" inside a list is never interpreted as an operator.
    extract = subprocess.run(["tar", "-xvf", str(archive), "-C", str(dest_dir)], capture_output=True, text=True)
    if extract.returncode != 0:
        return {"status": "error", "stdout": extract.stdout, "stderr": extract.stderr}

    # Mirrors the intended `tar ... && ls -lh dest_dir`: list only after a successful extract.
    listing = subprocess.run(["ls", "-lh", str(dest_dir)], capture_output=True, text=True)
    return {
        "status": "ok" if listing.returncode == 0 else "error",
        "stdout": extract.stdout + listing.stdout,
        "stderr": extract.stderr + listing.stderr,
    }
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
@CLI.command
def search_gene_curies(term: str, ncbi_taxon: int = 9606) -> Union[list[Any], dict[str, Any]]:
    """Search gene CURIE candidates by term within an NCBI taxon."""
    endpoint = "https://hypatia.systemsbiology.net/configurator-api/search-for-gene-curies-in-ncbi-taxon"
    # Shared credentials first, then the search-specific fields in the same key order as before.
    query: dict[str, Any] = {"username": TABLASSIST_USERNAME, "api-key": TABLASSIST_API_KEY}
    query["term"] = term
    query["ncbi-taxon-id"] = ncbi_taxon
    return get_json_response(endpoint, query)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
@CLI.command
def resolve_taxon_id(organism_name: str) -> Union[list[Any], dict[str, Any]]:
    """Resolve an NCBI Taxon ID from an organism name."""
    endpoint = "https://hypatia.systemsbiology.net/configurator-api/get-ncbi-taxon-id-from-organism-name"
    # Shared credentials first, then the organism name to resolve.
    query: dict[str, Any] = {"username": TABLASSIST_USERNAME, "api-key": TABLASSIST_API_KEY}
    query["organism-name"] = organism_name
    return get_json_response(endpoint, query)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
@CLI.command
def list_categories() -> list[str]:
    """List all supported Biolink categories."""
    # One entry per member of tablassert's Categories enum, in definition order.
    names: list[str] = [category.value for category in Categories]
    return names
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
@CLI.command
def list_predicates() -> list[str]:
    """List all supported Biolink predicates."""
    # One entry per member of tablassert's Predicates enum, in definition order.
    names: list[str] = [predicate.value for predicate in Predicates]
    return names
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
@CLI.command
def list_qualifiers() -> list[str]:
    """List all supported Biolink qualifiers."""
    # One entry per member of tablassert's Qualifiers enum, in definition order.
    names: list[str] = [qualifier.value for qualifier in Qualifiers]
    return names
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
@CLI.command
def section_schema() -> dict[str, Any]:
    """Return the Section Pydantic model as JSON schema."""
    # Delegates entirely to the model's own schema generator.
    schema: dict[str, Any] = Section.model_json_schema()
    return schema
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
@CLI.command
def validate_section_str(yaml_string: str) -> dict[str, Any]:
    """Validate a single YAML table configuration section from a string."""
    parsed: Any = parse_yaml_string(yaml_string)
    # A dict carrying an "error" key is treated as the parser's failure report and is
    # handed back untouched; anything else goes on to section validation.
    parse_failed = isinstance(parsed, dict) and "error" in parsed
    return parsed if parse_failed else validate_section(parsed)
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
@CLI.command
def validate_config_str(yaml_string: str) -> Union[dict[str, Any], list[dict[str, Any]]]:
    """Validate a full YAML table configuration from a string."""
    parsed: Any = parse_yaml_string(yaml_string)
    # A dict carrying an "error" key is treated as the parser's failure report.
    if isinstance(parsed, dict) and "error" in parsed:
        return parsed

    # A lone section is wrapped so that both shapes are validated the same way.
    if not isinstance(parsed, list):
        parsed = [parsed]

    # One validation result per section, in document order.
    return [validate_section(entry) for entry in parsed]
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
@CLI.command
def validate_config_file(yaml_file: Path) -> Union[dict[str, Any], list[dict[str, Any]]]:
    """Validate a full YAML table configuration from a file path."""
    # YAML failures are reported as an {"error": ...} dict instead of propagating;
    # the handlers run from most specific to the generic YAMLError.
    try:
        loaded: Any = from_yaml(yaml_file)
    except yaml.scanner.ScannerError as e:  # pyright: ignore
        return {"error": f"YAML Syntax error at line {e.problem_mark.line + 1}: {e.problem}"}
    except yaml.parser.ParserError as e:  # pyright: ignore
        return {"error": f"YAML Parser error: {e}"}
    except yaml.YAMLError as e:
        return {"error": f"YAML error: {e}"}

    # A lone section is wrapped so that both shapes are validated the same way.
    if not isinstance(loaded, list):
        loaded = [loaded]

    # One validation result per section, in document order.
    return [validate_section(entry) for entry in loaded]
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
@CLI.command
def docs_category(category: str) -> str:
    """Fetch Biolink documentation for a specific category."""
    page = get_biolink_html_documentation(category)
    # Any falsy lookup result is reported as an unsupported category.
    if page:
        return page
    return f"ERROR | {category} is not a supported biolink category"
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
@CLI.command
def docs_predicate(predicate: str) -> str:
    """Fetch Biolink documentation for a specific predicate."""
    page = get_biolink_html_documentation(predicate)
    # Any falsy lookup result is reported as an unsupported predicate.
    if page:
        return page
    return f"ERROR | {predicate} is not a supported biolink predicate"
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
@CLI.command
def docs_qualifier(qualifier: str) -> str:
    """Fetch Biolink documentation for a specific qualifier."""
    page = get_biolink_html_documentation(qualifier)
    # Any falsy lookup result is reported as an unsupported qualifier.
    if page:
        return page
    return f"ERROR | {qualifier} is not a supported biolink qualifier"
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
@CLI.command
def extract_text(file: Path, extension: Optional[str] = None) -> str:
    """Extract text from a file using textract (PDF, DOCX, etc.).

    Args:
        file: Path of the document to read.
        extension: Optional file-type override handed to textract; ignored for
            files whose own suffix is ``.pdf``.

    Returns:
        The extracted text.
    """
    # Path.suffix keeps the leading dot (".pdf"), so compare against the dotted form;
    # lower-casing also catches "REPORT.PDF".
    if file.suffix.lower() == ".pdf":
        raw = textract.process(str(file), method="pdfminer")
    elif extension:
        raw = textract.process(str(file), extension=extension)
    else:
        raw = textract.process(str(file))

    # textract hands back encoded bytes (UTF-8 unless told otherwise); decode them so the
    # declared str return type actually holds.
    return raw.decode("utf-8", errors="replace") if isinstance(raw, bytes) else raw
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
@CLI.command
def excel_sheets(file: Path) -> list[str]:
    """List sheet names in an Excel spreadsheet."""
    # Open the workbook with fastexcel and hand back its sheet_names attribute directly.
    return fastexcel.read_excel(file).sheet_names
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
@CLI.command
def preview_excel(
    file: Path, sheet_name: str, n_rows: int, engine: Literal["calamine", "openpyxl", "xlsx2csv"] = "calamine"
) -> dict[str, Any]:
    """Preview the first N rows of an Excel sheet as a dict."""
    # The whole sheet is read first and only then trimmed to the first n_rows rows.
    preview: pl.DataFrame = pl.read_excel(
        source=file, sheet_name=sheet_name, engine=engine, infer_schema_length=None
    ).head(n_rows)
    # as_series=False yields plain Python lists keyed by column name.
    return preview.to_dict(as_series=False)
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
@CLI.command
def preview_csv(file: Path, n_rows: int, separator: str = ",") -> dict[str, Any]:
    """Preview the first N rows of a CSV/tabular file as a dict."""
    # The row limit is applied by the reader itself; as_series=False yields plain
    # Python lists keyed by column name.
    return pl.read_csv(source=file, n_rows=n_rows, separator=separator).to_dict(as_series=False)
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def serve() -> None:
    """Run the cyclopts application; target of the `tablassist` console script."""
    # Invoking the App object parses the command line and dispatches to the chosen command.
    CLI()
|