tcr-explorer 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tcr_explorer-0.1.0/LICENSE +21 -0
- tcr_explorer-0.1.0/PKG-INFO +223 -0
- tcr_explorer-0.1.0/README.md +186 -0
- tcr_explorer-0.1.0/pyproject.toml +78 -0
- tcr_explorer-0.1.0/setup.cfg +4 -0
- tcr_explorer-0.1.0/src/tcr_explorer/__init__.py +1 -0
- tcr_explorer-0.1.0/src/tcr_explorer/annotator.py +66 -0
- tcr_explorer-0.1.0/src/tcr_explorer/api.py +1151 -0
- tcr_explorer-0.1.0/src/tcr_explorer/ask.py +150 -0
- tcr_explorer-0.1.0/src/tcr_explorer/bootstrap.py +208 -0
- tcr_explorer-0.1.0/src/tcr_explorer/cdr_enricher.py +230 -0
- tcr_explorer-0.1.0/src/tcr_explorer/config.py +25 -0
- tcr_explorer-0.1.0/src/tcr_explorer/constant_regions.py +81 -0
- tcr_explorer-0.1.0/src/tcr_explorer/d_regions.py +17 -0
- tcr_explorer-0.1.0/src/tcr_explorer/data/blosum62.json +1 -0
- tcr_explorer-0.1.0/src/tcr_explorer/data/germline/ATTRIBUTION.md +33 -0
- tcr_explorer-0.1.0/src/tcr_explorer/data/germline/HUMAN/C-region-motifs.tsv +33 -0
- tcr_explorer-0.1.0/src/tcr_explorer/data/germline/HUMAN/IMGTgeneDLwarnings.txt +10 -0
- tcr_explorer-0.1.0/src/tcr_explorer/data/germline/HUMAN/J-region-motifs.tsv +100 -0
- tcr_explorer-0.1.0/src/tcr_explorer/data/germline/HUMAN/TRA.fasta +1344 -0
- tcr_explorer-0.1.0/src/tcr_explorer/data/germline/HUMAN/TRB.fasta +1530 -0
- tcr_explorer-0.1.0/src/tcr_explorer/data/germline/HUMAN/TRD.fasta +225 -0
- tcr_explorer-0.1.0/src/tcr_explorer/data/germline/HUMAN/TRG.fasta +510 -0
- tcr_explorer-0.1.0/src/tcr_explorer/data/germline/HUMAN/data-production-date.tsv +6 -0
- tcr_explorer-0.1.0/src/tcr_explorer/data/germline/HUMAN/imgt-data.fasta +3854 -0
- tcr_explorer-0.1.0/src/tcr_explorer/data/germline/MOUSE/C-region-motifs.tsv +15 -0
- tcr_explorer-0.1.0/src/tcr_explorer/data/germline/MOUSE/IMGTgeneDLwarnings.txt +10 -0
- tcr_explorer-0.1.0/src/tcr_explorer/data/germline/MOUSE/J-region-motifs.tsv +104 -0
- tcr_explorer-0.1.0/src/tcr_explorer/data/germline/MOUSE/TRA.fasta +2442 -0
- tcr_explorer-0.1.0/src/tcr_explorer/data/germline/MOUSE/TRB.fasta +482 -0
- tcr_explorer-0.1.0/src/tcr_explorer/data/germline/MOUSE/TRD.fasta +349 -0
- tcr_explorer-0.1.0/src/tcr_explorer/data/germline/MOUSE/TRG.fasta +243 -0
- tcr_explorer-0.1.0/src/tcr_explorer/data/germline/MOUSE/data-production-date.tsv +6 -0
- tcr_explorer-0.1.0/src/tcr_explorer/data/germline/MOUSE/imgt-data.fasta +3930 -0
- tcr_explorer-0.1.0/src/tcr_explorer/data_paths.py +73 -0
- tcr_explorer-0.1.0/src/tcr_explorer/data_sources.py +101 -0
- tcr_explorer-0.1.0/src/tcr_explorer/dossier.py +409 -0
- tcr_explorer-0.1.0/src/tcr_explorer/dossier_epitopes.py +97 -0
- tcr_explorer-0.1.0/src/tcr_explorer/dossier_models.py +241 -0
- tcr_explorer-0.1.0/src/tcr_explorer/fasta_parser.py +74 -0
- tcr_explorer-0.1.0/src/tcr_explorer/file_ingest.py +128 -0
- tcr_explorer-0.1.0/src/tcr_explorer/frontend.py +220 -0
- tcr_explorer-0.1.0/src/tcr_explorer/germline_db.py +115 -0
- tcr_explorer-0.1.0/src/tcr_explorer/germline_sets.py +133 -0
- tcr_explorer-0.1.0/src/tcr_explorer/input_router.py +90 -0
- tcr_explorer-0.1.0/src/tcr_explorer/kmer_aligner.py +94 -0
- tcr_explorer-0.1.0/src/tcr_explorer/llm_client.py +58 -0
- tcr_explorer-0.1.0/src/tcr_explorer/mcp_clients.py +16 -0
- tcr_explorer-0.1.0/src/tcr_explorer/mcp_server.py +110 -0
- tcr_explorer-0.1.0/src/tcr_explorer/models.py +289 -0
- tcr_explorer-0.1.0/src/tcr_explorer/msa.py +333 -0
- tcr_explorer-0.1.0/src/tcr_explorer/nl_query.py +102 -0
- tcr_explorer-0.1.0/src/tcr_explorer/py.typed +0 -0
- tcr_explorer-0.1.0/src/tcr_explorer/query_nl.py +85 -0
- tcr_explorer-0.1.0/src/tcr_explorer/query_router.py +123 -0
- tcr_explorer-0.1.0/src/tcr_explorer/reconstructor.py +392 -0
- tcr_explorer-0.1.0/src/tcr_explorer/records.py +542 -0
- tcr_explorer-0.1.0/src/tcr_explorer/records_build.py +585 -0
- tcr_explorer-0.1.0/src/tcr_explorer/similarity.py +207 -0
- tcr_explorer-0.1.0/src/tcr_explorer/tcr_align.py +373 -0
- tcr_explorer-0.1.0/src/tcr_explorer.egg-info/PKG-INFO +223 -0
- tcr_explorer-0.1.0/src/tcr_explorer.egg-info/SOURCES.txt +144 -0
- tcr_explorer-0.1.0/src/tcr_explorer.egg-info/dependency_links.txt +1 -0
- tcr_explorer-0.1.0/src/tcr_explorer.egg-info/entry_points.txt +3 -0
- tcr_explorer-0.1.0/src/tcr_explorer.egg-info/requires.txt +16 -0
- tcr_explorer-0.1.0/src/tcr_explorer.egg-info/top_level.txt +1 -0
- tcr_explorer-0.1.0/tests/test_align_api.py +15 -0
- tcr_explorer-0.1.0/tests/test_annotator.py +17 -0
- tcr_explorer-0.1.0/tests/test_api_prediction.py +61 -0
- tcr_explorer-0.1.0/tests/test_ask.py +124 -0
- tcr_explorer-0.1.0/tests/test_ask_api.py +13 -0
- tcr_explorer-0.1.0/tests/test_assign_api.py +40 -0
- tcr_explorer-0.1.0/tests/test_baseline_clients.py +20 -0
- tcr_explorer-0.1.0/tests/test_batman_cache.py +56 -0
- tcr_explorer-0.1.0/tests/test_batman_enrichment.py +38 -0
- tcr_explorer-0.1.0/tests/test_batman_pmhc.py +307 -0
- tcr_explorer-0.1.0/tests/test_batman_scorer.py +100 -0
- tcr_explorer-0.1.0/tests/test_batman_server.py +184 -0
- tcr_explorer-0.1.0/tests/test_batman_tcrdist.py +318 -0
- tcr_explorer-0.1.0/tests/test_batman_training_data.py +73 -0
- tcr_explorer-0.1.0/tests/test_bootstrap.py +166 -0
- tcr_explorer-0.1.0/tests/test_caching.py +91 -0
- tcr_explorer-0.1.0/tests/test_cdr_enricher.py +192 -0
- tcr_explorer-0.1.0/tests/test_ci_workflows.py +223 -0
- tcr_explorer-0.1.0/tests/test_config.py +35 -0
- tcr_explorer-0.1.0/tests/test_constant_regions_human.py +54 -0
- tcr_explorer-0.1.0/tests/test_data_paths.py +30 -0
- tcr_explorer-0.1.0/tests/test_data_sources.py +65 -0
- tcr_explorer-0.1.0/tests/test_dossier.py +92 -0
- tcr_explorer-0.1.0/tests/test_dossier_api.py +47 -0
- tcr_explorer-0.1.0/tests/test_dossier_epitopes.py +52 -0
- tcr_explorer-0.1.0/tests/test_dossier_honesty.py +25 -0
- tcr_explorer-0.1.0/tests/test_dossier_models.py +27 -0
- tcr_explorer-0.1.0/tests/test_dossier_neighbours.py +58 -0
- tcr_explorer-0.1.0/tests/test_download_wiring.py +16 -0
- tcr_explorer-0.1.0/tests/test_env_config.py +254 -0
- tcr_explorer-0.1.0/tests/test_frontend.py +147 -0
- tcr_explorer-0.1.0/tests/test_germline_db.py +53 -0
- tcr_explorer-0.1.0/tests/test_germline_sets.py +31 -0
- tcr_explorer-0.1.0/tests/test_health.py +49 -0
- tcr_explorer-0.1.0/tests/test_input_router.py +37 -0
- tcr_explorer-0.1.0/tests/test_kmer_aligner.py +29 -0
- tcr_explorer-0.1.0/tests/test_llm_client.py +54 -0
- tcr_explorer-0.1.0/tests/test_makefile.py +99 -0
- tcr_explorer-0.1.0/tests/test_mcp_align.py +5 -0
- tcr_explorer-0.1.0/tests/test_mcp_server.py +25 -0
- tcr_explorer-0.1.0/tests/test_mhc_integration.py +112 -0
- tcr_explorer-0.1.0/tests/test_mhc_organism.py +54 -0
- tcr_explorer-0.1.0/tests/test_mhc_server.py +385 -0
- tcr_explorer-0.1.0/tests/test_models.py +309 -0
- tcr_explorer-0.1.0/tests/test_msa.py +92 -0
- tcr_explorer-0.1.0/tests/test_msa_codon.py +66 -0
- tcr_explorer-0.1.0/tests/test_nl_query.py +203 -0
- tcr_explorer-0.1.0/tests/test_packaged_data.py +21 -0
- tcr_explorer-0.1.0/tests/test_pagination.py +58 -0
- tcr_explorer-0.1.0/tests/test_query_api.py +25 -0
- tcr_explorer-0.1.0/tests/test_query_nl.py +26 -0
- tcr_explorer-0.1.0/tests/test_query_router.py +61 -0
- tcr_explorer-0.1.0/tests/test_reconstruct_infer.py +100 -0
- tcr_explorer-0.1.0/tests/test_reconstruction_fullchain.py +77 -0
- tcr_explorer-0.1.0/tests/test_reconstructor_frame.py +49 -0
- tcr_explorer-0.1.0/tests/test_record_builder.py +49 -0
- tcr_explorer-0.1.0/tests/test_records_api.py +20 -0
- tcr_explorer-0.1.0/tests/test_records_build.py +83 -0
- tcr_explorer-0.1.0/tests/test_records_null_robustness.py +93 -0
- tcr_explorer-0.1.0/tests/test_records_retrieval.py +115 -0
- tcr_explorer-0.1.0/tests/test_requirements.py +143 -0
- tcr_explorer-0.1.0/tests/test_schema_descriptions.py +156 -0
- tcr_explorer-0.1.0/tests/test_search_hla_mhc.py +22 -0
- tcr_explorer-0.1.0/tests/test_similar_api.py +21 -0
- tcr_explorer-0.1.0/tests/test_similarity.py +71 -0
- tcr_explorer-0.1.0/tests/test_similarity_models.py +14 -0
- tcr_explorer-0.1.0/tests/test_tcr_align.py +215 -0
- tcr_explorer-0.1.0/tests/test_tempo_api_integration.py +76 -0
- tcr_explorer-0.1.0/tests/test_tempo_baseline.py +91 -0
- tcr_explorer-0.1.0/tests/test_tempo_batcave.py +47 -0
- tcr_explorer-0.1.0/tests/test_tempo_binary.py +73 -0
- tcr_explorer-0.1.0/tests/test_tempo_crossreact.py +145 -0
- tcr_explorer-0.1.0/tests/test_tempo_pv_fix.py +29 -0
- tcr_explorer-0.1.0/tests/test_tempo_scorer.py +78 -0
- tcr_explorer-0.1.0/tests/test_tempo_server.py +106 -0
- tcr_explorer-0.1.0/tests/test_tempo_tsp.py +61 -0
- tcr_explorer-0.1.0/tests/test_ui.py +27 -0
- tcr_explorer-0.1.0/tests/test_ui_browser.py +463 -0
- tcr_explorer-0.1.0/tests/test_unitcr_index_build.py +44 -0
- tcr_explorer-0.1.0/tests/test_validate_config.py +707 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Kilian Maire
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tcr-explorer
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Federated TCR analysis: records retrieval, germline allele assignment, reconstruction, dossiers, and similarity, over a web UI, REST API, and MCP server
|
|
5
|
+
Author: Kilian Maire
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/KilianMaire/tcr-explorer
|
|
8
|
+
Project-URL: Issues, https://github.com/KilianMaire/tcr-explorer/issues
|
|
9
|
+
Keywords: TCR,T-cell-receptor,immunology,AIRR,VDJ,germline,IMGT,VDJdb,IEDB,McPAS,bioinformatics,MCP,immune-repertoire,epitope
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Typing :: Typed
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Requires-Dist: mcp<2,>=1.0.0
|
|
22
|
+
Requires-Dist: starlette<0.48,>=0.40
|
|
23
|
+
Requires-Dist: fastapi==0.116.1
|
|
24
|
+
Requires-Dist: uvicorn[standard]==0.35.0
|
|
25
|
+
Requires-Dist: pydantic==2.11.7
|
|
26
|
+
Requires-Dist: python-multipart==0.0.20
|
|
27
|
+
Requires-Dist: httpx==0.28.1
|
|
28
|
+
Requires-Dist: pandas>=2.0.0
|
|
29
|
+
Requires-Dist: pyarrow>=15.0.0
|
|
30
|
+
Requires-Dist: biopython==1.85
|
|
31
|
+
Requires-Dist: tidytcells>=2.0.0
|
|
32
|
+
Requires-Dist: platformdirs>=4
|
|
33
|
+
Requires-Dist: stitchr>=1.3
|
|
34
|
+
Provides-Extra: ui-legacy
|
|
35
|
+
Requires-Dist: streamlit>=1.35.0; extra == "ui-legacy"
|
|
36
|
+
Dynamic: license-file
|
|
37
|
+
|
|
38
|
+
# TCR Explorer
|
|
39
|
+
|
|
40
|
+
A federated tool for T cell receptor analysis. It retrieves known TCR records (VDJdb, IEDB, McPAS, TCR3d), assigns germline V and J genes down to the allele level, reconstructs full membrane-bound chains, builds per-receptor dossiers, and finds similar receptors. The same pure functions back a web UI, a REST API, and an MCP server, so an assistant can drive the whole tool.
|
|
41
|
+
|
|
42
|
+
## How the data works
|
|
43
|
+
|
|
44
|
+
The package ships the IMGT germline (bundled under CC BY 4.0) but **no record datasets**. On first use you run `tcr-explorer-refresh` once. It downloads the four record datasets (VDJdb, IEDB, McPAS, TCR3d) from each source's own official endpoint into a local folder, then harmonizes them into a single records index. After that, everything runs in one process against that local index, offline, until you refresh again to pull fresh data.
|
|
45
|
+
|
|
46
|
+
This means the tool never redistributes the record datasets (their licenses vary): each user fetches those directly from the source under that source's own terms. The germline is different: IMGT is CC BY 4.0, which permits redistribution with attribution, so it is bundled and germline features work offline out of the box. To pull a newer IMGT germline yourself, run `tcr-explorer-refresh --germline`. See [Data sources](#data-sources).
|
|
47
|
+
|
|
48
|
+
## Requirements
|
|
49
|
+
|
|
50
|
+
- Python 3.10 or newer.
|
|
51
|
+
- Internet access for the initial `tcr-explorer-refresh` (a few minutes, roughly 60 MB). Offline afterward until you refresh.
|
|
52
|
+
|
|
53
|
+
## Install
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
pip install tcr-explorer
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
Or from a checkout:
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
python -m venv .venv
|
|
63
|
+
source .venv/bin/activate # Windows: .venv\Scripts\activate
|
|
64
|
+
pip install -e .
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## First run
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
tcr-explorer-refresh
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
This downloads the four record datasets and builds the index into a local data folder (a platform-specific user data directory, or wherever `TCR_EXPLORER_DATA` points). The IMGT germline is already bundled, so this step does not touch IMGT. Re-run it any time to update the records. If a tool is used before the first refresh, it returns a clear message asking you to run this command.
|
|
74
|
+
|
|
75
|
+
To pull a fresher IMGT germline than the bundled one, run `tcr-explorer-refresh --germline` (needs IMGT/GENE-DB reachable). It writes into the local data folder, which the tool prefers over the bundled copy.
|
|
76
|
+
|
|
77
|
+
## Use it
|
|
78
|
+
|
|
79
|
+
Two front doors, both a single process.
|
|
80
|
+
|
|
81
|
+
### As an MCP server (recommended)
|
|
82
|
+
|
|
83
|
+
Point your own assistant at TCR Explorer over MCP and ask questions in plain language. This targets Claude Desktop and Claude Code, which run a local stdio MCP server. ChatGPT does not run local stdio MCP servers the same way, so use Claude for the paste and go flow. See [Connect your assistant](#connect-your-assistant).
|
|
84
|
+
|
|
85
|
+
### As a web app and REST API
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
PYTHONPATH=src uvicorn tcr_explorer.api:app --port 8000
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
Open the query box at <http://localhost:8000/ui>, or call the REST API directly. Health check:
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
curl http://localhost:8000/health # {"status":"ok"}
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Connect your assistant
|
|
98
|
+
|
|
99
|
+
TCR Explorer ships an MCP server (console entry point `tcr-explorer-mcp`). Add this to your assistant's MCP configuration:
|
|
100
|
+
|
|
101
|
+
```json
|
|
102
|
+
{"mcpServers":{"tcr-explorer":{"command":"uvx","args":["--from","tcr-explorer","tcr-explorer-mcp"]}}}
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
Or paste this prompt into Claude to have it set the connection up for you:
|
|
106
|
+
|
|
107
|
+
```
|
|
108
|
+
Set up the TCR Explorer MCP server so you can answer T cell receptor questions against real immunology databases. First install it (pip install tcr-explorer, or use uvx --from tcr-explorer), then run tcr-explorer-refresh once in a terminal to download the datasets into a local folder (a few minutes). Then add an MCP server named tcr-explorer that runs `uvx --from tcr-explorer tcr-explorer-mcp` (if uvx is unavailable, run python -m tcr_explorer.mcp_server). It exposes these read only tools: retrieve_tcr_records, assign_tcr_alleles, get_tcr_dossier, find_similar_tcrs, align_tcr_genes, and ask_tcr. If a tool reports the data is not downloaded yet, tell me to run tcr-explorer-refresh. After adding it, confirm the connection and suggest three example questions I can ask.
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
To run an unreleased version directly from a git repository instead of PyPI, use the git form: `uvx --from git+<your-repo-url> tcr-explorer-mcp`.
|
|
112
|
+
|
|
113
|
+
The read only MCP tools are `retrieve_tcr_records`, `assign_tcr_alleles`, `get_tcr_dossier`, `find_similar_tcrs`, `align_tcr_genes`, and `ask_tcr`.
|
|
114
|
+
|
|
115
|
+
## REST API
|
|
116
|
+
|
|
117
|
+
All of these run in process against the local index.
|
|
118
|
+
|
|
119
|
+
### Unified query box
|
|
120
|
+
|
|
121
|
+
**POST** `/v1/tcr/query` routes a single input (a CDR3, a full chain, a gene name, a record id, or a phrase) to the right tool.
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
curl -s -X POST http://localhost:8000/v1/tcr/query \
|
|
125
|
+
-H "Content-Type: application/json" \
|
|
126
|
+
-d '{"query":"CASSLGGAGGTDTQYF","species":"human"}'
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
### Germline assignment
|
|
130
|
+
|
|
131
|
+
**POST** `/v1/tcr/assign` assigns a TCR sequence (nucleotide or amino acid, CDR3, region, or full chain) to V and J alleles, with per-region identity, co-optimal ties, CDR3 extraction, and an honest refusal to call a V allele from a bare CDR3.
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
curl -s -X POST http://localhost:8000/v1/tcr/assign \
|
|
135
|
+
-H "Content-Type: application/json" \
|
|
136
|
+
-d '{"sequence":"CASSLGGAGGTDTQYF","species":"human"}'
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### Records retrieval
|
|
140
|
+
|
|
141
|
+
**POST** `/v1/tcr/records` searches the harmonized records index.
|
|
142
|
+
|
|
143
|
+
```bash
|
|
144
|
+
curl -s -X POST http://localhost:8000/v1/tcr/records \
|
|
145
|
+
-H "Content-Type: application/json" \
|
|
146
|
+
-d '{"cdr3":"CASSLGGAGGTDTQYF","species":"human","limit":20}'
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### Chain reconstruction
|
|
150
|
+
|
|
151
|
+
**POST** `/reconstruct` builds a full membrane bound chain. Provide V, J, and CDR3, or a CDR3 alone (V and J are inferred from the records that carry the same CDR3).
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
curl -s -X POST http://localhost:8000/reconstruct \
|
|
155
|
+
-H "Content-Type: application/json" \
|
|
156
|
+
-d '{"cdr3_aa":"CASSLGGAGGTDTQYF","species":"human"}'
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
### CDR1 and CDR2 prediction
|
|
160
|
+
|
|
161
|
+
**GET** `/predict/cdr` returns germline CDR1 and CDR2 for a TCR V gene from IMGT germline data.
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
curl "http://localhost:8000/predict/cdr?v_gene=TRBV12-3&species=human"
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
## Optional: MHC allele sequences
|
|
168
|
+
|
|
169
|
+
The records index does not contain MHC allele sequences. If you want live lookup of those from EBI IMGT/HLA and IPD-MHC, start the optional hla and mhc proxies with one command:
|
|
170
|
+
|
|
171
|
+
```bash
|
|
172
|
+
docker-compose up
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
The `/search` endpoint is scoped to these two sources: `{"source": "hla"}` or `{"source": "mhc"}`. Any other source returns HTTP 400 pointing at `/v1/tcr/records`, which is where TCR record search lives.
|
|
176
|
+
|
|
177
|
+
## Environment variables
|
|
178
|
+
|
|
179
|
+
All optional.
|
|
180
|
+
|
|
181
|
+
| Variable | Default | Description |
|
|
182
|
+
|----------|---------|-------------|
|
|
183
|
+
| `TCR_EXPLORER_DATA` | platform user data dir | Local folder where `tcr-explorer-refresh` downloads datasets and builds the index |
|
|
184
|
+
| `TCR_EXPLORER_MAX_AGE_DAYS` | `30` | Age after which the local index is flagged stale (a refresh is suggested in query warnings, never forced) |
|
|
185
|
+
| `RECORDS_INDEX_PATH` | `<data dir>/records_index.parquet` | Override the records index path directly |
|
|
186
|
+
| `HLA_SERVER_URL` | `http://127.0.0.1:8101` | HLA allele sequence proxy (optional) |
|
|
187
|
+
| `MHC_SERVER_URL` | `http://127.0.0.1:8105` | IPD-MHC allele sequence proxy (optional) |
|
|
188
|
+
| `LLM_BASE_URL` | *(empty)* | OpenAI compatible endpoint for the free text `ask` path (falls back to a heuristic parser when unset) |
|
|
189
|
+
| `LLM_MODEL` | `local-model` | Model id for the `ask` path |
|
|
190
|
+
|
|
191
|
+
## Data sources
|
|
192
|
+
|
|
193
|
+
TCR Explorer cites the following. The four record datasets are downloaded on your machine from their official endpoints and are not redistributed; the IMGT germline is bundled with the package under CC BY 4.0. Please cite the ones you use.
|
|
194
|
+
|
|
195
|
+
- **VDJdb** (downloaded). Goncharov M. et al. VDJdb in the pandemic era: a compendium of T cell receptors specific for SARS-CoV-2. Nature Methods, 2022. <https://github.com/antigenomics/vdjdb-db>
|
|
196
|
+
- **IEDB** (downloaded, CC BY 4.0). Vita R. et al. The Immune Epitope Database (IEDB): 2024 update. Nucleic Acids Research, 2025. <https://www.iedb.org>
|
|
197
|
+
- **McPAS-TCR** (downloaded). Tickotsky N. et al. McPAS-TCR: a manually curated catalogue of pathology associated T cell receptor sequences. Bioinformatics, 2017. <https://friedmanlab.weizmann.ac.il/McPAS-TCR/>
|
|
198
|
+
- **TCR3d** (downloaded). Lin V. et al. TCR3d 2.0: expanding the T cell receptor structure database. Nucleic Acids Research, 2025. <https://tcr3d.ibbr.umd.edu>
|
|
199
|
+
- **IMGT germline** (bundled, CC BY 4.0, release 20268-7). Lefranc M-P. et al. IMGT, the international ImMunoGeneTics information system. Reformatted via stitchr and IMGTgeneDL (MIT). See `src/tcr_explorer/data/germline/ATTRIBUTION.md`. <https://www.imgt.org>
|
|
200
|
+
|
|
201
|
+
## Run tests
|
|
202
|
+
|
|
203
|
+
```bash
|
|
204
|
+
PYTHONPATH=src pytest tests/ -v
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
## Architecture
|
|
208
|
+
|
|
209
|
+
The core is a set of single source pure functions (records retrieval, germline assignment, reconstruction, dossiers, similarity) that read from the locally downloaded index. The REST API, the MCP server, and the web query box all call these same functions in one process.
|
|
210
|
+
|
|
211
|
+
```
|
|
212
|
+
MCP server REST API + /ui query box
|
|
213
|
+
\ /
|
|
214
|
+
\ /
|
|
215
|
+
single source pure functions
|
|
216
|
+
(records, assign, reconstruct,
|
|
217
|
+
dossier, similar)
|
|
218
|
+
|
|
|
219
|
+
local index (built by tcr-explorer-refresh
|
|
220
|
+
from the downloaded records) + bundled IMGT germline
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
IMGT (IMGT/HLA, IMGT/GENE-DB, IMGT germline, IMGT numbering) is a data source cited throughout. TCR Explorer is an independent tool and is not affiliated with IMGT.
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
# TCR Explorer
|
|
2
|
+
|
|
3
|
+
A federated tool for T cell receptor analysis. It retrieves known TCR records (VDJdb, IEDB, McPAS, TCR3d), assigns germline V and J genes down to the allele level, reconstructs full membrane-bound chains, builds per-receptor dossiers, and finds similar receptors. The same pure functions back a web UI, a REST API, and an MCP server, so an assistant can drive the whole tool.
|
|
4
|
+
|
|
5
|
+
## How the data works
|
|
6
|
+
|
|
7
|
+
The package ships the IMGT germline (bundled under CC BY 4.0) but **no record datasets**. On first use you run `tcr-explorer-refresh` once. It downloads the four record datasets (VDJdb, IEDB, McPAS, TCR3d) from each source's own official endpoint into a local folder, then harmonizes them into a single records index. After that, everything runs in one process against that local index, offline, until you refresh again to pull fresh data.
|
|
8
|
+
|
|
9
|
+
This means the tool never redistributes the record datasets (their licenses vary): each user fetches those directly from the source under that source's own terms. The germline is different: IMGT is CC BY 4.0, which permits redistribution with attribution, so it is bundled and germline features work offline out of the box. To pull a newer IMGT germline yourself, run `tcr-explorer-refresh --germline`. See [Data sources](#data-sources).
|
|
10
|
+
|
|
11
|
+
## Requirements
|
|
12
|
+
|
|
13
|
+
- Python 3.10 or newer.
|
|
14
|
+
- Internet access for the initial `tcr-explorer-refresh` (a few minutes, roughly 60 MB). Offline afterward until you refresh.
|
|
15
|
+
|
|
16
|
+
## Install
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
pip install tcr-explorer
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
Or from a checkout:
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
python -m venv .venv
|
|
26
|
+
source .venv/bin/activate # Windows: .venv\Scripts\activate
|
|
27
|
+
pip install -e .
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## First run
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
tcr-explorer-refresh
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
This downloads the four record datasets and builds the index into a local data folder (a platform-specific user data directory, or wherever `TCR_EXPLORER_DATA` points). The IMGT germline is already bundled, so this step does not touch IMGT. Re-run it any time to update the records. If a tool is used before the first refresh, it returns a clear message asking you to run this command.
|
|
37
|
+
|
|
38
|
+
To pull a fresher IMGT germline than the bundled one, run `tcr-explorer-refresh --germline` (needs IMGT/GENE-DB reachable). It writes into the local data folder, which the tool prefers over the bundled copy.
|
|
39
|
+
|
|
40
|
+
## Use it
|
|
41
|
+
|
|
42
|
+
Two front doors, both a single process.
|
|
43
|
+
|
|
44
|
+
### As an MCP server (recommended)
|
|
45
|
+
|
|
46
|
+
Point your own assistant at TCR Explorer over MCP and ask questions in plain language. This targets Claude Desktop and Claude Code, which run a local stdio MCP server. ChatGPT does not run local stdio MCP servers the same way, so use Claude for the paste and go flow. See [Connect your assistant](#connect-your-assistant).
|
|
47
|
+
|
|
48
|
+
### As a web app and REST API
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
PYTHONPATH=src uvicorn tcr_explorer.api:app --port 8000
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Open the query box at <http://localhost:8000/ui>, or call the REST API directly. Health check:
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
curl http://localhost:8000/health # {"status":"ok"}
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Connect your assistant
|
|
61
|
+
|
|
62
|
+
TCR Explorer ships an MCP server (console entry point `tcr-explorer-mcp`). Add this to your assistant's MCP configuration:
|
|
63
|
+
|
|
64
|
+
```json
|
|
65
|
+
{"mcpServers":{"tcr-explorer":{"command":"uvx","args":["--from","tcr-explorer","tcr-explorer-mcp"]}}}
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Or paste this prompt into Claude to have it set the connection up for you:
|
|
69
|
+
|
|
70
|
+
```
|
|
71
|
+
Set up the TCR Explorer MCP server so you can answer T cell receptor questions against real immunology databases. First install it (pip install tcr-explorer, or use uvx --from tcr-explorer), then run tcr-explorer-refresh once in a terminal to download the datasets into a local folder (a few minutes). Then add an MCP server named tcr-explorer that runs `uvx --from tcr-explorer tcr-explorer-mcp` (if uvx is unavailable, run python -m tcr_explorer.mcp_server). It exposes these read only tools: retrieve_tcr_records, assign_tcr_alleles, get_tcr_dossier, find_similar_tcrs, align_tcr_genes, and ask_tcr. If a tool reports the data is not downloaded yet, tell me to run tcr-explorer-refresh. After adding it, confirm the connection and suggest three example questions I can ask.
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
To run an unreleased version directly from a git repository instead of PyPI, use the git form: `uvx --from git+<your-repo-url> tcr-explorer-mcp`.
|
|
75
|
+
|
|
76
|
+
The read only MCP tools are `retrieve_tcr_records`, `assign_tcr_alleles`, `get_tcr_dossier`, `find_similar_tcrs`, `align_tcr_genes`, and `ask_tcr`.
|
|
77
|
+
|
|
78
|
+
## REST API
|
|
79
|
+
|
|
80
|
+
All of these run in process against the local index.
|
|
81
|
+
|
|
82
|
+
### Unified query box
|
|
83
|
+
|
|
84
|
+
**POST** `/v1/tcr/query` routes a single input (a CDR3, a full chain, a gene name, a record id, or a phrase) to the right tool.
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
curl -s -X POST http://localhost:8000/v1/tcr/query \
|
|
88
|
+
-H "Content-Type: application/json" \
|
|
89
|
+
-d '{"query":"CASSLGGAGGTDTQYF","species":"human"}'
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### Germline assignment
|
|
93
|
+
|
|
94
|
+
**POST** `/v1/tcr/assign` assigns a TCR sequence (nucleotide or amino acid, CDR3, region, or full chain) to V and J alleles, with per-region identity, co-optimal ties, CDR3 extraction, and an honest refusal to call a V allele from a bare CDR3.
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
curl -s -X POST http://localhost:8000/v1/tcr/assign \
|
|
98
|
+
-H "Content-Type: application/json" \
|
|
99
|
+
-d '{"sequence":"CASSLGGAGGTDTQYF","species":"human"}'
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
### Records retrieval
|
|
103
|
+
|
|
104
|
+
**POST** `/v1/tcr/records` searches the harmonized records index.
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
curl -s -X POST http://localhost:8000/v1/tcr/records \
|
|
108
|
+
-H "Content-Type: application/json" \
|
|
109
|
+
-d '{"cdr3":"CASSLGGAGGTDTQYF","species":"human","limit":20}'
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Chain reconstruction
|
|
113
|
+
|
|
114
|
+
**POST** `/reconstruct` builds a full membrane bound chain. Provide V, J, and CDR3, or a CDR3 alone (V and J are inferred from the records that carry the same CDR3).
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
curl -s -X POST http://localhost:8000/reconstruct \
|
|
118
|
+
-H "Content-Type: application/json" \
|
|
119
|
+
-d '{"cdr3_aa":"CASSLGGAGGTDTQYF","species":"human"}'
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### CDR1 and CDR2 prediction
|
|
123
|
+
|
|
124
|
+
**GET** `/predict/cdr` returns germline CDR1 and CDR2 for a TCR V gene from IMGT germline data.
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
curl "http://localhost:8000/predict/cdr?v_gene=TRBV12-3&species=human"
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
## Optional: MHC allele sequences
|
|
131
|
+
|
|
132
|
+
The records index does not contain MHC allele sequences. If you want live lookup of those from EBI IMGT/HLA and IPD-MHC, start the optional hla and mhc proxies with one command:
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
docker-compose up
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
The `/search` endpoint is scoped to these two sources: `{"source": "hla"}` or `{"source": "mhc"}`. Any other source returns HTTP 400 pointing at `/v1/tcr/records`, which is where TCR record search lives.
|
|
139
|
+
|
|
140
|
+
## Environment variables
|
|
141
|
+
|
|
142
|
+
All optional.
|
|
143
|
+
|
|
144
|
+
| Variable | Default | Description |
|
|
145
|
+
|----------|---------|-------------|
|
|
146
|
+
| `TCR_EXPLORER_DATA` | platform user data dir | Local folder where `tcr-explorer-refresh` downloads datasets and builds the index |
|
|
147
|
+
| `TCR_EXPLORER_MAX_AGE_DAYS` | `30` | Age after which the local index is flagged stale (a refresh is suggested in query warnings, never forced) |
|
|
148
|
+
| `RECORDS_INDEX_PATH` | `<data dir>/records_index.parquet` | Override the records index path directly |
|
|
149
|
+
| `HLA_SERVER_URL` | `http://127.0.0.1:8101` | HLA allele sequence proxy (optional) |
|
|
150
|
+
| `MHC_SERVER_URL` | `http://127.0.0.1:8105` | IPD-MHC allele sequence proxy (optional) |
|
|
151
|
+
| `LLM_BASE_URL` | *(empty)* | OpenAI compatible endpoint for the free text `ask` path (falls back to a heuristic parser when unset) |
|
|
152
|
+
| `LLM_MODEL` | `local-model` | Model id for the `ask` path |
|
|
153
|
+
|
|
154
|
+
## Data sources
|
|
155
|
+
|
|
156
|
+
TCR Explorer cites the following. The four record datasets are downloaded on your machine from their official endpoints and are not redistributed; the IMGT germline is bundled with the package under CC BY 4.0. Please cite the ones you use.
|
|
157
|
+
|
|
158
|
+
- **VDJdb** (downloaded). Goncharov M. et al. VDJdb in the pandemic era: a compendium of T cell receptors specific for SARS-CoV-2. Nature Methods, 2022. <https://github.com/antigenomics/vdjdb-db>
|
|
159
|
+
- **IEDB** (downloaded, CC BY 4.0). Vita R. et al. The Immune Epitope Database (IEDB): 2024 update. Nucleic Acids Research, 2025. <https://www.iedb.org>
|
|
160
|
+
- **McPAS-TCR** (downloaded). Tickotsky N. et al. McPAS-TCR: a manually curated catalogue of pathology associated T cell receptor sequences. Bioinformatics, 2017. <https://friedmanlab.weizmann.ac.il/McPAS-TCR/>
|
|
161
|
+
- **TCR3d** (downloaded). Lin V. et al. TCR3d 2.0: expanding the T cell receptor structure database. Nucleic Acids Research, 2025. <https://tcr3d.ibbr.umd.edu>
|
|
162
|
+
- **IMGT germline** (bundled, CC BY 4.0, release 20268-7). Lefranc M-P. et al. IMGT, the international ImMunoGeneTics information system. Reformatted via stitchr and IMGTgeneDL (MIT). See `src/tcr_explorer/data/germline/ATTRIBUTION.md`. <https://www.imgt.org>
|
|
163
|
+
|
|
164
|
+
## Run tests
|
|
165
|
+
|
|
166
|
+
```bash
|
|
167
|
+
PYTHONPATH=src pytest tests/ -v
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
## Architecture
|
|
171
|
+
|
|
172
|
+
The core is a set of single source pure functions (records retrieval, germline assignment, reconstruction, dossiers, similarity) that read from the locally downloaded index. The REST API, the MCP server, and the web query box all call these same functions in one process.
|
|
173
|
+
|
|
174
|
+
```
|
|
175
|
+
MCP server REST API + /ui query box
|
|
176
|
+
\ /
|
|
177
|
+
\ /
|
|
178
|
+
single source pure functions
|
|
179
|
+
(records, assign, reconstruct,
|
|
180
|
+
dossier, similar)
|
|
181
|
+
|
|
|
182
|
+
local index (built by tcr-explorer-refresh
|
|
183
|
+
from the downloaded records) + bundled IMGT germline
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
IMGT (IMGT/HLA, IMGT/GENE-DB, IMGT germline, IMGT numbering) is a data source cited throughout. TCR Explorer is an independent tool and is not affiliated with IMGT.
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=77.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "tcr-explorer"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Federated TCR analysis: records retrieval, germline allele assignment, reconstruction, dossiers, and similarity, over a web UI, REST API, and MCP server"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
license-files = ["LICENSE"]
|
|
13
|
+
authors = [{ name = "Kilian Maire" }]
|
|
14
|
+
keywords = [
|
|
15
|
+
"TCR", "T-cell-receptor", "immunology", "AIRR", "VDJ", "germline",
|
|
16
|
+
"IMGT", "VDJdb", "IEDB", "McPAS", "bioinformatics", "MCP",
|
|
17
|
+
"immune-repertoire", "epitope",
|
|
18
|
+
]
|
|
19
|
+
classifiers = [
|
|
20
|
+
"Development Status :: 4 - Beta",
|
|
21
|
+
"Programming Language :: Python :: 3",
|
|
22
|
+
"Programming Language :: Python :: 3.10",
|
|
23
|
+
"Programming Language :: Python :: 3.11",
|
|
24
|
+
"Intended Audience :: Science/Research",
|
|
25
|
+
"Topic :: Scientific/Engineering :: Bio-Informatics",
|
|
26
|
+
"Operating System :: OS Independent",
|
|
27
|
+
"Typing :: Typed",
|
|
28
|
+
]
|
|
29
|
+
dependencies = [
|
|
30
|
+
"mcp>=1.0.0,<2",
|
|
31
|
+
"starlette>=0.40,<0.48",
|
|
32
|
+
"fastapi==0.116.1",
|
|
33
|
+
"uvicorn[standard]==0.35.0",
|
|
34
|
+
"pydantic==2.11.7",
|
|
35
|
+
"python-multipart==0.0.20",
|
|
36
|
+
"httpx==0.28.1",
|
|
37
|
+
"pandas>=2.0.0",
|
|
38
|
+
"pyarrow>=15.0.0",
|
|
39
|
+
"biopython==1.85",
|
|
40
|
+
"tidytcells>=2.0.0",
|
|
41
|
+
"platformdirs>=4",
|
|
42
|
+
"stitchr>=1.3",
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
[project.optional-dependencies]
|
|
46
|
+
ui-legacy = ["streamlit>=1.35.0"]
|
|
47
|
+
|
|
48
|
+
[project.scripts]
|
|
49
|
+
tcr-explorer-mcp = "tcr_explorer.mcp_server:main"
|
|
50
|
+
tcr-explorer-refresh = "tcr_explorer.bootstrap:main"
|
|
51
|
+
|
|
52
|
+
[project.urls]
|
|
53
|
+
Homepage = "https://github.com/KilianMaire/tcr-explorer"
|
|
54
|
+
Issues = "https://github.com/KilianMaire/tcr-explorer/issues"
|
|
55
|
+
|
|
56
|
+
[tool.setuptools]
|
|
57
|
+
package-dir = {"" = "src"}
|
|
58
|
+
|
|
59
|
+
[tool.setuptools.packages.find]
|
|
60
|
+
where = ["src"]
|
|
61
|
+
|
|
62
|
+
[tool.setuptools.package-data]
|
|
63
|
+
tcr_explorer = [
|
|
64
|
+
"py.typed",
|
|
65
|
+
"data/*.json",
|
|
66
|
+
"data/germline/*.md",
|
|
67
|
+
"data/germline/*/*.fasta",
|
|
68
|
+
"data/germline/*/*.tsv",
|
|
69
|
+
"data/germline/*/*.txt",
|
|
70
|
+
]
|
|
71
|
+
|
|
72
|
+
[tool.pytest.ini_options]
|
|
73
|
+
testpaths = ["tests"]
|
|
74
|
+
pythonpath = ["src", "."]
|
|
75
|
+
|
|
76
|
+
[tool.ruff]
line-length = 100
# Keep this at the lowest interpreter allowed by `requires-python` (>=3.10);
# a higher target lets Ruff accept or auto-apply syntax that breaks on 3.10.
target-version = "py310"
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Star-imports of this module export nothing: no public names are declared.
__all__: list[str] = []
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Annotator interface: selects between IgBLAST (authoritative, deferred) and the
|
|
3
|
+
always-available k-mer aligner fallback (Task 3).
|
|
4
|
+
|
|
5
|
+
Selection rule: IgBLAST is only attempted when mode=="full", the input is
|
|
6
|
+
nucleotide (not protein), and the `igblastn` binary is present on PATH. The
|
|
7
|
+
actual subprocess invocation is deferred (`_run_igblast` is a stub returning
|
|
8
|
+
None), so this module currently always falls through to the k-mer backend;
|
|
9
|
+
the point of this module is the selection/fallback logic and the
|
|
10
|
+
`igblast_unavailable` warning, not the IgBLAST call itself.
|
|
11
|
+
"""
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
import shutil
|
|
14
|
+
from dataclasses import dataclass, field
|
|
15
|
+
from typing import Optional
|
|
16
|
+
from .kmer_aligner import annotate_sequence, KmerAnnotation
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class Annotation:
    """Annotation result handed back by this module, whichever backend ran.

    A default-constructed instance carries no calls or scores and is labelled
    as a low-confidence ``kmer_align`` result on an ``unknown`` chain.
    """

    # Gene calls; ``None`` when the backend reported none.
    v_call: Optional[str] = None
    j_call: Optional[str] = None
    d_call: Optional[str] = None

    # Scores attached to the V and J calls; ``None`` when absent.
    v_score: Optional[float] = None
    j_score: Optional[float] = None

    chain: str = "unknown"

    # Which backend produced the record, and how far to trust it.
    source: str = "kmer_align"
    confidence: str = "low"

    # Pairs of strings; ``annotate`` appends them as (code, message).
    # ``default_factory`` gives every instance its own list.
    warnings: list[tuple[str, str]] = field(default_factory=list)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def igblast_available() -> bool:
    """Return True when an ``igblastn`` executable can be resolved on PATH."""
    igblastn_path = shutil.which("igblastn")
    return igblastn_path is not None
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _run_igblast(seq: str, species: str) -> Optional[Annotation]:
    """Placeholder for the IgBLAST backend; currently always returns None.

    Deferred work: the real ``igblastn -outfmt 19`` invocation plus the AIRR
    TSV parse. Neither argument is used yet. A ``None`` result signals
    "not implemented / failed", so callers fall back to another backend.
    """
    return None
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _from_kmer(k: KmerAnnotation, source: str, confidence: str) -> Annotation:
    """Build an ``Annotation`` from a k-mer aligner result.

    Calls, scores and chain are copied from ``k``; ``source`` and
    ``confidence`` are taken from the arguments. The warnings are copied into
    a new list, so appending to the returned annotation leaves ``k`` untouched.
    """
    payload = {
        "v_call": k.v_call,
        "j_call": k.j_call,
        "d_call": k.d_call,
        "v_score": k.v_score,
        "j_score": k.j_score,
        "chain": k.chain,
        "source": source,
        "confidence": confidence,
        "warnings": list(k.warnings),
    }
    return Annotation(**payload)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def annotate(seq: str, species: str, is_protein: bool, mode: str) -> Annotation:
    """Annotate ``seq``, trying IgBLAST first when eligible, else the k-mer backend.

    IgBLAST is attempted only when ``mode == "full"``, the input is not
    protein, and ``igblast_available()`` is true. A non-None IgBLAST result is
    returned with source ``"igblast"`` and confidence ``"high"``.

    In every other case the sequence goes through ``annotate_sequence``. The
    result is labelled ``"kmer_align"``, with confidence ``"medium"`` for
    nucleotide input and ``"low"`` for protein input. An
    ``igblast_unavailable`` warning is appended when full mode was requested
    on nucleotide input but ``igblastn`` is not available.
    """
    igblast_eligible = mode == "full" and not is_protein

    if igblast_eligible and igblast_available():
        igblast_result = _run_igblast(seq, species)
        if igblast_result is not None:
            igblast_result.source = "igblast"
            igblast_result.confidence = "high"
            return igblast_result
        # IgBLAST is present but yielded nothing: continue with the k-mer backend.

    kmer_result = annotate_sequence(seq, species, is_protein)
    kmer_confidence = "low" if is_protein else "medium"
    annotation = _from_kmer(kmer_result, "kmer_align", kmer_confidence)

    if igblast_eligible and not igblast_available():
        annotation.warnings.append(
            ("igblast_unavailable", "igblastn not found on PATH; used the k-mer backend")
        )
    return annotation
|