tcr-explorer 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. tcr_explorer-0.1.0/LICENSE +21 -0
  2. tcr_explorer-0.1.0/PKG-INFO +223 -0
  3. tcr_explorer-0.1.0/README.md +186 -0
  4. tcr_explorer-0.1.0/pyproject.toml +78 -0
  5. tcr_explorer-0.1.0/setup.cfg +4 -0
  6. tcr_explorer-0.1.0/src/tcr_explorer/__init__.py +1 -0
  7. tcr_explorer-0.1.0/src/tcr_explorer/annotator.py +66 -0
  8. tcr_explorer-0.1.0/src/tcr_explorer/api.py +1151 -0
  9. tcr_explorer-0.1.0/src/tcr_explorer/ask.py +150 -0
  10. tcr_explorer-0.1.0/src/tcr_explorer/bootstrap.py +208 -0
  11. tcr_explorer-0.1.0/src/tcr_explorer/cdr_enricher.py +230 -0
  12. tcr_explorer-0.1.0/src/tcr_explorer/config.py +25 -0
  13. tcr_explorer-0.1.0/src/tcr_explorer/constant_regions.py +81 -0
  14. tcr_explorer-0.1.0/src/tcr_explorer/d_regions.py +17 -0
  15. tcr_explorer-0.1.0/src/tcr_explorer/data/blosum62.json +1 -0
  16. tcr_explorer-0.1.0/src/tcr_explorer/data/germline/ATTRIBUTION.md +33 -0
  17. tcr_explorer-0.1.0/src/tcr_explorer/data/germline/HUMAN/C-region-motifs.tsv +33 -0
  18. tcr_explorer-0.1.0/src/tcr_explorer/data/germline/HUMAN/IMGTgeneDLwarnings.txt +10 -0
  19. tcr_explorer-0.1.0/src/tcr_explorer/data/germline/HUMAN/J-region-motifs.tsv +100 -0
  20. tcr_explorer-0.1.0/src/tcr_explorer/data/germline/HUMAN/TRA.fasta +1344 -0
  21. tcr_explorer-0.1.0/src/tcr_explorer/data/germline/HUMAN/TRB.fasta +1530 -0
  22. tcr_explorer-0.1.0/src/tcr_explorer/data/germline/HUMAN/TRD.fasta +225 -0
  23. tcr_explorer-0.1.0/src/tcr_explorer/data/germline/HUMAN/TRG.fasta +510 -0
  24. tcr_explorer-0.1.0/src/tcr_explorer/data/germline/HUMAN/data-production-date.tsv +6 -0
  25. tcr_explorer-0.1.0/src/tcr_explorer/data/germline/HUMAN/imgt-data.fasta +3854 -0
  26. tcr_explorer-0.1.0/src/tcr_explorer/data/germline/MOUSE/C-region-motifs.tsv +15 -0
  27. tcr_explorer-0.1.0/src/tcr_explorer/data/germline/MOUSE/IMGTgeneDLwarnings.txt +10 -0
  28. tcr_explorer-0.1.0/src/tcr_explorer/data/germline/MOUSE/J-region-motifs.tsv +104 -0
  29. tcr_explorer-0.1.0/src/tcr_explorer/data/germline/MOUSE/TRA.fasta +2442 -0
  30. tcr_explorer-0.1.0/src/tcr_explorer/data/germline/MOUSE/TRB.fasta +482 -0
  31. tcr_explorer-0.1.0/src/tcr_explorer/data/germline/MOUSE/TRD.fasta +349 -0
  32. tcr_explorer-0.1.0/src/tcr_explorer/data/germline/MOUSE/TRG.fasta +243 -0
  33. tcr_explorer-0.1.0/src/tcr_explorer/data/germline/MOUSE/data-production-date.tsv +6 -0
  34. tcr_explorer-0.1.0/src/tcr_explorer/data/germline/MOUSE/imgt-data.fasta +3930 -0
  35. tcr_explorer-0.1.0/src/tcr_explorer/data_paths.py +73 -0
  36. tcr_explorer-0.1.0/src/tcr_explorer/data_sources.py +101 -0
  37. tcr_explorer-0.1.0/src/tcr_explorer/dossier.py +409 -0
  38. tcr_explorer-0.1.0/src/tcr_explorer/dossier_epitopes.py +97 -0
  39. tcr_explorer-0.1.0/src/tcr_explorer/dossier_models.py +241 -0
  40. tcr_explorer-0.1.0/src/tcr_explorer/fasta_parser.py +74 -0
  41. tcr_explorer-0.1.0/src/tcr_explorer/file_ingest.py +128 -0
  42. tcr_explorer-0.1.0/src/tcr_explorer/frontend.py +220 -0
  43. tcr_explorer-0.1.0/src/tcr_explorer/germline_db.py +115 -0
  44. tcr_explorer-0.1.0/src/tcr_explorer/germline_sets.py +133 -0
  45. tcr_explorer-0.1.0/src/tcr_explorer/input_router.py +90 -0
  46. tcr_explorer-0.1.0/src/tcr_explorer/kmer_aligner.py +94 -0
  47. tcr_explorer-0.1.0/src/tcr_explorer/llm_client.py +58 -0
  48. tcr_explorer-0.1.0/src/tcr_explorer/mcp_clients.py +16 -0
  49. tcr_explorer-0.1.0/src/tcr_explorer/mcp_server.py +110 -0
  50. tcr_explorer-0.1.0/src/tcr_explorer/models.py +289 -0
  51. tcr_explorer-0.1.0/src/tcr_explorer/msa.py +333 -0
  52. tcr_explorer-0.1.0/src/tcr_explorer/nl_query.py +102 -0
  53. tcr_explorer-0.1.0/src/tcr_explorer/py.typed +0 -0
  54. tcr_explorer-0.1.0/src/tcr_explorer/query_nl.py +85 -0
  55. tcr_explorer-0.1.0/src/tcr_explorer/query_router.py +123 -0
  56. tcr_explorer-0.1.0/src/tcr_explorer/reconstructor.py +392 -0
  57. tcr_explorer-0.1.0/src/tcr_explorer/records.py +542 -0
  58. tcr_explorer-0.1.0/src/tcr_explorer/records_build.py +585 -0
  59. tcr_explorer-0.1.0/src/tcr_explorer/similarity.py +207 -0
  60. tcr_explorer-0.1.0/src/tcr_explorer/tcr_align.py +373 -0
  61. tcr_explorer-0.1.0/src/tcr_explorer.egg-info/PKG-INFO +223 -0
  62. tcr_explorer-0.1.0/src/tcr_explorer.egg-info/SOURCES.txt +144 -0
  63. tcr_explorer-0.1.0/src/tcr_explorer.egg-info/dependency_links.txt +1 -0
  64. tcr_explorer-0.1.0/src/tcr_explorer.egg-info/entry_points.txt +3 -0
  65. tcr_explorer-0.1.0/src/tcr_explorer.egg-info/requires.txt +16 -0
  66. tcr_explorer-0.1.0/src/tcr_explorer.egg-info/top_level.txt +1 -0
  67. tcr_explorer-0.1.0/tests/test_align_api.py +15 -0
  68. tcr_explorer-0.1.0/tests/test_annotator.py +17 -0
  69. tcr_explorer-0.1.0/tests/test_api_prediction.py +61 -0
  70. tcr_explorer-0.1.0/tests/test_ask.py +124 -0
  71. tcr_explorer-0.1.0/tests/test_ask_api.py +13 -0
  72. tcr_explorer-0.1.0/tests/test_assign_api.py +40 -0
  73. tcr_explorer-0.1.0/tests/test_baseline_clients.py +20 -0
  74. tcr_explorer-0.1.0/tests/test_batman_cache.py +56 -0
  75. tcr_explorer-0.1.0/tests/test_batman_enrichment.py +38 -0
  76. tcr_explorer-0.1.0/tests/test_batman_pmhc.py +307 -0
  77. tcr_explorer-0.1.0/tests/test_batman_scorer.py +100 -0
  78. tcr_explorer-0.1.0/tests/test_batman_server.py +184 -0
  79. tcr_explorer-0.1.0/tests/test_batman_tcrdist.py +318 -0
  80. tcr_explorer-0.1.0/tests/test_batman_training_data.py +73 -0
  81. tcr_explorer-0.1.0/tests/test_bootstrap.py +166 -0
  82. tcr_explorer-0.1.0/tests/test_caching.py +91 -0
  83. tcr_explorer-0.1.0/tests/test_cdr_enricher.py +192 -0
  84. tcr_explorer-0.1.0/tests/test_ci_workflows.py +223 -0
  85. tcr_explorer-0.1.0/tests/test_config.py +35 -0
  86. tcr_explorer-0.1.0/tests/test_constant_regions_human.py +54 -0
  87. tcr_explorer-0.1.0/tests/test_data_paths.py +30 -0
  88. tcr_explorer-0.1.0/tests/test_data_sources.py +65 -0
  89. tcr_explorer-0.1.0/tests/test_dossier.py +92 -0
  90. tcr_explorer-0.1.0/tests/test_dossier_api.py +47 -0
  91. tcr_explorer-0.1.0/tests/test_dossier_epitopes.py +52 -0
  92. tcr_explorer-0.1.0/tests/test_dossier_honesty.py +25 -0
  93. tcr_explorer-0.1.0/tests/test_dossier_models.py +27 -0
  94. tcr_explorer-0.1.0/tests/test_dossier_neighbours.py +58 -0
  95. tcr_explorer-0.1.0/tests/test_download_wiring.py +16 -0
  96. tcr_explorer-0.1.0/tests/test_env_config.py +254 -0
  97. tcr_explorer-0.1.0/tests/test_frontend.py +147 -0
  98. tcr_explorer-0.1.0/tests/test_germline_db.py +53 -0
  99. tcr_explorer-0.1.0/tests/test_germline_sets.py +31 -0
  100. tcr_explorer-0.1.0/tests/test_health.py +49 -0
  101. tcr_explorer-0.1.0/tests/test_input_router.py +37 -0
  102. tcr_explorer-0.1.0/tests/test_kmer_aligner.py +29 -0
  103. tcr_explorer-0.1.0/tests/test_llm_client.py +54 -0
  104. tcr_explorer-0.1.0/tests/test_makefile.py +99 -0
  105. tcr_explorer-0.1.0/tests/test_mcp_align.py +5 -0
  106. tcr_explorer-0.1.0/tests/test_mcp_server.py +25 -0
  107. tcr_explorer-0.1.0/tests/test_mhc_integration.py +112 -0
  108. tcr_explorer-0.1.0/tests/test_mhc_organism.py +54 -0
  109. tcr_explorer-0.1.0/tests/test_mhc_server.py +385 -0
  110. tcr_explorer-0.1.0/tests/test_models.py +309 -0
  111. tcr_explorer-0.1.0/tests/test_msa.py +92 -0
  112. tcr_explorer-0.1.0/tests/test_msa_codon.py +66 -0
  113. tcr_explorer-0.1.0/tests/test_nl_query.py +203 -0
  114. tcr_explorer-0.1.0/tests/test_packaged_data.py +21 -0
  115. tcr_explorer-0.1.0/tests/test_pagination.py +58 -0
  116. tcr_explorer-0.1.0/tests/test_query_api.py +25 -0
  117. tcr_explorer-0.1.0/tests/test_query_nl.py +26 -0
  118. tcr_explorer-0.1.0/tests/test_query_router.py +61 -0
  119. tcr_explorer-0.1.0/tests/test_reconstruct_infer.py +100 -0
  120. tcr_explorer-0.1.0/tests/test_reconstruction_fullchain.py +77 -0
  121. tcr_explorer-0.1.0/tests/test_reconstructor_frame.py +49 -0
  122. tcr_explorer-0.1.0/tests/test_record_builder.py +49 -0
  123. tcr_explorer-0.1.0/tests/test_records_api.py +20 -0
  124. tcr_explorer-0.1.0/tests/test_records_build.py +83 -0
  125. tcr_explorer-0.1.0/tests/test_records_null_robustness.py +93 -0
  126. tcr_explorer-0.1.0/tests/test_records_retrieval.py +115 -0
  127. tcr_explorer-0.1.0/tests/test_requirements.py +143 -0
  128. tcr_explorer-0.1.0/tests/test_schema_descriptions.py +156 -0
  129. tcr_explorer-0.1.0/tests/test_search_hla_mhc.py +22 -0
  130. tcr_explorer-0.1.0/tests/test_similar_api.py +21 -0
  131. tcr_explorer-0.1.0/tests/test_similarity.py +71 -0
  132. tcr_explorer-0.1.0/tests/test_similarity_models.py +14 -0
  133. tcr_explorer-0.1.0/tests/test_tcr_align.py +215 -0
  134. tcr_explorer-0.1.0/tests/test_tempo_api_integration.py +76 -0
  135. tcr_explorer-0.1.0/tests/test_tempo_baseline.py +91 -0
  136. tcr_explorer-0.1.0/tests/test_tempo_batcave.py +47 -0
  137. tcr_explorer-0.1.0/tests/test_tempo_binary.py +73 -0
  138. tcr_explorer-0.1.0/tests/test_tempo_crossreact.py +145 -0
  139. tcr_explorer-0.1.0/tests/test_tempo_pv_fix.py +29 -0
  140. tcr_explorer-0.1.0/tests/test_tempo_scorer.py +78 -0
  141. tcr_explorer-0.1.0/tests/test_tempo_server.py +106 -0
  142. tcr_explorer-0.1.0/tests/test_tempo_tsp.py +61 -0
  143. tcr_explorer-0.1.0/tests/test_ui.py +27 -0
  144. tcr_explorer-0.1.0/tests/test_ui_browser.py +463 -0
  145. tcr_explorer-0.1.0/tests/test_unitcr_index_build.py +44 -0
  146. tcr_explorer-0.1.0/tests/test_validate_config.py +707 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Kilian Maire
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,223 @@
1
+ Metadata-Version: 2.4
2
+ Name: tcr-explorer
3
+ Version: 0.1.0
4
+ Summary: Federated TCR analysis: records retrieval, germline allele assignment, reconstruction, dossiers, and similarity, over a web UI, REST API, and MCP server
5
+ Author: Kilian Maire
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/KilianMaire/tcr-explorer
8
+ Project-URL: Issues, https://github.com/KilianMaire/tcr-explorer/issues
9
+ Keywords: TCR,T-cell-receptor,immunology,AIRR,VDJ,germline,IMGT,VDJdb,IEDB,McPAS,bioinformatics,MCP,immune-repertoire,epitope
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Typing :: Typed
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: mcp<2,>=1.0.0
22
+ Requires-Dist: starlette<0.48,>=0.40
23
+ Requires-Dist: fastapi==0.116.1
24
+ Requires-Dist: uvicorn[standard]==0.35.0
25
+ Requires-Dist: pydantic==2.11.7
26
+ Requires-Dist: python-multipart==0.0.20
27
+ Requires-Dist: httpx==0.28.1
28
+ Requires-Dist: pandas>=2.0.0
29
+ Requires-Dist: pyarrow>=15.0.0
30
+ Requires-Dist: biopython==1.85
31
+ Requires-Dist: tidytcells>=2.0.0
32
+ Requires-Dist: platformdirs>=4
33
+ Requires-Dist: stitchr>=1.3
34
+ Provides-Extra: ui-legacy
35
+ Requires-Dist: streamlit>=1.35.0; extra == "ui-legacy"
36
+ Dynamic: license-file
37
+
38
+ # TCR Explorer
39
+
40
+ A federated tool for T cell receptor analysis. It retrieves known TCR records (VDJdb, IEDB, McPAS, TCR3d), assigns germline V and J genes down to the allele level, reconstructs full membrane-bound chains, builds per-receptor dossiers, and finds similar receptors. The same pure functions back a web UI, a REST API, and an MCP server, so an assistant can drive the whole tool.
41
+
42
+ ## How the data works
43
+
44
+ The package ships the IMGT germline (bundled under CC BY 4.0) but **no record datasets**. On first use you run `tcr-explorer-refresh` once. It downloads the four record datasets (VDJdb, IEDB, McPAS, TCR3d) from each source's own official endpoint into a local folder, then harmonizes them into a single records index. After that, everything runs in one process against that local index, offline, until you refresh again to pull fresh data.
45
+
46
+ This means the tool never redistributes the record datasets (their licenses vary): each user fetches those directly from the source under that source's own terms. The germline is different: IMGT is CC BY 4.0, which permits redistribution with attribution, so it is bundled and germline features work offline out of the box. To pull a newer IMGT germline yourself, run `tcr-explorer-refresh --germline`. See [Data sources](#data-sources).
47
+
48
+ ## Requirements
49
+
50
+ - Python 3.10 or newer.
51
+ - Internet access for the initial `tcr-explorer-refresh` (a few minutes, roughly 60 MB). Offline afterward until you refresh.
52
+
53
+ ## Install
54
+
55
+ ```bash
56
+ pip install tcr-explorer
57
+ ```
58
+
59
+ Or from a checkout:
60
+
61
+ ```bash
62
+ python -m venv .venv
63
+ source .venv/bin/activate # Windows: .venv\Scripts\activate
64
+ pip install -e .
65
+ ```
66
+
67
+ ## First run
68
+
69
+ ```bash
70
+ tcr-explorer-refresh
71
+ ```
72
+
73
+ This downloads the four record datasets and builds the index into a local data folder (a platform-specific user data directory, or wherever `TCR_EXPLORER_DATA` points). The IMGT germline is already bundled, so this step does not touch IMGT. Re-run it any time to update the records. If a tool is used before the first refresh, it returns a clear message asking you to run this command.
74
+
75
+ To pull a fresher IMGT germline than the bundled one, run `tcr-explorer-refresh --germline` (needs IMGT/GENE-DB reachable). It writes into the local data folder, which the tool prefers over the bundled copy.
76
+
77
+ ## Use it
78
+
79
+ Two front doors, both a single process.
80
+
81
+ ### As an MCP server (recommended)
82
+
83
+ Point your own assistant at TCR Explorer over MCP and ask questions in plain language. This targets Claude Desktop and Claude Code, which run a local stdio MCP server. ChatGPT does not run local stdio MCP servers the same way, so use Claude for the paste-and-go flow. See [Connect your assistant](#connect-your-assistant).
84
+
85
+ ### As a web app and REST API
86
+
87
+ ```bash
88
+ PYTHONPATH=src uvicorn tcr_explorer.api:app --port 8000
89
+ ```
90
+
91
+ Open the query box at <http://localhost:8000/ui>, or call the REST API directly. Health check:
92
+
93
+ ```bash
94
+ curl http://localhost:8000/health # {"status":"ok"}
95
+ ```
96
+
97
+ ## Connect your assistant
98
+
99
+ TCR Explorer ships an MCP server (console entry point `tcr-explorer-mcp`). Add this to your assistant's MCP configuration:
100
+
101
+ ```json
102
+ {"mcpServers":{"tcr-explorer":{"command":"uvx","args":["--from","tcr-explorer","tcr-explorer-mcp"]}}}
103
+ ```
104
+
105
+ Or paste this prompt into Claude to have it set the connection up for you:
106
+
107
+ ```
108
+ Set up the TCR Explorer MCP server so you can answer T cell receptor questions against real immunology databases. First install it (pip install tcr-explorer, or use uvx --from tcr-explorer), then run tcr-explorer-refresh once in a terminal to download the datasets into a local folder (a few minutes). Then add an MCP server named tcr-explorer that runs `uvx --from tcr-explorer tcr-explorer-mcp` (if uvx is unavailable, run python -m tcr_explorer.mcp_server). It exposes these read only tools: retrieve_tcr_records, assign_tcr_alleles, get_tcr_dossier, find_similar_tcrs, align_tcr_genes, and ask_tcr. If a tool reports the data is not downloaded yet, tell me to run tcr-explorer-refresh. After adding it, confirm the connection and suggest three example questions I can ask.
109
+ ```
110
+
111
+ To run an unreleased version straight from a git repository instead of the PyPI release, use the git form: `uvx --from git+<your-repo-url> tcr-explorer-mcp`.
112
+
113
+ The read-only MCP tools are `retrieve_tcr_records`, `assign_tcr_alleles`, `get_tcr_dossier`, `find_similar_tcrs`, `align_tcr_genes`, and `ask_tcr`.
114
+
115
+ ## REST API
116
+
117
+ All of these run in process against the local index.
118
+
119
+ ### Unified query box
120
+
121
+ **POST** `/v1/tcr/query` routes a single input (a CDR3, a full chain, a gene name, a record id, or a phrase) to the right tool.
122
+
123
+ ```bash
124
+ curl -s -X POST http://localhost:8000/v1/tcr/query \
125
+ -H "Content-Type: application/json" \
126
+ -d '{"query":"CASSLGGAGGTDTQYF","species":"human"}'
127
+ ```
128
+
129
+ ### Germline assignment
130
+
131
+ **POST** `/v1/tcr/assign` assigns a TCR sequence (nucleotide or amino acid, CDR3, region, or full chain) to V and J alleles, with per-region identity, co-optimal ties, CDR3 extraction, and an honest refusal to call a V allele from a bare CDR3.
132
+
133
+ ```bash
134
+ curl -s -X POST http://localhost:8000/v1/tcr/assign \
135
+ -H "Content-Type: application/json" \
136
+ -d '{"sequence":"CASSLGGAGGTDTQYF","species":"human"}'
137
+ ```
138
+
139
+ ### Records retrieval
140
+
141
+ **POST** `/v1/tcr/records` searches the harmonized records index.
142
+
143
+ ```bash
144
+ curl -s -X POST http://localhost:8000/v1/tcr/records \
145
+ -H "Content-Type: application/json" \
146
+ -d '{"cdr3":"CASSLGGAGGTDTQYF","species":"human","limit":20}'
147
+ ```
148
+
149
+ ### Chain reconstruction
150
+
151
+ **POST** `/reconstruct` builds a full membrane-bound chain. Provide V, J, and CDR3, or a CDR3 alone (V and J are inferred from the records that carry the same CDR3).
152
+
153
+ ```bash
154
+ curl -s -X POST http://localhost:8000/reconstruct \
155
+ -H "Content-Type: application/json" \
156
+ -d '{"cdr3_aa":"CASSLGGAGGTDTQYF","species":"human"}'
157
+ ```
158
+
159
+ ### CDR1 and CDR2 prediction
160
+
161
+ **GET** `/predict/cdr` returns germline CDR1 and CDR2 for a TCR V gene from IMGT germline data.
162
+
163
+ ```bash
164
+ curl "http://localhost:8000/predict/cdr?v_gene=TRBV12-3&species=human"
165
+ ```
166
+
167
+ ## Optional: MHC allele sequences
168
+
169
+ The records index does not contain MHC allele sequences. If you want live lookup of those from EBI IMGT/HLA and IPD-MHC, start the optional hla and mhc proxies with one command:
170
+
171
+ ```bash
172
+ docker-compose up
173
+ ```
174
+
175
+ The `/search` endpoint is scoped to these two sources: `{"source": "hla"}` or `{"source": "mhc"}`. Any other source returns HTTP 400 pointing at `/v1/tcr/records`, which is where TCR record search lives.
176
+
177
+ ## Environment variables
178
+
179
+ All optional.
180
+
181
+ | Variable | Default | Description |
182
+ |----------|---------|-------------|
183
+ | `TCR_EXPLORER_DATA` | platform user data dir | Local folder where `tcr-explorer-refresh` downloads datasets and builds the index |
184
+ | `TCR_EXPLORER_MAX_AGE_DAYS` | `30` | Age after which the local index is flagged stale (a refresh is suggested in query warnings, never forced) |
185
+ | `RECORDS_INDEX_PATH` | `<data dir>/records_index.parquet` | Override the records index path directly |
186
+ | `HLA_SERVER_URL` | `http://127.0.0.1:8101` | HLA allele sequence proxy (optional) |
187
+ | `MHC_SERVER_URL` | `http://127.0.0.1:8105` | IPD-MHC allele sequence proxy (optional) |
188
+ | `LLM_BASE_URL` | *(empty)* | OpenAI-compatible endpoint for the free-text `ask` path (falls back to a heuristic parser when unset) |
189
+ | `LLM_MODEL` | `local-model` | Model id for the `ask` path |
190
+
191
+ ## Data sources
192
+
193
+ TCR Explorer cites the following. The four record datasets are downloaded on your machine from their official endpoints and are not redistributed; the IMGT germline is bundled with the package under CC BY 4.0. Please cite the ones you use.
194
+
195
+ - **VDJdb** (downloaded). Goncharov M. et al. VDJdb in the pandemic era: a compendium of T cell receptors specific for SARS-CoV-2. Nature Methods, 2022. <https://github.com/antigenomics/vdjdb-db>
196
+ - **IEDB** (downloaded, CC BY 4.0). Vita R. et al. The Immune Epitope Database (IEDB): 2024 update. Nucleic Acids Research, 2025. <https://www.iedb.org>
197
+ - **McPAS-TCR** (downloaded). Tickotsky N. et al. McPAS-TCR: a manually curated catalogue of pathology-associated T cell receptor sequences. Bioinformatics, 2017. <https://friedmanlab.weizmann.ac.il/McPAS-TCR/>
198
+ - **TCR3d** (downloaded). Lin V. et al. TCR3d 2.0: expanding the T cell receptor structure database. Nucleic Acids Research, 2025. <https://tcr3d.ibbr.umd.edu>
199
+ - **IMGT germline** (bundled, CC BY 4.0, release 20268-7). Lefranc M-P. et al. IMGT, the international ImMunoGeneTics information system. Reformatted via stitchr and IMGTgeneDL (MIT). See `src/tcr_explorer/data/germline/ATTRIBUTION.md`. <https://www.imgt.org>
200
+
201
+ ## Run tests
202
+
203
+ ```bash
204
+ PYTHONPATH=src pytest tests/ -v
205
+ ```
206
+
207
+ ## Architecture
208
+
209
+ The core is a set of single source pure functions (records retrieval, germline assignment, reconstruction, dossiers, similarity) that read from the locally downloaded index. The REST API, the MCP server, and the web query box all call these same functions in one process.
210
+
211
+ ```
212
+ MCP server REST API + /ui query box
213
+ \ /
214
+ \ /
215
+ single source pure functions
216
+ (records, assign, reconstruct,
217
+ dossier, similar)
218
+ |
219
+ local index (built by tcr-explorer-refresh
220
+ from the downloaded records) + bundled IMGT germline
221
+ ```
222
+
223
+ IMGT (IMGT/HLA, IMGT/GENE-DB, IMGT germline, IMGT numbering) is a data source cited throughout. TCR Explorer is an independent tool and is not affiliated with IMGT.
@@ -0,0 +1,186 @@
1
+ # TCR Explorer
2
+
3
+ A federated tool for T cell receptor analysis. It retrieves known TCR records (VDJdb, IEDB, McPAS, TCR3d), assigns germline V and J genes down to the allele level, reconstructs full membrane-bound chains, builds per-receptor dossiers, and finds similar receptors. The same pure functions back a web UI, a REST API, and an MCP server, so an assistant can drive the whole tool.
4
+
5
+ ## How the data works
6
+
7
+ The package ships the IMGT germline (bundled under CC BY 4.0) but **no record datasets**. On first use you run `tcr-explorer-refresh` once. It downloads the four record datasets (VDJdb, IEDB, McPAS, TCR3d) from each source's own official endpoint into a local folder, then harmonizes them into a single records index. After that, everything runs in one process against that local index, offline, until you refresh again to pull fresh data.
8
+
9
+ This means the tool never redistributes the record datasets (their licenses vary): each user fetches those directly from the source under that source's own terms. The germline is different: IMGT is CC BY 4.0, which permits redistribution with attribution, so it is bundled and germline features work offline out of the box. To pull a newer IMGT germline yourself, run `tcr-explorer-refresh --germline`. See [Data sources](#data-sources).
10
+
11
+ ## Requirements
12
+
13
+ - Python 3.10 or newer.
14
+ - Internet access for the initial `tcr-explorer-refresh` (a few minutes, roughly 60 MB). Offline afterward until you refresh.
15
+
16
+ ## Install
17
+
18
+ ```bash
19
+ pip install tcr-explorer
20
+ ```
21
+
22
+ Or from a checkout:
23
+
24
+ ```bash
25
+ python -m venv .venv
26
+ source .venv/bin/activate # Windows: .venv\Scripts\activate
27
+ pip install -e .
28
+ ```
29
+
30
+ ## First run
31
+
32
+ ```bash
33
+ tcr-explorer-refresh
34
+ ```
35
+
36
+ This downloads the four record datasets and builds the index into a local data folder (a platform-specific user data directory, or wherever `TCR_EXPLORER_DATA` points). The IMGT germline is already bundled, so this step does not touch IMGT. Re-run it any time to update the records. If a tool is used before the first refresh, it returns a clear message asking you to run this command.
37
+
38
+ To pull a fresher IMGT germline than the bundled one, run `tcr-explorer-refresh --germline` (needs IMGT/GENE-DB reachable). It writes into the local data folder, which the tool prefers over the bundled copy.
39
+
40
+ ## Use it
41
+
42
+ Two front doors, both a single process.
43
+
44
+ ### As an MCP server (recommended)
45
+
46
+ Point your own assistant at TCR Explorer over MCP and ask questions in plain language. This targets Claude Desktop and Claude Code, which run a local stdio MCP server. ChatGPT does not run local stdio MCP servers the same way, so use Claude for the paste-and-go flow. See [Connect your assistant](#connect-your-assistant).
47
+
48
+ ### As a web app and REST API
49
+
50
+ ```bash
51
+ PYTHONPATH=src uvicorn tcr_explorer.api:app --port 8000
52
+ ```
53
+
54
+ Open the query box at <http://localhost:8000/ui>, or call the REST API directly. Health check:
55
+
56
+ ```bash
57
+ curl http://localhost:8000/health # {"status":"ok"}
58
+ ```
59
+
60
+ ## Connect your assistant
61
+
62
+ TCR Explorer ships an MCP server (console entry point `tcr-explorer-mcp`). Add this to your assistant's MCP configuration:
63
+
64
+ ```json
65
+ {"mcpServers":{"tcr-explorer":{"command":"uvx","args":["--from","tcr-explorer","tcr-explorer-mcp"]}}}
66
+ ```
67
+
68
+ Or paste this prompt into Claude to have it set the connection up for you:
69
+
70
+ ```
71
+ Set up the TCR Explorer MCP server so you can answer T cell receptor questions against real immunology databases. First install it (pip install tcr-explorer, or use uvx --from tcr-explorer), then run tcr-explorer-refresh once in a terminal to download the datasets into a local folder (a few minutes). Then add an MCP server named tcr-explorer that runs `uvx --from tcr-explorer tcr-explorer-mcp` (if uvx is unavailable, run python -m tcr_explorer.mcp_server). It exposes these read only tools: retrieve_tcr_records, assign_tcr_alleles, get_tcr_dossier, find_similar_tcrs, align_tcr_genes, and ask_tcr. If a tool reports the data is not downloaded yet, tell me to run tcr-explorer-refresh. After adding it, confirm the connection and suggest three example questions I can ask.
72
+ ```
73
+
74
+ To run an unreleased version straight from a git repository instead of the PyPI release, use the git form: `uvx --from git+<your-repo-url> tcr-explorer-mcp`.
75
+
76
+ The read-only MCP tools are `retrieve_tcr_records`, `assign_tcr_alleles`, `get_tcr_dossier`, `find_similar_tcrs`, `align_tcr_genes`, and `ask_tcr`.
77
+
78
+ ## REST API
79
+
80
+ All of these run in process against the local index.
81
+
82
+ ### Unified query box
83
+
84
+ **POST** `/v1/tcr/query` routes a single input (a CDR3, a full chain, a gene name, a record id, or a phrase) to the right tool.
85
+
86
+ ```bash
87
+ curl -s -X POST http://localhost:8000/v1/tcr/query \
88
+ -H "Content-Type: application/json" \
89
+ -d '{"query":"CASSLGGAGGTDTQYF","species":"human"}'
90
+ ```
91
+
92
+ ### Germline assignment
93
+
94
+ **POST** `/v1/tcr/assign` assigns a TCR sequence (nucleotide or amino acid, CDR3, region, or full chain) to V and J alleles, with per-region identity, co-optimal ties, CDR3 extraction, and an honest refusal to call a V allele from a bare CDR3.
95
+
96
+ ```bash
97
+ curl -s -X POST http://localhost:8000/v1/tcr/assign \
98
+ -H "Content-Type: application/json" \
99
+ -d '{"sequence":"CASSLGGAGGTDTQYF","species":"human"}'
100
+ ```
101
+
102
+ ### Records retrieval
103
+
104
+ **POST** `/v1/tcr/records` searches the harmonized records index.
105
+
106
+ ```bash
107
+ curl -s -X POST http://localhost:8000/v1/tcr/records \
108
+ -H "Content-Type: application/json" \
109
+ -d '{"cdr3":"CASSLGGAGGTDTQYF","species":"human","limit":20}'
110
+ ```
111
+
112
+ ### Chain reconstruction
113
+
114
+ **POST** `/reconstruct` builds a full membrane-bound chain. Provide V, J, and CDR3, or a CDR3 alone (V and J are inferred from the records that carry the same CDR3).
115
+
116
+ ```bash
117
+ curl -s -X POST http://localhost:8000/reconstruct \
118
+ -H "Content-Type: application/json" \
119
+ -d '{"cdr3_aa":"CASSLGGAGGTDTQYF","species":"human"}'
120
+ ```
121
+
122
+ ### CDR1 and CDR2 prediction
123
+
124
+ **GET** `/predict/cdr` returns germline CDR1 and CDR2 for a TCR V gene from IMGT germline data.
125
+
126
+ ```bash
127
+ curl "http://localhost:8000/predict/cdr?v_gene=TRBV12-3&species=human"
128
+ ```
129
+
130
+ ## Optional: MHC allele sequences
131
+
132
+ The records index does not contain MHC allele sequences. If you want live lookup of those from EBI IMGT/HLA and IPD-MHC, start the optional hla and mhc proxies with one command:
133
+
134
+ ```bash
135
+ docker-compose up
136
+ ```
137
+
138
+ The `/search` endpoint is scoped to these two sources: `{"source": "hla"}` or `{"source": "mhc"}`. Any other source returns HTTP 400 pointing at `/v1/tcr/records`, which is where TCR record search lives.
139
+
140
+ ## Environment variables
141
+
142
+ All optional.
143
+
144
+ | Variable | Default | Description |
145
+ |----------|---------|-------------|
146
+ | `TCR_EXPLORER_DATA` | platform user data dir | Local folder where `tcr-explorer-refresh` downloads datasets and builds the index |
147
+ | `TCR_EXPLORER_MAX_AGE_DAYS` | `30` | Age after which the local index is flagged stale (a refresh is suggested in query warnings, never forced) |
148
+ | `RECORDS_INDEX_PATH` | `<data dir>/records_index.parquet` | Override the records index path directly |
149
+ | `HLA_SERVER_URL` | `http://127.0.0.1:8101` | HLA allele sequence proxy (optional) |
150
+ | `MHC_SERVER_URL` | `http://127.0.0.1:8105` | IPD-MHC allele sequence proxy (optional) |
151
+ | `LLM_BASE_URL` | *(empty)* | OpenAI-compatible endpoint for the free-text `ask` path (falls back to a heuristic parser when unset) |
152
+ | `LLM_MODEL` | `local-model` | Model id for the `ask` path |
153
+
154
+ ## Data sources
155
+
156
+ TCR Explorer cites the following. The four record datasets are downloaded on your machine from their official endpoints and are not redistributed; the IMGT germline is bundled with the package under CC BY 4.0. Please cite the ones you use.
157
+
158
+ - **VDJdb** (downloaded). Goncharov M. et al. VDJdb in the pandemic era: a compendium of T cell receptors specific for SARS-CoV-2. Nature Methods, 2022. <https://github.com/antigenomics/vdjdb-db>
159
+ - **IEDB** (downloaded, CC BY 4.0). Vita R. et al. The Immune Epitope Database (IEDB): 2024 update. Nucleic Acids Research, 2025. <https://www.iedb.org>
160
+ - **McPAS-TCR** (downloaded). Tickotsky N. et al. McPAS-TCR: a manually curated catalogue of pathology-associated T cell receptor sequences. Bioinformatics, 2017. <https://friedmanlab.weizmann.ac.il/McPAS-TCR/>
161
+ - **TCR3d** (downloaded). Lin V. et al. TCR3d 2.0: expanding the T cell receptor structure database. Nucleic Acids Research, 2025. <https://tcr3d.ibbr.umd.edu>
162
+ - **IMGT germline** (bundled, CC BY 4.0, release 20268-7). Lefranc M-P. et al. IMGT, the international ImMunoGeneTics information system. Reformatted via stitchr and IMGTgeneDL (MIT). See `src/tcr_explorer/data/germline/ATTRIBUTION.md`. <https://www.imgt.org>
163
+
164
+ ## Run tests
165
+
166
+ ```bash
167
+ PYTHONPATH=src pytest tests/ -v
168
+ ```
169
+
170
+ ## Architecture
171
+
172
+ The core is a set of single source pure functions (records retrieval, germline assignment, reconstruction, dossiers, similarity) that read from the locally downloaded index. The REST API, the MCP server, and the web query box all call these same functions in one process.
173
+
174
+ ```
175
+ MCP server REST API + /ui query box
176
+ \ /
177
+ \ /
178
+ single source pure functions
179
+ (records, assign, reconstruct,
180
+ dossier, similar)
181
+ |
182
+ local index (built by tcr-explorer-refresh
183
+ from the downloaded records) + bundled IMGT germline
184
+ ```
185
+
186
+ IMGT (IMGT/HLA, IMGT/GENE-DB, IMGT germline, IMGT numbering) is a data source cited throughout. TCR Explorer is an independent tool and is not affiliated with IMGT.
@@ -0,0 +1,78 @@
1
+ [build-system]
2
+ requires = ["setuptools>=77.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "tcr-explorer"
7
+ version = "0.1.0"
8
+ description = "Federated TCR analysis: records retrieval, germline allele assignment, reconstruction, dossiers, and similarity, over a web UI, REST API, and MCP server"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = "MIT"
12
+ license-files = ["LICENSE"]
13
+ authors = [{ name = "Kilian Maire" }]
14
+ keywords = [
15
+ "TCR", "T-cell-receptor", "immunology", "AIRR", "VDJ", "germline",
16
+ "IMGT", "VDJdb", "IEDB", "McPAS", "bioinformatics", "MCP",
17
+ "immune-repertoire", "epitope",
18
+ ]
19
+ classifiers = [
20
+ "Development Status :: 4 - Beta",
21
+ "Programming Language :: Python :: 3",
22
+ "Programming Language :: Python :: 3.10",
23
+ "Programming Language :: Python :: 3.11",
24
+ "Intended Audience :: Science/Research",
25
+ "Topic :: Scientific/Engineering :: Bio-Informatics",
26
+ "Operating System :: OS Independent",
27
+ "Typing :: Typed",
28
+ ]
29
+ dependencies = [
30
+ "mcp>=1.0.0,<2",
31
+ "starlette>=0.40,<0.48",
32
+ "fastapi==0.116.1",
33
+ "uvicorn[standard]==0.35.0",
34
+ "pydantic==2.11.7",
35
+ "python-multipart==0.0.20",
36
+ "httpx==0.28.1",
37
+ "pandas>=2.0.0",
38
+ "pyarrow>=15.0.0",
39
+ "biopython==1.85",
40
+ "tidytcells>=2.0.0",
41
+ "platformdirs>=4",
42
+ "stitchr>=1.3",
43
+ ]
44
+
45
+ [project.optional-dependencies]
46
+ ui-legacy = ["streamlit>=1.35.0"]
47
+
48
+ [project.scripts]
49
+ tcr-explorer-mcp = "tcr_explorer.mcp_server:main"
50
+ tcr-explorer-refresh = "tcr_explorer.bootstrap:main"
51
+
52
+ [project.urls]
53
+ Homepage = "https://github.com/KilianMaire/tcr-explorer"
54
+ Issues = "https://github.com/KilianMaire/tcr-explorer/issues"
55
+
56
+ [tool.setuptools]
57
+ package-dir = {"" = "src"}
58
+
59
+ [tool.setuptools.packages.find]
60
+ where = ["src"]
61
+
62
+ [tool.setuptools.package-data]
63
+ tcr_explorer = [
64
+ "py.typed",
65
+ "data/*.json",
66
+ "data/germline/*.md",
67
+ "data/germline/*/*.fasta",
68
+ "data/germline/*/*.tsv",
69
+ "data/germline/*/*.txt",
70
+ ]
71
+
72
+ [tool.pytest.ini_options]
73
+ testpaths = ["tests"]
74
+ pythonpath = ["src", "."]
75
+
76
[tool.ruff]
line-length = 100
# Lint against the oldest interpreter the package supports
# (requires-python = ">=3.10"); targeting py311 would let ruff accept or
# suggest 3.11-only constructs that fail on 3.10.
target-version = "py310"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1 @@
1
# The package root re-exports nothing: `from tcr_explorer import *` binds no names.
__all__: list[str] = []
@@ -0,0 +1,66 @@
1
+ """
2
+ Annotator interface: selects between IgBLAST (authoritative, deferred) and the
3
+ always-available k-mer aligner fallback (Task 3).
4
+
5
+ Selection rule: IgBLAST is only attempted when mode=="full", the input is
6
+ nucleotide (not protein), and the `igblastn` binary is present on PATH. The
7
+ actual subprocess invocation is deferred (`_run_igblast` is a stub returning
8
+ None), so this module currently always falls through to the k-mer backend;
9
+ the point of this module is the selection/fallback logic and the
10
+ `igblast_unavailable` warning, not the IgBLAST call itself.
11
+ """
12
+ from __future__ import annotations
13
+ import shutil
14
+ from dataclasses import dataclass, field
15
+ from typing import Optional
16
+ from .kmer_aligner import annotate_sequence, KmerAnnotation
17
+
18
+
19
@dataclass
class Annotation:
    """Annotation result returned by ``annotate``, independent of the backend used.

    Every field has a default, so ``Annotation()`` is a valid "nothing known"
    value. The call/score/chain fields are filled from the k-mer backend's
    result in ``_from_kmer``.
    """
    # Gene calls; None when no call is available.
    # NOTE(review): presumably V/J/D gene or allele names -- confirm against kmer_aligner.
    v_call: Optional[str] = None
    j_call: Optional[str] = None
    d_call: Optional[str] = None
    # Scores reported alongside the V and J calls; None when absent.
    v_score: Optional[float] = None
    j_score: Optional[float] = None
    chain: str = "unknown"
    # Backend label; this module sets "kmer_align" or "igblast".
    source: str = "kmer_align"
    # This module assigns "low", "medium" or "high".
    confidence: str = "low"
    # Pairs of strings, e.g. ("igblast_unavailable", "<human-readable message>").
    # default_factory gives each instance its own list.
    warnings: list[tuple[str, str]] = field(default_factory=list)
30
+
31
+
32
def igblast_available() -> bool:
    """Report whether an ``igblastn`` executable can be located on PATH."""
    located = shutil.which("igblastn")
    return located is not None
34
+
35
+
36
+ def _run_igblast(seq: str, species: str) -> Optional[Annotation]:
37
+ # Deferred: real igblastn -outfmt 19 invocation + AIRR TSV parse.
38
+ # Returning None signals "not implemented / failed" so callers fall back.
39
+ return None
40
+
41
+
42
def _from_kmer(k: KmerAnnotation, source: str, confidence: str) -> Annotation:
    """Convert a k-mer aligner result into an ``Annotation``.

    The call, score and chain fields are copied from ``k`` unchanged, while
    ``source`` and ``confidence`` come from the arguments. The warnings are
    copied into a fresh list, so appending to the returned annotation's
    warnings does not modify ``k.warnings``.
    """
    copied = {
        name: getattr(k, name)
        for name in ("v_call", "j_call", "d_call", "v_score", "j_score", "chain")
    }
    return Annotation(
        **copied,
        source=source,
        confidence=confidence,
        warnings=list(k.warnings),
    )
48
+
49
+
50
def annotate(seq: str, species: str, is_protein: bool, mode: str) -> Annotation:
    """Annotate ``seq``, preferring IgBLAST and falling back to the k-mer backend.

    IgBLAST is attempted only when ``mode == "full"``, the input is nucleotide
    (``is_protein`` is false) and ``igblastn`` is found on PATH. When it is
    not attempted, or ``_run_igblast`` returns ``None``, the k-mer backend
    (``annotate_sequence``) produces the result instead.

    Args:
        seq: Sequence to annotate.
        species: Species identifier, passed through to the backend.
        is_protein: True when ``seq`` is protein rather than nucleotide.
        mode: Annotation mode; only ``"full"`` enables the IgBLAST attempt.

    Returns:
        An ``Annotation``. On IgBLAST success its source is ``"igblast"`` and
        its confidence ``"high"``. Otherwise its source is ``"kmer_align"``
        with confidence ``"medium"`` for nucleotide input or ``"low"`` for
        protein input, plus an ``igblast_unavailable`` warning when IgBLAST
        was wanted but the binary is missing.
    """
    wants_igblast = mode == "full" and not is_protein
    # Probe PATH at most once per call, so the backend choice and the
    # warning below are based on the same answer.
    has_igblast = wants_igblast and igblast_available()

    if has_igblast:
        res = _run_igblast(seq, species)
        if res is not None:
            res.source, res.confidence = "igblast", "high"
            return res
        # IgBLAST is present but produced no result: fall back to k-mer below.

    k = annotate_sequence(seq, species, is_protein)
    ann = _from_kmer(k, "kmer_align", "low" if is_protein else "medium")
    if wants_igblast and not has_igblast:
        ann.warnings.append((
            "igblast_unavailable",
            "igblastn not found on PATH; used the k-mer backend",
        ))
    return ann