stcrpy 1.0.3__tar.gz → 1.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. stcrpy-1.0.5/PKG-INFO +285 -0
  2. stcrpy-1.0.5/README.md +258 -0
  3. stcrpy-1.0.5/pyproject.toml +48 -0
  4. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/__init__.py +1 -1
  5. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_formats/tcr_formats.py +20 -1
  6. stcrpy-1.0.5/stcrpy/tcr_geometry/TCRAngle.py +177 -0
  7. stcrpy-1.0.5/stcrpy/tcr_geometry/reference_data/Acoreset.txt +30 -0
  8. stcrpy-1.0.5/stcrpy/tcr_geometry/reference_data/Bcoreset.txt +30 -0
  9. stcrpy-1.0.5/stcrpy/tcr_geometry/reference_data/pcA.txt +3 -0
  10. stcrpy-1.0.5/stcrpy/tcr_geometry/reference_data/pcB.txt +3 -0
  11. stcrpy-1.0.5/stcrpy/tcr_geometry/reference_data/reference_A.pdb +31 -0
  12. stcrpy-1.0.5/stcrpy/tcr_geometry/reference_data/reference_B.pdb +31 -0
  13. stcrpy-1.0.5/stcrpy/tcr_geometry/reference_data/reference_D.pdb +31 -0
  14. stcrpy-1.0.5/stcrpy/tcr_geometry/reference_data/reference_G.pdb +31 -0
  15. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_interactions/TCRInteractionProfiler.py +1 -1
  16. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_methods/tcr_batch_operations.py +14 -10
  17. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_methods/tcr_methods.py +23 -22
  18. stcrpy-1.0.5/stcrpy/tcr_metrics/tcr_dockq.py +404 -0
  19. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_processing/MHC.py +389 -4
  20. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_processing/TCR.py +252 -0
  21. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_processing/TCRParser.py +349 -189
  22. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_processing/annotate.py +6 -1
  23. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_processing/utils/region_definitions.py +9 -0
  24. stcrpy-1.0.5/stcrpy/tcr_processing/utils/symmetry_mates.py +90 -0
  25. stcrpy-1.0.5/stcrpy.egg-info/PKG-INFO +285 -0
  26. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy.egg-info/SOURCES.txt +13 -3
  27. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy.egg-info/requires.txt +6 -0
  28. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy.egg-info/top_level.txt +2 -0
  29. stcrpy-1.0.5/test/test_symmetry_mates.py +11 -0
  30. {stcrpy-1.0.3 → stcrpy-1.0.5}/test/test_tcr_datasets.py +1 -1
  31. {stcrpy-1.0.3 → stcrpy-1.0.5}/test/test_tcr_formats.py +4 -2
  32. {stcrpy-1.0.3 → stcrpy-1.0.5}/test/test_tcr_interactions.py +4 -4
  33. stcrpy-1.0.5/test/test_tcr_methods.py +18 -0
  34. {stcrpy-1.0.3 → stcrpy-1.0.5}/test/test_tcr_metrics.py +53 -0
  35. {stcrpy-1.0.3 → stcrpy-1.0.5}/test/test_tcr_processing.py +130 -8
  36. stcrpy-1.0.3/PKG-INFO +0 -173
  37. stcrpy-1.0.3/README.md +0 -145
  38. stcrpy-1.0.3/setup.py +0 -31
  39. stcrpy-1.0.3/stcrpy.egg-info/PKG-INFO +0 -173
  40. stcrpy-1.0.3/test/test_tcr_methods.py +0 -18
  41. {stcrpy-1.0.3 → stcrpy-1.0.5}/LICENCE +0 -0
  42. {stcrpy-1.0.3 → stcrpy-1.0.5}/examples/__init__.py +0 -0
  43. {stcrpy-1.0.3 → stcrpy-1.0.5}/examples/egnn.py +0 -0
  44. {stcrpy-1.0.3 → stcrpy-1.0.5}/setup.cfg +0 -0
  45. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_datasets/__init__.py +0 -0
  46. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_datasets/tcr_graph_dataset.py +0 -0
  47. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_datasets/tcr_selector.py +0 -0
  48. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_datasets/tcr_structure_dataset.py +0 -0
  49. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_datasets/utils.py +0 -0
  50. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_formats/__init__.py +0 -0
  51. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_formats/tcr_haddock.py +0 -0
  52. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_geometry/TCRCoM.py +0 -0
  53. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_geometry/TCRCoM_LICENCE +0 -0
  54. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_geometry/TCRDock.py +0 -0
  55. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_geometry/TCRGeom.py +0 -0
  56. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_geometry/TCRGeomFiltering.py +0 -0
  57. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_geometry/__init__.py +0 -0
  58. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_geometry/reference_data/__init__.py +0 -0
  59. /stcrpy-1.0.3/stcrpy/tcr_geometry/reference_data/reference_A.pdb → /stcrpy-1.0.5/stcrpy/tcr_geometry/reference_data/consensus_A.pdb +0 -0
  60. /stcrpy-1.0.3/stcrpy/tcr_geometry/reference_data/reference_B.pdb → /stcrpy-1.0.5/stcrpy/tcr_geometry/reference_data/consensus_B.pdb +0 -0
  61. /stcrpy-1.0.3/stcrpy/tcr_geometry/reference_data/reference_D.pdb → /stcrpy-1.0.5/stcrpy/tcr_geometry/reference_data/consensus_D.pdb +0 -0
  62. /stcrpy-1.0.3/stcrpy/tcr_geometry/reference_data/reference_G.pdb → /stcrpy-1.0.5/stcrpy/tcr_geometry/reference_data/consensus_G.pdb +0 -0
  63. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_geometry/reference_data/dock_reference_1_imgt_numbered.pdb +0 -0
  64. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_geometry/reference_data/dock_reference_2_imgt_numbered.pdb +0 -0
  65. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_geometry/reference_data/reference_data.py +0 -0
  66. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_interactions/PLIPParser.py +0 -0
  67. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_interactions/TCRpMHC_PLIP_Model_Parser.py +0 -0
  68. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_interactions/__init__.py +0 -0
  69. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_interactions/utils.py +0 -0
  70. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_methods/__init__.py +0 -0
  71. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_methods/tcr_reformatting.py +0 -0
  72. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_metrics/__init__.py +0 -0
  73. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_metrics/constants.py +0 -0
  74. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_metrics/tcr_interface_rmsd.py +0 -0
  75. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_metrics/tcr_rmsd.py +0 -0
  76. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_ml/__init__.py +0 -0
  77. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_ml/geometry_predictor.py +0 -0
  78. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_processing/AGchain.py +0 -0
  79. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_processing/Chemical_components.py +0 -0
  80. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_processing/Entity.py +0 -0
  81. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_processing/Fragment.py +0 -0
  82. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_processing/Holder.py +0 -0
  83. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_processing/MHCchain.py +0 -0
  84. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_processing/Model.py +0 -0
  85. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_processing/Select.py +0 -0
  86. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_processing/TCRIO.py +0 -0
  87. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_processing/TCRStructure.py +0 -0
  88. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_processing/TCRchain.py +0 -0
  89. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_processing/__init__.py +0 -0
  90. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_processing/utils/__init__.py +0 -0
  91. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_processing/utils/common.py +0 -0
  92. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/tcr_processing/utils/constants.py +0 -0
  93. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/utils/__init__.py +0 -0
  94. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy/utils/error_stream.py +0 -0
  95. {stcrpy-1.0.3 → stcrpy-1.0.5}/stcrpy.egg-info/dependency_links.txt +0 -0
  96. {stcrpy-1.0.3 → stcrpy-1.0.5}/test/test_annotations.py +0 -0
  97. {stcrpy-1.0.3 → stcrpy-1.0.5}/test/test_tcr_geometry.py +0 -0
  98. {stcrpy-1.0.3 → stcrpy-1.0.5}/test/test_tcr_geometry_filters.py +0 -0
  99. {stcrpy-1.0.3 → stcrpy-1.0.5}/test/test_tcr_sequence_operations.py +0 -0
stcrpy-1.0.5/PKG-INFO ADDED
@@ -0,0 +1,285 @@
1
+ Metadata-Version: 2.4
2
+ Name: stcrpy
3
+ Version: 1.0.5
4
+ Summary: Set of methods to parse, annotate, and calculate features of TCR structures
5
+ Maintainer-email: Nele Quast <quast@stats.ox.ac.uk>
6
+ Requires-Python: >=3.10
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENCE
9
+ License-File: stcrpy/tcr_geometry/TCRCoM_LICENCE
10
+ Requires-Dist: biopython
11
+ Requires-Dist: numpy==1.26.4
12
+ Requires-Dist: lxml
13
+ Requires-Dist: openbabel-wheel==3.1.1.21
14
+ Requires-Dist: rdkit
15
+ Requires-Dist: anarci-mhc
16
+ Requires-Dist: pandas
17
+ Requires-Dist: matplotlib
18
+ Requires-Dist: scipy
19
+ Requires-Dist: requests
20
+ Requires-Dist: scikit-learn
21
+ Requires-Dist: DockQ
22
+ Provides-Extra: ml-datasets
23
+ Requires-Dist: einops; extra == "ml-datasets"
24
+ Requires-Dist: torch; extra == "ml-datasets"
25
+ Requires-Dist: torch_geometric; extra == "ml-datasets"
26
+ Dynamic: license-file
27
+
28
+
29
+
30
+ <img src="./stcrpy_logo.png" alt="drawing" width="300"/>
31
+
32
+
33
+ # STCRpy
34
+ [![stcrpy installation](https://github.com/npqst/STCRpy/actions/workflows/conda-workflow.yml/badge.svg)](https://github.com/npqst/STCRpy/actions/workflows/conda-workflow.yml)
35
+ [![stcrpy unittests](https://github.com/npqst/STCRpy/actions/workflows/unittest-workflow.yml/badge.svg)](https://github.com/npqst/STCRpy/actions/workflows/unittest-workflow.yml)
36
+ [![stcrpy_docs](https://readthedocs.org/projects/stcrpy/badge/?version=latest)](https://stcrpy.readthedocs.io/en/latest/)
37
+
38
+
39
+ Structural TCR python (STCRpy) is a software suite for analysing and processing T-cell receptor structures.
40
+
41
+ Please feel free to reach out with any comments or feedback.
42
+
43
+ Under review, please cite:
44
+
45
+ **Quast, N. , Deane, C., & Raybould, M. (2025). STCRpy: a software suite for TCR:pMHC structure parsing, interaction profiling, and machine learning dataset preparation. BioRxiv. https://doi.org/10.1101/2025.04.25.650667**
46
+
47
+ <img src="./stcrpy_main_fig.png" alt="drawing" width="1500"/>
48
+
49
+
50
+
51
+ # Installation
52
+
53
+ ## TL;DR installation
54
+ ```
55
+ pip install stcrpy
56
+ pip install plip
57
+ conda install pymol-open-source -y
58
+ ANARCI --build_models # this step will take a few minutes
59
+ ```
60
+
61
+ ## Step by step installation
62
+ We recommend installing STCRpy in a [conda](https://www.anaconda.com/docs/getting-started/miniconda/install#macos-linux-installation) (or [mamba](https://mamba.readthedocs.io/en/latest/installation/mamba-installation.html)) environment using python 3.10 to 3.12. You can also use a python virtual environment if you do not need pymol visualisations.
63
+
64
+ <details> <summary>conda</summary>
65
+
66
+ ```
67
+ conda create -n stcrpy_env python==3.12 -y
68
+ conda activate stcrpy_env
69
+ ```
70
+
71
+ </details>
72
+ <details> <summary>mamba</summary>
73
+
74
+ ```
75
+ mamba create -n stcrpy_env python==3.12 -y
76
+ mamba activate stcrpy_env
77
+ ```
78
+
79
+ </details>
80
+ <details> <summary>venv</summary>
81
+
82
+ ```
83
+ python -m venv stcrpy_env
84
+ source stcrpy_env/bin/activate
85
+ ```
86
+
87
+ </details>
88
+
89
+
90
+ The core functionality of STCRpy can be installed as follows:
91
+ ```
92
+ pip install stcrpy
93
+ ```
94
+
95
+ After installing stcrpy, the anarci HMM models must be built to enable annotation.
96
+ ```
97
+ ANARCI --build_models # this step will take a few minutes
98
+ ```
99
+
100
+ To enable interaction profiling, install PLIP (Adasme et al., 2021):
101
+ ```
102
+ pip install plip
103
+ ```
104
+
105
+ To enable pymol visualisations, install pymol open source locally within the environment. Unfortunately, pymol currently needs to be installed even if you already have a pymol version. Be sure to install pymol within a managed conda (or mamba) environment to prevent interference with any existing versions.
106
+ ```
107
+ conda install pymol-open-source -y
108
+ ```
109
+
110
+ To generate pytorch and pytorch-geometric compatible datasets (see the [pytorch docs](https://pytorch.org/get-started/locally/) for hardware specific instructions):
111
+ ```
112
+ pip install stcrpy[ml_datasets]
113
+ ```
114
+
115
+ > Note that the installs for pytorch can be platform specific.
116
+ > If errors are encountered here, it is best to manually install the dependencies following the [pytorch installation docs](https://pytorch.org/get-started/locally/).
117
+ > For example:
118
+ > ```
119
+ > pip install torch --index-url https://download.pytorch.org/whl/cpu
120
+ > pip install torch_geometric
121
+ > ```
122
+ > This installs the CPU version of pytorch (for GPU / CUDA versions follow the [pytorch installation docs](https://pytorch.org/get-started/locally/)).
123
+ >
124
+ > The EGNN example also uses `einops`, which can be manually installed as follows:
125
+ > ```
126
+ > pip install einops
127
+ > ```
128
+
129
+ # Documentation
130
+ STCRpy [documentation](https://stcrpy.readthedocs.io/en/latest/) is hosted on ReadtheDocs.
131
+
132
+ # Examples
133
+ STCRpy generates and operates on TCR structure objects. The majority of the API can be accessed through functions of the format: `tcr.some_stcrpy_function()`. ([See TCR object docs here](https://stcrpy.readthedocs.io/en/latest/stcrpy.tcr_processing.html#stcrpy.tcr_processing.TCR.TCR)). TCR objects are associated with their MHC and antigen if these are presented in the structure.
134
+
135
+ A notebook with examples can be found under [examples/STCRpy_examples.ipynb](./examples/STCRpy_examples.ipynb)
136
+
137
+ First import STCRpy:
138
+ ```
139
+ import stcrpy
140
+ ```
141
+
142
+ ### To fetch a TCR structure from STCRDab or the PDB:
143
+ ```
144
+ multiple_tcrs = stcrpy.fetch_TCRs("8gvb")
145
+ ```
146
+ This will return a list of all of the TCR structures found in the PDB file, represented as TCR structure objects.
147
+
148
+ ### To load a TCR structure from a PDB or MMCIF file:
149
+ ```
150
+ tcr = stcrpy.load_TCR("filename.{pdb, cif}")
151
+ ```
152
+
153
+ ### To load multiple TCR structures from a list of files at once:
154
+ ```
155
+ multiple_tcrs = stcrpy.load_TCRs([file_1, file_2, file_3])
156
+ ```
157
+
158
+ ### To save a TCR object to PDB or MMCIF files:
159
+ ```
160
+ tcr.save(filename.{pdb, cif}) # save the TCR and its associated MHC and antigen
161
+ tcr.save(filename.{pdb, cif}, TCR_only=True) # save the TCR only
162
+ ```
163
+
164
+ ### To calculate the TCR to pMHC geometry:
165
+ ```
166
+ tcr.calculate_geometry() # change the 'mode' keyword argument to change the geometry calculation method. See paper / documentation for details.
167
+ ```
168
+
169
+ ### To score the TCR to pMHC geometry:
170
+ ```
171
+ tcr.score_docking_geometry()
172
+ ```
173
+
174
+ ### To profile interactions:
175
+ ```
176
+ tcr.profile_peptide_interactions() # interaction profiling parameters can be adjusted, see documentation for details
177
+ ```
178
+
179
+ ### To visualise interactions:
180
+ ```
181
+ tcr.visualise_interactions()
182
+ ```
183
+
184
+ ### To run full analysis on a set of TCR structures:
185
+ ```
186
+ from stcrpy.tcr_methods.tcr_batch_operations import analyse_tcrs
187
+ germlines_and_alleles_df, geometries_df, interactions_df = analyse_tcrs(list_or_dict_of_files)
188
+ ```
189
+
190
+ ### To generate graph datasets:
191
+ ```
192
+ dataset = TCRGraphDataset(
193
+ root=PATH_TO_DATASET,
194
+ data_paths=PATH_TO_TCR_FILES
195
+ )
196
+ ```
197
+
198
+ ### To calculate TCR prediction metrics such as RMSD, interface RMSD (of the TCR:pMHC interface) or DockQ scores:
199
+
200
+ ```
201
+ # RMSD
202
+ from stcrpy.tcr_metrics import RMSD
203
+
204
+ rmsd_calculator = RMSD()
205
+ rmsd = rmsd_calculator.calculate_rmsd(pred_tcr, reference_tcr, save_alignment=False) # Calculates the RMSD of each region of the TCR. To check the alignment set save_alignment to True.
206
+
207
+ # To calculate RMSD for a set of predictions against a set of reference structures from files:
208
+ files = list(zip(prediction_files, reference_files))
209
+ rmsd_df = rmsd_calculator.rmsd_from_files(files)
210
+
211
+
212
+
213
+ # Interface RMSD of TCR:pMHC interface
214
+ from stcrpy.tcr_metrics import InterfaceRMSD
215
+
216
+ interface_rmsd_calculator = InterfaceRMSD()
217
+ irmsds = interface_rmsd_calculator.get_interface_rmsd(tcr, reference_tcr)
218
+
219
+ # DockQ
220
+ from stcrpy.tcr_metrics.tcr_dockq import TCRDockQ
221
+
222
+ dockq_calculator = TCRDockQ() # by default this will merge the TCR and pMHC chains and calculate DockQ of the complete TCR:pMHC interface. To calculate DockQ scores per chain, use TCR_pMHC_interface=False
223
+ dockq_results = dockq_calculator.tcr_dockq(tcr, reference_tcr, save_merged_complex=False) # to investigate the merged TCR:pMHC structure set save_merged_complex=True
224
+
225
+ ```
226
+
227
+ ### Torsion angles and internal coordinates
228
+ STCRpy builds upon the Biopython PDB module, and you can calculate the internal coordinates, such as backbone torsion angles, using the [`internal_coordinates` function](https://biopython.org/docs/dev/api/Bio.PDB.internal_coords.html).
229
+
230
+ ```
231
+ # internal coordinate calculations should be made per chain
232
+ for c in tcr.get_chains():
233
+ c.atom_to_internal_coordinates() # calculate the internal coordinates
234
+
235
+ # internal coordinates can be accessed per residue:
236
+ res = next(tcr.get_residues())
237
+ res.internal_coord.get_angle("psi") # retrieve angles via angle keys
238
+ ```
239
+
240
+ ### Domain angles between TCR chains
241
+ STCRpy can be used to calculate the geometry and angles between the TCR variable domains of abTCRs and gdTCRs. This follows the ABangle implementation [(Dunbar et al. 2013)](https://academic.oup.com/peds/article/26/10/611/1509255).
242
+ ```
243
+ tcr.get_TCR_angles()
244
+
245
+ # returns dictionary of TCR domain angles and measurements.
246
+ # For example:
247
+ # {
248
+ # 'BA': -56.72234454750631,
249
+ # 'BC1': 122.55277240895967,
250
+ # 'AC1': 73.96532018128327,
251
+ # 'BC2': 82.63524566165464,
252
+ # 'AC2': 99.60327202896609,
253
+ # 'dc': np.float64(15.606353954437227)
254
+ # }
255
+
256
+ ```
257
+
258
+
259
+ # Symmetry mate handling
260
+ Some TCR:pMHC crystals are formed of repeating cell units in which the TCR and the antigen do not directly contact.
261
+ STCRpy generates symmetry mates in these cases to pair pMHC with TCRs in the structure.
262
+ Note that symmetry mate generation requires pymol to be installed. By default, symmetry mate generation is enabled, however, it can be toggled by setting:
263
+ `include_symmetry_mates=False` in `get_tcr_structure`.
264
+
265
+ ## Example:
266
+ ```
267
+ tcr_6ulr_paired_antigen = stcrpy.fetch_TCRs("6ulr")
268
+ tcr_6ulr_no_antigen = stcrpy.fetch_TCRs("6ulr", include_symmetry_mates=False) # does not generate symmetry mates
269
+
270
+ ```
271
+
272
+
273
+
274
+ # Limitations
275
+
276
+ ## Connected peptide chains
277
+ STCRpy is currently not configured to handle cases where the antigen peptide is connected to the TCR or MHC chain - this is primarily because the parsing pipeline operates on chain objects and it can be tricky to consistently separate the peptide segment from the remainder of the TCR chain. A known case is PDB code 6MNO.
278
+
279
+ ## Gamma-Delta TCR geometry
280
+ STCRpy supports gamma-delta TCR parsing, interaction profiling and visualisation, but is not currently configured to calculate gd-TCR geometry.
281
+
282
+ ## MHC Class II geometry scoring
283
+ STCRpy can be used to calculate and characterise the geometries of TCRs to MHC class II antigen, however, due to the smaller number of complexes we have not fit parametric distributions to the geometry features, which means it is not possible to calculate a geometry score.
284
+
285
+
stcrpy-1.0.5/README.md ADDED
@@ -0,0 +1,258 @@
1
+
2
+
3
+ <img src="./stcrpy_logo.png" alt="drawing" width="300"/>
4
+
5
+
6
+ # STCRpy
7
+ [![stcrpy installation](https://github.com/npqst/STCRpy/actions/workflows/conda-workflow.yml/badge.svg)](https://github.com/npqst/STCRpy/actions/workflows/conda-workflow.yml)
8
+ [![stcrpy unittests](https://github.com/npqst/STCRpy/actions/workflows/unittest-workflow.yml/badge.svg)](https://github.com/npqst/STCRpy/actions/workflows/unittest-workflow.yml)
9
+ [![stcrpy_docs](https://readthedocs.org/projects/stcrpy/badge/?version=latest)](https://stcrpy.readthedocs.io/en/latest/)
10
+
11
+
12
+ Structural TCR python (STCRpy) is a software suite for analysing and processing T-cell receptor structures.
13
+
14
+ Please feel free to reach out with any comments or feedback.
15
+
16
+ Under review, please cite:
17
+
18
+ **Quast, N. , Deane, C., & Raybould, M. (2025). STCRpy: a software suite for TCR:pMHC structure parsing, interaction profiling, and machine learning dataset preparation. BioRxiv. https://doi.org/10.1101/2025.04.25.650667**
19
+
20
+ <img src="./stcrpy_main_fig.png" alt="drawing" width="1500"/>
21
+
22
+
23
+
24
+ # Installation
25
+
26
+ ## TL;DR installation
27
+ ```
28
+ pip install stcrpy
29
+ pip install plip
30
+ conda install pymol-open-source -y
31
+ ANARCI --build_models # this step will take a few minutes
32
+ ```
33
+
34
+ ## Step by step installation
35
+ We recommend installing STCRpy in a [conda](https://www.anaconda.com/docs/getting-started/miniconda/install#macos-linux-installation) (or [mamba](https://mamba.readthedocs.io/en/latest/installation/mamba-installation.html)) environment using python 3.10 to 3.12. You can also use a python virtual environment if you do not need pymol visualisations.
36
+
37
+ <details> <summary>conda</summary>
38
+
39
+ ```
40
+ conda create -n stcrpy_env python==3.12 -y
41
+ conda activate stcrpy_env
42
+ ```
43
+
44
+ </details>
45
+ <details> <summary>mamba</summary>
46
+
47
+ ```
48
+ mamba create -n stcrpy_env python==3.12 -y
49
+ mamba activate stcrpy_env
50
+ ```
51
+
52
+ </details>
53
+ <details> <summary>venv</summary>
54
+
55
+ ```
56
+ python -m venv stcrpy_env
57
+ source stcrpy_env/bin/activate
58
+ ```
59
+
60
+ </details>
61
+
62
+
63
+ The core functionality of STCRpy can be installed as follows:
64
+ ```
65
+ pip install stcrpy
66
+ ```
67
+
68
+ After installing stcrpy, the anarci HMM models must be built to enable annotation.
69
+ ```
70
+ ANARCI --build_models # this step will take a few minutes
71
+ ```
72
+
73
+ To enable interaction profiling, install PLIP (Adasme et al., 2021):
74
+ ```
75
+ pip install plip
76
+ ```
77
+
78
+ To enable pymol visualisations, install pymol open source locally within the environment. Unfortunately, pymol currently needs to be installed even if you already have a pymol version. Be sure to install pymol within a managed conda (or mamba) environment to prevent interference with any existing versions.
79
+ ```
80
+ conda install pymol-open-source -y
81
+ ```
82
+
83
+ To generate pytorch and pytorch-geometric compatible datasets (see the [pytorch docs](https://pytorch.org/get-started/locally/) for hardware specific instructions):
84
+ ```
85
+ pip install stcrpy[ml_datasets]
86
+ ```
87
+
88
+ > Note that the installs for pytorch can be platform specific.
89
+ > If errors are encountered here, it is best to manually install the dependencies following the [pytorch installation docs](https://pytorch.org/get-started/locally/).
90
+ > For example:
91
+ > ```
92
+ > pip install torch --index-url https://download.pytorch.org/whl/cpu
93
+ > pip install torch_geometric
94
+ > ```
95
+ > This installs the CPU version of pytorch (for GPU / CUDA versions follow the [pytorch installation docs](https://pytorch.org/get-started/locally/)).
96
+ >
97
+ > The EGNN example also uses `einops`, which can be manually installed as follows:
98
+ > ```
99
+ > pip install einops
100
+ > ```
101
+
102
+ # Documentation
103
+ STCRpy [documentation](https://stcrpy.readthedocs.io/en/latest/) is hosted on ReadtheDocs.
104
+
105
+ # Examples
106
+ STCRpy generates and operates on TCR structure objects. The majority of the API can be accessed through functions of the format: `tcr.some_stcrpy_function()`. ([See TCR object docs here](https://stcrpy.readthedocs.io/en/latest/stcrpy.tcr_processing.html#stcrpy.tcr_processing.TCR.TCR)). TCR objects are associated with their MHC and antigen if these are presented in the structure.
107
+
108
+ A notebook with examples can be found under [examples/STCRpy_examples.ipynb](./examples/STCRpy_examples.ipynb)
109
+
110
+ First import STCRpy:
111
+ ```
112
+ import stcrpy
113
+ ```
114
+
115
+ ### To fetch a TCR structure from STCRDab or the PDB:
116
+ ```
117
+ multiple_tcrs = stcrpy.fetch_TCRs("8gvb")
118
+ ```
119
+ This will return a list of all of the TCR structures found in the PDB file, represented as TCR structure objects.
120
+
121
+ ### To load a TCR structure from a PDB or MMCIF file:
122
+ ```
123
+ tcr = stcrpy.load_TCR("filename.{pdb, cif}")
124
+ ```
125
+
126
+ ### To load multiple TCR structures from a list of files at once:
127
+ ```
128
+ multiple_tcrs = stcrpy.load_TCRs([file_1, file_2, file_3])
129
+ ```
130
+
131
+ ### To save a TCR object to PDB or MMCIF files:
132
+ ```
133
+ tcr.save(filename.{pdb, cif}) # save the TCR and its associated MHC and antigen
134
+ tcr.save(filename.{pdb, cif}, TCR_only=True) # save the TCR only
135
+ ```
136
+
137
+ ### To calculate the TCR to pMHC geometry:
138
+ ```
139
+ tcr.calculate_geometry() # change the 'mode' keyword argument to change the geometry calculation method. See paper / documentation for details.
140
+ ```
141
+
142
+ ### To score the TCR to pMHC geometry:
143
+ ```
144
+ tcr.score_docking_geometry()
145
+ ```
146
+
147
+ ### To profile interactions:
148
+ ```
149
+ tcr.profile_peptide_interactions() # interaction profiling parameters can be adjusted, see documentation for details
150
+ ```
151
+
152
+ ### To visualise interactions:
153
+ ```
154
+ tcr.visualise_interactions()
155
+ ```
156
+
157
+ ### To run full analysis on a set of TCR structures:
158
+ ```
159
+ from stcrpy.tcr_methods.tcr_batch_operations import analyse_tcrs
160
+ germlines_and_alleles_df, geometries_df, interactions_df = analyse_tcrs(list_or_dict_of_files)
161
+ ```
162
+
163
+ ### To generate graph datasets:
164
+ ```
165
+ dataset = TCRGraphDataset(
166
+ root=PATH_TO_DATASET,
167
+ data_paths=PATH_TO_TCR_FILES
168
+ )
169
+ ```
170
+
171
+ ### To calculate TCR prediction metrics such as RMSD, interface RMSD (of the TCR:pMHC interface) or DockQ scores:
172
+
173
+ ```
174
+ # RMSD
175
+ from stcrpy.tcr_metrics import RMSD
176
+
177
+ rmsd_calculator = RMSD()
178
+ rmsd = rmsd_calculator.calculate_rmsd(pred_tcr, reference_tcr, save_alignment=False) # Calculates the RMSD of each region of the TCR. To check the alignment set save_alignment to True.
179
+
180
+ # To calculate RMSD for a set of predictions against a set of reference structures from files:
181
+ files = list(zip(prediction_files, reference_files))
182
+ rmsd_df = rmsd_calculator.rmsd_from_files(files)
183
+
184
+
185
+
186
+ # Interface RMSD of TCR:pMHC interface
187
+ from stcrpy.tcr_metrics import InterfaceRMSD
188
+
189
+ interface_rmsd_calculator = InterfaceRMSD()
190
+ irmsds = interface_rmsd_calculator.get_interface_rmsd(tcr, reference_tcr)
191
+
192
+ # DockQ
193
+ from stcrpy.tcr_metrics.tcr_dockq import TCRDockQ
194
+
195
+ dockq_calculator = TCRDockQ() # by default this will merge the TCR and pMHC chains and calculate DockQ of the complete TCR:pMHC interface. To calculate DockQ scores per chain, use TCR_pMHC_interface=False
196
+ dockq_results = dockq_calculator.tcr_dockq(tcr, reference_tcr, save_merged_complex=False) # to investigate the merged TCR:pMHC structure set save_merged_complex=True
197
+
198
+ ```
199
+
200
+ ### Torsion angles and internal coordinates
201
+ STCRpy builds upon the Biopython PDB module, and you can calculate the internal coordinates, such as backbone torsion angles, using the [`internal_coordinates` function](https://biopython.org/docs/dev/api/Bio.PDB.internal_coords.html).
202
+
203
+ ```
204
+ # internal coordinate calculations should be made per chain
205
+ for c in tcr.get_chains():
206
+ c.atom_to_internal_coordinates() # calculate the internal coordinates
207
+
208
+ # internal coordinates can be accessed per residue:
209
+ res = next(tcr.get_residues())
210
+ res.internal_coord.get_angle("psi") # retrieve angles via angle keys
211
+ ```
212
+
213
+ ### Domain angles between TCR chains
214
+ STCRpy can be used to calculate the geometry and angles between the TCR variable domains of abTCRs and gdTCRs. This follows the ABangle implementation [(Dunbar et al. 2013)](https://academic.oup.com/peds/article/26/10/611/1509255).
215
+ ```
216
+ tcr.get_TCR_angles()
217
+
218
+ # returns dictionary of TCR domain angles and measurements.
219
+ # For example:
220
+ # {
221
+ # 'BA': -56.72234454750631,
222
+ # 'BC1': 122.55277240895967,
223
+ # 'AC1': 73.96532018128327,
224
+ # 'BC2': 82.63524566165464,
225
+ # 'AC2': 99.60327202896609,
226
+ # 'dc': np.float64(15.606353954437227)
227
+ # }
228
+
229
+ ```
230
+
231
+
232
+ # Symmetry mate handling
233
+ Some TCR:pMHC crystals are formed of repeating cell units in which the TCR and the antigen do not directly contact.
234
+ STCRpy generates symmetry mates in these cases to pair pMHC with TCRs in the structure.
235
+ Note that symmetry mate generation requires pymol to be installed. By default, symmetry mate generation is enabled, however, it can be toggled by setting:
236
+ `include_symmetry_mates=False` in `get_tcr_structure`.
237
+
238
+ ## Example:
239
+ ```
240
+ tcr_6ulr_paired_antigen = stcrpy.fetch_TCRs("6ulr")
241
+ tcr_6ulr_no_antigen = stcrpy.fetch_TCRs("6ulr", include_symmetry_mates=False) # does not generate symmetry mates
242
+
243
+ ```
244
+
245
+
246
+
247
+ # Limitations
248
+
249
+ ## Connected peptide chains
250
+ STCRpy is currently not configured to handle cases where the antigen peptide is connected to the TCR or MHC chain - this is primarily because the parsing pipeline operates on chain objects and it can be tricky to consistently separate the peptide segment from the remainder of the TCR chain. A known case is PDB code 6MNO.
251
+
252
+ ## Gamma-Delta TCR geometry
253
+ STCRpy supports gamma-delta TCR parsing, interaction profiling and visualisation, but is not currently configured to calculate gd-TCR geometry.
254
+
255
+ ## MHC Class II geometry scoring
256
+ STCRpy can be used to calculate and characterise the geometries of TCRs to MHC class II antigen, however, due to the smaller number of complexes we have not fit parametric distributions to the geometry features, which means it is not possible to calculate a geometry score.
257
+
258
+
@@ -0,0 +1,48 @@
1
+ [build-system]
2
+ requires = ["setuptools"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "stcrpy"
7
+ version = "1.0.5"
8
+ description = "Set of methods to parse, annotate, and calculate features of TCR structures"
9
+ readme = "README.md"
10
+ license-files = [
11
+ "LICENCE",
12
+ "stcrpy/tcr_geometry/TCRCoM_LICENCE",
13
+ ]
14
+ maintainers = [
15
+ {name = "Nele Quast", email = "quast@stats.ox.ac.uk"}
16
+ ]
17
+ requires-python = ">=3.10"
18
+ dependencies = [
19
+ "biopython",
20
+ "numpy==1.26.4",
21
+ "lxml",
22
+ "openbabel-wheel==3.1.1.21",
23
+ "rdkit",
24
+ "anarci-mhc",
25
+ "pandas",
26
+ "matplotlib",
27
+ "scipy",
28
+ "requests",
29
+ "scikit-learn",
30
+ "DockQ",
31
+ ]
32
+
33
+ [project.optional-dependencies]
34
+ ml_datasets = [
35
+ "einops",
36
+ "torch",
37
+ "torch_geometric",
38
+ ]
39
+
40
+ [tool.setuptools]
41
+ include-package-data = true
42
+
43
+ [tool.setuptools.packages.find]
44
+ where = ["."]
45
+ exclude = ["test", "test.*"]
46
+
47
+ [tool.setuptools.package-data]
48
+ stcrpy = ["tcr_geometry/reference_data/*", ]
@@ -2,4 +2,4 @@ from .tcr_processing.TCRParser import TCRParser
2
2
  from .tcr_processing.TCRIO import TCRIO
3
3
  from .tcr_geometry.TCRDock import TCRDock
4
4
  from .tcr_geometry.TCRGeom import TCRGeom
5
- from .tcr_methods.tcr_methods import load_TCRs, fetch_TCR, yield_TCRs, load_TCR
5
+ from .tcr_methods.tcr_methods import load_TCRs, fetch_TCRs, yield_TCRs, load_TCR
@@ -99,7 +99,7 @@ def get_sequences(
99
99
  for chain in entity.get_chains()
100
100
  }
101
101
  except AttributeError as e:
102
- if entity.level == "C":
102
+ if entity.level == "C" or entity.level == "F": # covers chains and fragments
103
103
  sequences = {
104
104
  entity.id: seq1(
105
105
  "".join(
@@ -112,3 +112,22 @@ def get_sequences(
112
112
  if amino_acids_only:
113
113
  sequences = {k: seq.replace("X", "") for k, seq in sequences.items()}
114
114
  return sequences
115
+
116
+
117
def merge_chains(chains, new_chain_id=None):
    """Merge several chains into one new chain with renumbered residues.

    Residues are copied in the order the chains are supplied and in the
    order each chain yields them from ``get_residues()``. Every copy is
    given the id ``(" ", n, " ")`` with ``n`` counting up from 1, so the
    original residue numbers, hetero flags and insertion codes are not
    carried over to the merged chain. The input chains are not modified.

    Args:
        chains: Iterable of chain-like objects exposing ``id`` and
            ``get_residues()``.
        new_chain_id: ID for the merged chain. When ``None``, the ids of
            the first two chains are joined with ``"_"``; if only one
            chain is supplied its id is used on its own.

    Returns:
        A new ``Bio.PDB.Chain.Chain`` holding the renumbered residue copies.

    Raises:
        ValueError: If ``chains`` is empty and no ``new_chain_id`` is given,
            since there is nothing to derive a default ID from.
    """
    # Materialise once so generators are accepted and can be traversed twice
    # (once for the default ID, once for the residues).
    chains = list(chains)

    if new_chain_id is None:
        if not chains:
            raise ValueError(
                "merge_chains needs at least one chain to derive a default "
                "chain id; pass new_chain_id explicitly."
            )
        # Only the first two ids are used, matching the "<first>_<second>"
        # default; a single chain falls back to its own id instead of
        # raising IndexError.
        new_chain_id = "_".join(f"{chain.id}" for chain in chains[:2])

    # Import the class directly rather than relying on ``Bio.PDB.Chain``
    # having been loaded as a side effect of importing ``Bio.PDB``.
    from Bio.PDB.Chain import Chain

    new_chain = Chain(new_chain_id)
    all_residues = (
        residue for chain in chains for residue in chain.get_residues()
    )
    for new_res_id, residue in enumerate(all_residues, start=1):
        new_residue = residue.copy()
        new_residue.id = (" ", new_res_id, " ")
        new_chain.add(new_residue)

    return new_chain