stcrpy 1.0.0__py3-none-any.whl → 1.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. stcrpy/__init__.py +1 -1
  2. stcrpy/tcr_formats/tcr_formats.py +20 -1
  3. stcrpy/tcr_geometry/TCRAngle.py +177 -0
  4. stcrpy/tcr_geometry/TCRDock.py +4 -1
  5. stcrpy/tcr_geometry/reference_data/Acoreset.txt +30 -0
  6. stcrpy/tcr_geometry/reference_data/Bcoreset.txt +30 -0
  7. stcrpy/tcr_geometry/reference_data/consensus_A.pdb +31 -0
  8. stcrpy/tcr_geometry/reference_data/consensus_B.pdb +31 -0
  9. stcrpy/tcr_geometry/reference_data/consensus_D.pdb +31 -0
  10. stcrpy/tcr_geometry/reference_data/consensus_G.pdb +31 -0
  11. stcrpy/tcr_geometry/reference_data/pcA.txt +3 -0
  12. stcrpy/tcr_geometry/reference_data/pcB.txt +3 -0
  13. stcrpy/tcr_interactions/TCRInteractionProfiler.py +1 -1
  14. stcrpy/tcr_interactions/TCRpMHC_PLIP_Model_Parser.py +21 -0
  15. stcrpy/tcr_methods/tcr_batch_operations.py +14 -10
  16. stcrpy/tcr_methods/tcr_methods.py +23 -22
  17. stcrpy/tcr_metrics/tcr_dockq.py +404 -0
  18. stcrpy/tcr_processing/Chemical_components.py +4 -4
  19. stcrpy/tcr_processing/Entity.py +15 -16
  20. stcrpy/tcr_processing/MHC.py +456 -4
  21. stcrpy/tcr_processing/TCR.py +462 -14
  22. stcrpy/tcr_processing/TCRParser.py +364 -193
  23. stcrpy/tcr_processing/annotate.py +35 -24
  24. stcrpy/tcr_processing/utils/common.py +3 -2
  25. stcrpy/tcr_processing/utils/constants.py +4 -3
  26. stcrpy/tcr_processing/utils/region_definitions.py +9 -0
  27. stcrpy/tcr_processing/utils/symmetry_mates.py +90 -0
  28. stcrpy-1.0.5.dist-info/METADATA +285 -0
  29. {stcrpy-1.0.0.dist-info → stcrpy-1.0.5.dist-info}/RECORD +33 -22
  30. {stcrpy-1.0.0.dist-info → stcrpy-1.0.5.dist-info}/WHEEL +1 -1
  31. stcrpy-1.0.0.dist-info/METADATA +0 -173
  32. {stcrpy-1.0.0.dist-info → stcrpy-1.0.5.dist-info}/licenses/LICENCE +0 -0
  33. {stcrpy-1.0.0.dist-info → stcrpy-1.0.5.dist-info}/licenses/stcrpy/tcr_geometry/TCRCoM_LICENCE +0 -0
  34. {stcrpy-1.0.0.dist-info → stcrpy-1.0.5.dist-info}/top_level.txt +0 -0
@@ -38,18 +38,25 @@ def call_anarci(
38
38
  ):
39
39
  """
40
40
  Use the ANARCI program to number the sequence.
41
- @param seq: An amino acid sequence that you wish to number.
42
- @type seq: C{str}
43
41
 
44
- @return: numbering, chain type
42
+ Args:
43
+ seq: An amino acid sequence that you wish to number.
44
+
45
+ Returns:
46
+ numbering, chain type, germline information
45
47
  """
46
- from anarci import number as anarci_number
48
+ try:
49
+ from anarci import number as anarci_number
50
+ except ImportError as e:
51
+ f"""ANARCI import failed, is ANARCI installed and built? \nInstall ANARCI MHC with: \npip install anarci-mhc \n
52
+ Once installed, build the HMMs with: \nANARCI --build_models. \nError raised was {e}"""
53
+ raise e
47
54
 
48
55
  numbering, chain_type, germline_info = anarci_number(
49
56
  seq, allow=allow, assign_germline=True
50
57
  )
51
58
 
52
- if numbering and "MR" not in chain_type and chain_type in allow:
59
+ if numbering and chain_type in allow:
53
60
  return [(_, aa) for _, aa in numbering if aa != "-"], chain_type, germline_info
54
61
  elif numbering and chain_type in ["BA", "GD", "AB", "DG"]:
55
62
  return (
@@ -85,15 +92,15 @@ def annotate(chain):
85
92
  )
86
93
  # aligned_numbering = cleanup_scTCR_numbering(aligned_numbering, sequence_list)
87
94
  scTCR = True
88
- elif chtype == "DC1" or chtype == "RM1":
89
- # Use the scTCR numbering trick; since CD1/MR1 numbering only spans up to residue ~87 and
90
- aligned_numbering = align_scTCR_numbering(
91
- numbering, sequence_list, sequence_str
92
- )
93
- aligned_numbering[0].update(aligned_numbering[1])
94
- aligned_numbering = aligned_numbering[0] # combine the numbering
95
- aligned_numbering = cleanup_scTCR_numbering(aligned_numbering, sequence_list)
96
- scTCR = False
95
+ # elif chtype == "DC1" or chtype == "RM1":
96
+ # # Use the scTCR numbering trick; since CD1/MR1 numbering only spans up to residue ~87 and
97
+ # aligned_numbering = align_scTCR_numbering(
98
+ # numbering, sequence_list, sequence_str
99
+ # )
100
+ # aligned_numbering[0].update(aligned_numbering[1])
101
+ # aligned_numbering = aligned_numbering[0] # combine the numbering
102
+ # aligned_numbering = cleanup_scTCR_numbering(aligned_numbering, sequence_list)
103
+ # scTCR = False
97
104
  else:
98
105
  # align the original residue id's to the numbering
99
106
  aligned_numbering = align_numbering(numbering, sequence_list)
@@ -108,16 +115,18 @@ def extract_sequence(
108
115
  ):
109
116
  """
110
117
  Get the amino acid sequence of the chain.
111
- @change: Residues containing HETATOMs are skipped --> Residues containing HETATOMs are checked as an amino acid.
118
+ Residues containing HETATOMs are skipped --> Residues containing HETATOMs are checked as an amino acid.
112
119
 
113
120
  Residues containing HETATOMs are checked to be amino acids and the single letter returned.
114
121
 
115
122
  This works provided the residues in the chain are in the correct order.
116
123
 
117
- @param selection: a selection object to select certain residues
118
- @param return_warnings: Flag to return a list of warnings or not
119
- @param backbone: Flag whether to only show residues with a complete backbone (in the structure) or not.
120
- @return: The sequence in a resid:aa tuple list and the sequence as a string.
124
+ Args:
125
+ selection: a selection object to select certain residues
126
+ return_warnings: Flag to return a list of warnings or not
127
+ backbone: Flag whether to only show residues with a complete backbone (in the structure) or not.
128
+ Returns:
129
+ The sequence in a resid:aa tuple list and the sequence as a string.
121
130
 
122
131
  """
123
132
  sequence_list = []
@@ -257,9 +266,10 @@ def align_numbering(numbering, sequence_list, alignment_dict={}):
257
266
  def align_scTCR_numbering(numbering, sequence_list, sequence_str):
258
267
  """
259
268
  Align the sequence that has been numbered to a scTCR structure.
260
- @param numbering: numbered list of residues; this is usually a two-element list/tuple from TCRDB.anarci.number
261
- @param sequence_list: list of residues (e.g. from a structure) in its original numbering
262
- @param sequence_str: string form of sequence_list
269
+ Args:
270
+ numbering: numbered list of residues; this is usually a two-element list/tuple from TCRDB.anarci.number
271
+ sequence_list: list of residues (e.g. from a structure) in its original numbering
272
+ sequence_str: string form of sequence_list
263
273
  """
264
274
  if numbering:
265
275
  numbered_sequence = ["".join([r[1] for r in n]) for n in numbering]
@@ -321,8 +331,9 @@ def cleanup_scTCR_numbering(numbering_dict, sequence_list):
321
331
  This is to close the gaps in the numbering so that residues that were unnumbered by anarci don't move around
322
332
  during structural parsing (when they're probably just connections between domains).
323
333
 
324
- @param numbering_dict: numbered dictionary from align_scTCR_numbering
325
- @param sequence_list : sequence list from the structure for alignment.
334
+ Args:
335
+ numbering_dict: numbered dictionary from align_scTCR_numbering
336
+ sequence_list : sequence list from the structure for alignment.
326
337
  """
327
338
  positions = [p[0] for p in sequence_list]
328
339
 
@@ -23,8 +23,9 @@ def identity(seq1, seq2, positions=[]):
23
23
  """
24
24
  Find the matched sequence identity between two aligned sequences.
25
25
  Can accept lists/strings, but this assumes that the two sequences are of the same length.
26
- @param seq1: Dictionary with key as the position and value as the single letter amino acid code. or an aligned list or string
27
- @param seq2: Dictionary with key as the position and value as the single letter amino acid code. or an aligned list or string
26
+ Args:
27
+ seq1: Dictionary with key as the position and value as the single letter amino acid code. or an aligned list or string
28
+ seq2: Dictionary with key as the position and value as the single letter amino acid code. or an aligned list or string
28
29
  """
29
30
  n = 0 # number
30
31
  m = 0 # match
@@ -12,9 +12,10 @@ import re
12
12
  def tuplefy(x):
13
13
  """
14
14
  Interpretation for converting numbering (in string) into a tuple.
15
-
16
- @param x: A string for the identifier of a numbered position. e.g "H100A".
17
- @return : A tuple of the chain tupe followed by a tuple of residue id and insertion code. eg. ( H, (100, "A") )
15
+ Args:
16
+ x: A string for the identifier of a numbered position. e.g "H100A".
17
+ Returns:
18
+ A tuple of the chain tupe followed by a tuple of residue id and insertion code. eg. ( H, (100, "A") )
18
19
 
19
20
  """
20
21
  chain, resi, ins = re.split(r"(\d+)", x)
@@ -10,6 +10,15 @@ IMGT_CDR_BOUNDARIES = {
10
10
  "3": {"imgt": (105, 117)},
11
11
  }
12
12
 
13
+ IMGT_VARIABLE_DOMAIN: set[int] = set(range(1, 128 + 1))
14
+ '''Variable domain range for IMGT numbered immunoglobulin structures.'''
15
+
16
+ IMGT_MH1_ABD: set[int] = set(range(1, 92)) | set(range(1001, 1092))
17
+ '''IMGT ranges of the antigen binding domain of MHC class I molecules.'''
18
+
19
+ IMGT_MH2_ABD: set[int] = set(range(1, 92))
20
+ '''IMGT ranges of the antigen binding domain of MHC class II molecules.'''
21
+
13
22
  # regions for TCR
14
23
  _regions = {"imgt": {}}
15
24
  _regions["imgt"]["A"] = _regions["imgt"]["B"] = (
@@ -0,0 +1,90 @@
1
+ import warnings
2
+ import tempfile
3
+ import os
4
+
5
+ from Bio.PDB import PDBParser, PDBIO
6
+ from ..TCRParser import TCRParser
7
+
8
+
9
def get_symmetry_mates(filename):
    """Expand a structure into its symmetry mates and parse each one as a TCR structure.

    The file is loaded into PyMOL and expanded with ``cmd.symexp``. If the
    expansion adds no objects, the loaded object is deleted and the file's base
    name (up to the first ".") is handed to ``cmd.fetch`` as a PDB code, after
    which the expansion is retried on the fetched entry. Every PyMOL object is
    then saved to a temporary PDB file and parsed with
    ``TCRParser.get_tcr_structure(..., include_symmetry_mates=False)``.

    Chains of every object after the first are renamed to IDs that are not used
    by the first object, so that mates never share a chain ID with the
    structure the code treats as the original.

    Args:
        filename: Path of the structure file to load into PyMOL.

    Returns:
        list: One parsed structure per PyMOL object, in the order reported by
        ``cmd.get_object_list()`` (the first object is parsed too, with its
        chain IDs untouched). An empty list is returned when PyMOL cannot be
        imported or when no structure could be loaded or fetched.
    """
    try:
        from pymol import cmd
    except ModuleNotFoundError:
        warnings.warn(
            "Pymol not installed - please install pymol to enabe symmetry mate TCR parsing. "
        )
        return []

    try:
        cmd.load(filename)
        obj_name = cmd.get_object_list()[0]
        cmd.symexp(obj_name, obj_name, obj_name, cutoff=20.0, quiet=0)
        if len(cmd.get_object_list()) == 1:
            # No symmetry mates found, likely because the file did not contain
            # symmetry information. Retry with the entry fetched from the PDB;
            # this only works if the file name is the PDB code.
            cmd.delete(obj_name)
            cmd.fetch(os.path.basename(filename).split(".")[0])
            if len(cmd.get_object_list()) == 0:
                warnings.warn(f"No symmetry mates found for {filename}.")
                # Return the same empty-list type as the "no pymol" path so
                # callers can always iterate over the result.
                return []
            obj_name = cmd.get_object_list()[0]
            # NOTE(review): the retry uses a smaller cutoff (10.0) than the
            # first attempt (20.0) - confirm this is intentional.
            cmd.symexp(obj_name, obj_name, obj_name, cutoff=10.0, quiet=0)

        tcr_symmetry_mates = []
        tcp = TCRParser()
        pdp = PDBParser(QUIET=True)
        pdbio = PDBIO()
        with tempfile.TemporaryDirectory() as tmpdir:
            objects = cmd.get_object_list()
            for i, obj in enumerate(objects):
                fn = os.path.join(tmpdir, f"{obj_name}_symmetry_mate_{i}.pdb")
                cmd.save(fn, obj)
                symmetry_mate = pdp.get_structure("tmp", fn)
                if i == 0:
                    # Reserve the chain IDs of the first (original) structure.
                    # Filtering instead of list.remove() avoids a ValueError
                    # when an original chain ID is not in the generated pool.
                    original_chains = list(symmetry_mate.get_chains())
                    original_ids = {c.id for c in original_chains}
                    chain_ids = [
                        cid
                        for cid in generate_chain_id_list(
                            len(original_chains) * len(objects)
                        )
                        if cid not in original_ids
                    ]
                else:
                    # Rename chain ids on the plain Bio.PDB structure; this
                    # cannot be done directly to the TCR structure without
                    # breaking the TCR and MHC chain assignments.
                    for chain in reversed(list(symmetry_mate.get_chains())):
                        symmetry_mate[0].detach_child(chain.id)
                        chain.id = chain_ids.pop(0)
                        symmetry_mate[0].add(chain)
                    pdbio.set_structure(symmetry_mate)
                    pdbio.save(fn)

                tcr_symmetry_mates.append(
                    tcp.get_tcr_structure(
                        f"{obj_name}_symmetry_{i}", fn, include_symmetry_mates=False
                    )
                )
        return tcr_symmetry_mates
    finally:
        # Always clean up the pymol cmd space, even if parsing failed, so
        # that the next call starts from an empty object list.
        for obj in cmd.get_object_list():
            cmd.delete(obj)
71
+
72
+
73
def generate_chain_id_list(N):
    """Generate up to N single-character chain IDs.

    IDs are drawn, in order, from the uppercase letters A-Z, the lowercase
    letters a-z and the digits 0-9. Only single characters are produced, so at
    most 62 IDs are available; a larger N yields all 62.

    Args:
        N (int): The number of chain IDs to generate.

    Returns:
        list: The first N chain IDs (fewer if N exceeds 62, empty if N <= 0).
    """
    import string

    if N <= 0:
        # A negative N would otherwise slice from the end and return IDs.
        return []
    alphabet = string.ascii_uppercase + string.ascii_lowercase + string.digits
    return list(alphabet[:N])
@@ -0,0 +1,285 @@
1
+ Metadata-Version: 2.4
2
+ Name: stcrpy
3
+ Version: 1.0.5
4
+ Summary: Set of methods to parse, annotate, and calculate features of TCR structures
5
+ Maintainer-email: Nele Quast <quast@stats.ox.ac.uk>
6
+ Requires-Python: >=3.10
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENCE
9
+ License-File: stcrpy/tcr_geometry/TCRCoM_LICENCE
10
+ Requires-Dist: biopython
11
+ Requires-Dist: numpy==1.26.4
12
+ Requires-Dist: lxml
13
+ Requires-Dist: openbabel-wheel==3.1.1.21
14
+ Requires-Dist: rdkit
15
+ Requires-Dist: anarci-mhc
16
+ Requires-Dist: pandas
17
+ Requires-Dist: matplotlib
18
+ Requires-Dist: scipy
19
+ Requires-Dist: requests
20
+ Requires-Dist: scikit-learn
21
+ Requires-Dist: DockQ
22
+ Provides-Extra: ml-datasets
23
+ Requires-Dist: einops; extra == "ml-datasets"
24
+ Requires-Dist: torch; extra == "ml-datasets"
25
+ Requires-Dist: torch_geometric; extra == "ml-datasets"
26
+ Dynamic: license-file
27
+
28
+
29
+
30
+ <img src="./stcrpy_logo.png" alt="drawing" width="300"/>
31
+
32
+
33
+ # STCRpy
34
+ [![stcrpy installation](https://github.com/npqst/STCRpy/actions/workflows/conda-workflow.yml/badge.svg)](https://github.com/npqst/STCRpy/actions/workflows/conda-workflow.yml)
35
+ [![stcrpy unittests](https://github.com/npqst/STCRpy/actions/workflows/unittest-workflow.yml/badge.svg)](https://github.com/npqst/STCRpy/actions/workflows/unittest-workflow.yml)
36
+ [![stcrpy_docs](https://readthedocs.org/projects/stcrpy/badge/?version=latest)](https://stcrpy.readthedocs.io/en/latest/)
37
+
38
+
39
+ Structural TCR python (STCRpy) is a software suite for analysing and processing T-cell receptor structures.
40
+
41
+ Please feel free to reach out with any comments or feedback.
42
+
43
+ Under review, please cite:
44
+
45
+ **Quast, N., Deane, C., & Raybould, M. (2025). STCRpy: a software suite for TCR:pMHC structure parsing, interaction profiling, and machine learning dataset preparation. BioRxiv. https://doi.org/10.1101/2025.04.25.650667**
46
+
47
+ <img src="./stcrpy_main_fig.png" alt="drawing" width="1500"/>
48
+
49
+
50
+
51
+ # Installation
52
+
53
+ ## TL;DR installation
54
+ ```
55
+ pip install stcrpy
56
+ pip install plip
57
+ conda install pymol-open-source -y
58
+ ANARCI --build_models # this step will take a few minutes
59
+ ```
60
+
61
+ ## Step by step installation
62
+ We recommend installing STCRpy in a [conda](https://www.anaconda.com/docs/getting-started/miniconda/install#macos-linux-installation) (or [mamba](https://mamba.readthedocs.io/en/latest/installation/mamba-installation.html)) environment using python 3.10 to 3.12. You can also use a python virtual environment if you do not need pymol visualisations.
63
+
64
+ <details> <summary>conda</summary>
65
+
66
+ ```
67
+ conda create -n stcrpy_env python==3.12 -y
68
+ conda activate stcrpy_env
69
+ ```
70
+
71
+ </details>
72
+ <details> <summary>mamba</summary>
73
+
74
+ ```
75
+ mamba create -n stcrpy_env python==3.12 -y
76
+ mamba activate stcrpy_env
77
+ ```
78
+
79
+ </details>
80
+ <details> <summary>venv</summary>
81
+
82
+ ```
83
+ python -m venv stcrpy_env
84
+ source stcrpy_env/bin/activate
85
+ ```
86
+
87
+ </details>
88
+
89
+
90
+ The core functionality of STCRpy can be installed as follows:
91
+ ```
92
+ pip install stcrpy
93
+ ```
94
+
95
+ After installing stcrpy, the anarci HMM models must be built to enable annotation.
96
+ ```
97
+ ANARCI --build_models # this step will take a few minutes
98
+ ```
99
+
100
+ To enable interaction profiling, install PLIP (Adasme et al., 2021):
101
+ ```
102
+ pip install plip
103
+ ```
104
+
105
+ To enable pymol visualisations, install pymol open source locally within the environment. Unfortunately, pymol currently needs to be installed even if you already have a pymol version. Be sure to install pymol within a managed conda (or mamba) environment to prevent interference with any existing versions.
106
+ ```
107
+ conda install pymol-open-source -y
108
+ ```
109
+
110
+ To generate pytorch and pytorch-geometric compatible datasets (see the [pytorch docs](https://pytorch.org/get-started/locally/) for hardware specific instructions):
111
+ ```
112
+ pip install stcrpy[ml_datasets]
113
+ ```
114
+
115
+ > Note that the installs for pytorch can be platform specific.
116
+ > If errors are encountered here it is best to manually install the dependencies following the [pytorch installation docs](https://pytorch.org/get-started/locally/).
117
+ > For example:
118
+ > ```
119
+ > pip install torch --index-url https://download.pytorch.org/whl/cpu
120
+ > pip install torch_geometric
121
+ > ```
122
+ > This installs the CPU version of pytorch (for GPU / CUDA versions follow the [pytorch installation docs](https://pytorch.org/get-started/locally/)).
123
+ >
124
+ > The EGNN example also uses `einops`, which can be manually installed as follows:
125
+ > ```
126
+ > pip install einops
127
+ > ```
128
+
129
+ # Documentation
130
+ STCRpy [documentation](https://stcrpy.readthedocs.io/en/latest/) is hosted on Read the Docs.
131
+
132
+ # Examples
133
+ STCRpy generates and operates on TCR structure objects. The majority of the API can be accessed through functions of the format: `tcr.some_stcrpy_function()`. ([See TCR object docs here](https://stcrpy.readthedocs.io/en/latest/stcrpy.tcr_processing.html#stcrpy.tcr_processing.TCR.TCR)). TCR objects are associated with their MHC and antigen if these are presented in the structure.
134
+
135
+ A notebook with examples can be found under [examples/STCRpy_examples.ipynb](./examples/STCRpy_examples.ipynb)
136
+
137
+ First import STCRpy:
138
+ ```
139
+ import stcrpy
140
+ ```
141
+
142
+ ### To fetch a TCR structure from STCRDab or the PDB:
143
+ ```
144
+ multiple_tcrs = stcrpy.fetch_TCRs("8gvb")
145
+ ```
146
+ This will return a list of all of the TCR structures found in the PDB file, represented as TCR structure objects.
147
+
148
+ ### To load a TCR structure from a PDB or MMCIF file:
149
+ ```
150
+ tcr = stcrpy.load_TCR("filename.{pdb, cif}")
151
+ ```
152
+
153
+ ### To load multiple TCR structures from a list of files at once:
154
+ ```
155
+ multiple_tcrs = stcrpy.load_TCRs([file_1, file_2, file_3])
156
+ ```
157
+
158
+ ### To save a TCR object to PDB or MMCIF files:
159
+ ```
160
+ tcr.save(filename.{pdb, cif}) # save the TCR and its associated MHC and antigen
161
+ tcr.save(filename.{pdb, cif}, TCR_only=True) # save the TCR only
162
+ ```
163
+
164
+ ### To calculate the TCR to pMHC geometry:
165
+ ```
166
+ tcr.calculate_geometry() # change the 'mode' keyword argument to change the geometry calculation method. See paper / documentation for details.
167
+ ```
168
+
169
+ ### To score the TCR to pMHC geometry:
170
+ ```
171
+ tcr.score_docking_geometry()
172
+ ```
173
+
174
+ ### To profile interactions:
175
+ ```
176
+ tcr.profile_peptide_interactions() # interaction profiling parameters can be adjusted, see documentation for details
177
+ ```
178
+
179
+ ### To visualise interactions:
180
+ ```
181
+ tcr.visualise_interactions()
182
+ ```
183
+
184
+ ### To run full analysis on a set of TCR structures:
185
+ ```
186
+ from stcrpy.tcr_methods.tcr_batch_operations import analyse_tcrs
187
+ germlines_and_alleles_df, geometries_df, interactions_df = analyse_tcrs(list_or_dict_of_files)
188
+ ```
189
+
190
+ ### To generate graph datasets:
191
+ ```
192
+ dataset = TCRGraphDataset(
193
+ root=PATH_TO_DATASET,
194
+ data_paths=PATH_TO_TCR_FILES
195
+ )
196
+ ```
197
+
198
+ ### To calculate TCR prediction metrics such as RMSD, interface RMSD (of the TCR:pMHC interface) or DockQ scores:
199
+
200
+ ```
201
+ # RMSD
202
+ from stcrpy.tcr_metrics import RMSD
203
+
204
+ rmsd_calculator = RMSD()
205
+ rmsd = rmsd_calculator.calculate_rmsd(pred_tcr, reference_tcr, save_alignment=False) # Calculates the RMSD of each region of the TCR. To check the alignment set save_alignment to True.
206
+
207
+ # To calculate RMSD for a set of predictions against a set of reference structures from files:
208
+ files = list(zip(prediction_files, reference_files))
209
+ rmsd_df = rmsd_calculator.rmsd_from_files(files)
210
+
211
+
212
+
213
+ # Interface RMSD of TCR:pMHC interface
214
+ from stcrpy.tcr_metrics import InterfaceRMSD
215
+
216
+ interface_rmsd_calculator = InterfaceRMSD()
217
+ irmsds = interface_rmsd_calculator.get_interface_rmsd(tcr, reference_tcr)
218
+
219
+ # DockQ
220
+ from stcrpy.tcr_metrics.tcr_dockq import TCRDockQ
221
+
222
+ dockq_calculator = TCRDockQ() # by default this will merge the TCR and pMHC chains and calculate DockQ of the complete TCR:pMHC interface. To calculate DockQ scores per chain, use TCR_pMHC_interface=False
223
+ dockq_results = dockq_calculator.tcr_dockq(tcr, reference_tcr, save_merged_complex=False) # to investigate the merged TCR:pMHC structure set save_merged_complex=True
224
+
225
+ ```
226
+
227
+ ### Torsion angles and internal coordinates
228
+ STCRpy builds upon the Biopython PDB module, and you can calculate the internal coordinates, such as backbone torsion angles, using the [`internal_coordinates` function](https://biopython.org/docs/dev/api/Bio.PDB.internal_coords.html).
229
+
230
+ ```
231
+ # internal coordinate calculations should be made per chain
232
+ for c in tcr.get_chains():
233
+ c.atom_to_internal_coordinates() # calculate the internal coordinates
234
+
235
+ # internal coordinates can be accessed per residue:
236
+ res = next(tcr.get_residues())
237
+ res.internal_coord.get_angle("psi") # retrieve angles via angle keys
238
+ ```
239
+
240
+ ### Domain angles between TCR chains
241
+ STCRpy can be used to calculate the geometry and angles between the TCR variable domains of abTCRs and gdTCRs. This follows the ABangle implementation [(Dunbar et al. 2013)](https://academic.oup.com/peds/article/26/10/611/1509255).
242
+ ```
243
+ tcr.get_TCR_angles()
244
+
245
+ # returns dictionary of TCR domain angles and measurements.
246
+ # For example:
247
+ # {
248
+ # 'BA': -56.72234454750631,
249
+ # 'BC1': 122.55277240895967,
250
+ # 'AC1': 73.96532018128327,
251
+ # 'BC2': 82.63524566165464,
252
+ # 'AC2': 99.60327202896609,
253
+ # 'dc': np.float64(15.606353954437227)
254
+ # }
255
+
256
+ ```
257
+
258
+
259
+ # Symmetry mate handling
260
+ Some TCR:pMHC crystals are formed of repeating cell units in which the TCR and the antigen do not directly contact.
261
+ STCRpy generates symmetry mates in these cases to pair pMHC with TCRs in the structure.
262
+ Note that symmetry mate generation requires pymol to be installed. By default, symmetry mate generation is enabled, however, it can be toggled by setting:
263
+ `include_symmetry_mates=False` in `get_tcr_structure`.
264
+
265
+ ## Example:
266
+ ```
267
+ tcr_6ulr_paired_antigen = stcrpy.fetch_TCRs("6ulr")
268
+ tcr_6ulr_no_antigen = stcrpy.fetch_TCRs("6ulr", include_symmetry_mates=False) # does not generate symmetry mates
269
+
270
+ ```
271
+
272
+
273
+
274
+ # Limitations
275
+
276
+ ## Connected peptide chains
277
+ STCRpy is currently not configured to handle cases where the antigen peptide is connected to the TCR or MHC chain - this is primarily because the parsing pipeline operates on chain objects and it can be tricky to consistently separate the peptide segment from the remainder of the TCR chain. A known case is PDB code 6MNO.
278
+
279
+ ## Gamma-Delta TCR geometry
280
+ STCRpy supports gamma-delta TCR parsing, interaction profiling and visualisation, but is not currently configured to calculate gd-TCR geometry.
281
+
282
+ ## MHC Class II geometry scoring
283
+ STCRpy can be used to calculate and characterise the geometries of TCRs to MHC class II antigen, however, due to the smaller number of complexes we have not fit parametric distributions to the geometry features, which means it is not possible to calculate a geometry score.
284
+
285
+
@@ -1,68 +1,79 @@
1
1
  examples/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  examples/egnn.py,sha256=ab9OMeJ--a1XkTa5nF7y8-B0XNpS5LCuK1x5lEnyz3o,13812
3
- stcrpy/__init__.py,sha256=D5Ela4G19FPIFzO5CpyzJMnrvvoTNQUwz1s-qb0waDo,252
3
+ stcrpy/__init__.py,sha256=ULJpFjOVfuUjI5F1DuWlMxNI5t6Fsrvf8IVCqVJ9OxI,253
4
4
  stcrpy/tcr_datasets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  stcrpy/tcr_datasets/tcr_graph_dataset.py,sha256=q8Di7sxgO-Rtzn7eVi_UMx4Sk9A31nvIxO7n_Nie-d0,19614
6
6
  stcrpy/tcr_datasets/tcr_selector.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
7
  stcrpy/tcr_datasets/tcr_structure_dataset.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  stcrpy/tcr_datasets/utils.py,sha256=t_ogJu4hoI-ywr5-MnepuB-3L5-YXFoKQzyaONsDMFM,4888
9
9
  stcrpy/tcr_formats/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- stcrpy/tcr_formats/tcr_formats.py,sha256=ZLouBdpY1K8xOA7tORkfljE9iUsbn_oay4DRHvP5Wbc,3812
10
+ stcrpy/tcr_formats/tcr_formats.py,sha256=RG6P7oDZSzsTDobngv0K9qHhJM7-tmSjwQSDP3CpN3A,4342
11
11
  stcrpy/tcr_formats/tcr_haddock.py,sha256=ejGs1DsOMKE-CDK5k9M4J-9y-7bLq_BbNLAkBNhpjjQ,21913
12
+ stcrpy/tcr_geometry/TCRAngle.py,sha256=1tIoJ2RIxQPH2_ne9w-UlUKGsBlfsc2oarJVIkt9VHQ,6330
12
13
  stcrpy/tcr_geometry/TCRCoM.py,sha256=Mtq0ieQUTj2R_EN9BUFdUtSbT9AtI5OPi1TEdnMlxME,12883
13
14
  stcrpy/tcr_geometry/TCRCoM_LICENCE,sha256=93k_qqF0rgpyWEmxpcl2sbZS3CK1dkGrIuvJtKsBlCA,7844
14
- stcrpy/tcr_geometry/TCRDock.py,sha256=CEwcDbekOy5KvKw-_0DMMU4CwfK01km_NHg2lZaLquI,9911
15
+ stcrpy/tcr_geometry/TCRDock.py,sha256=Uga1OV5owC-lVfzzKFn5dGEGnaeHJUHMpppdUxnK84k,9975
15
16
  stcrpy/tcr_geometry/TCRGeom.py,sha256=niol8kNMUufFkjxhOkFhBhonNIKpQwhl6V8qbVa7tLg,19041
16
17
  stcrpy/tcr_geometry/TCRGeomFiltering.py,sha256=JN02yyxeXy6XRH3oSKTskcUkxoqvxsvSRGpR-4H25kA,9666
17
18
  stcrpy/tcr_geometry/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
+ stcrpy/tcr_geometry/reference_data/Acoreset.txt,sha256=VDcDb_Uf5D_wNEwVtRZM9kcehodwIhl5fQ9Jfh0ncw0,125
20
+ stcrpy/tcr_geometry/reference_data/Bcoreset.txt,sha256=zZw-eZIflcAy9k8z1t869g75lU4ZRj75VtH5ztLA50k,121
18
21
  stcrpy/tcr_geometry/reference_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
+ stcrpy/tcr_geometry/reference_data/consensus_A.pdb,sha256=sA75dr5KcpND-pvQ5jFEkmWTUbJTwkOoSWLEMWp4nNo,2433
23
+ stcrpy/tcr_geometry/reference_data/consensus_B.pdb,sha256=gXBhVfKKLbNRP3gyq7K7ofLMgEXKWeVdYJQI0Ws_NKg,2434
24
+ stcrpy/tcr_geometry/reference_data/consensus_D.pdb,sha256=O9LHeh3bWxeEWwuTcp1HwzxzEfPfGOSAUHe4nMI-Okc,2434
25
+ stcrpy/tcr_geometry/reference_data/consensus_G.pdb,sha256=Fs8e_UDb6dI1qqhNSttWaHrzkjgzjFpMpC38_W9OXTg,2434
19
26
  stcrpy/tcr_geometry/reference_data/dock_reference_1_imgt_numbered.pdb,sha256=wjzvP6_fhMoUp4b1Q18_-JXHpl8kKbqjFvLiglW2Yek,530400
20
27
  stcrpy/tcr_geometry/reference_data/dock_reference_2_imgt_numbered.pdb,sha256=sXUJ7CKGnA9FevyocJj30VA2EqgYM0s0ejRgBP6R9Gk,526026
28
+ stcrpy/tcr_geometry/reference_data/pcA.txt,sha256=SNqBtyHeeL9dSJHzMUhjfO9zhH61Qn8fCORpsPnkH8w,99
29
+ stcrpy/tcr_geometry/reference_data/pcB.txt,sha256=IEHoYprUz56JnV3sNOMApNKmapnk18WtelCGwok5YgU,100
21
30
  stcrpy/tcr_geometry/reference_data/reference_A.pdb,sha256=sA75dr5KcpND-pvQ5jFEkmWTUbJTwkOoSWLEMWp4nNo,2433
22
31
  stcrpy/tcr_geometry/reference_data/reference_B.pdb,sha256=gXBhVfKKLbNRP3gyq7K7ofLMgEXKWeVdYJQI0Ws_NKg,2434
23
32
  stcrpy/tcr_geometry/reference_data/reference_D.pdb,sha256=O9LHeh3bWxeEWwuTcp1HwzxzEfPfGOSAUHe4nMI-Okc,2434
24
33
  stcrpy/tcr_geometry/reference_data/reference_G.pdb,sha256=Fs8e_UDb6dI1qqhNSttWaHrzkjgzjFpMpC38_W9OXTg,2434
25
34
  stcrpy/tcr_geometry/reference_data/reference_data.py,sha256=q3L-cQ1UQUfZxeIMKIiiase_6SEEuWlx7UsLndwNLAk,1658
26
35
  stcrpy/tcr_interactions/PLIPParser.py,sha256=zetY_LvH_8E-26xXc02c7_edoHWEwIWB9_XIu1szRcA,5785
27
- stcrpy/tcr_interactions/TCRInteractionProfiler.py,sha256=Q4OBQ3DooM592Sk5DnjeAJAAohVOkrbp8MAXNRTLnVg,17835
28
- stcrpy/tcr_interactions/TCRpMHC_PLIP_Model_Parser.py,sha256=wRY0gOoWMb5hDqPn6vKn5oLXCkMvoFgwuKSqYkB-MM8,4688
36
+ stcrpy/tcr_interactions/TCRInteractionProfiler.py,sha256=56NeZZVIAjEqbbsM_qepjJQjoaR-RFt5OhhtcebiunI,17772
37
+ stcrpy/tcr_interactions/TCRpMHC_PLIP_Model_Parser.py,sha256=-fm_rtPJhU-h3WuX1U5Cv3vjO0eAlzwbm1HTreQIDCI,5464
29
38
  stcrpy/tcr_interactions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
30
39
  stcrpy/tcr_interactions/utils.py,sha256=KEGybd1ugZvFdj4Gqn9Xo4lnJbU1ivADmEDjkvuaNiw,5177
31
40
  stcrpy/tcr_methods/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
32
- stcrpy/tcr_methods/tcr_batch_operations.py,sha256=YtRgui5H8MyR0zyGDGk1pwaydhQBYbepkRB1XspzpuQ,8363
33
- stcrpy/tcr_methods/tcr_methods.py,sha256=ghWO-F4sOCv1BOxMLC-nSbGxpY4B6L88v-IF13cV2FM,5553
41
+ stcrpy/tcr_methods/tcr_batch_operations.py,sha256=-nh4EqgEYE8rx5-1iiVBJucc-r0nGizqj1S9S9LlNb0,8539
42
+ stcrpy/tcr_methods/tcr_methods.py,sha256=k1DoZYVSnNLkikpP5bFAZaSa4DS41vwiHtXVIz_9vCQ,5569
34
43
  stcrpy/tcr_methods/tcr_reformatting.py,sha256=lgWyYOrk30fCY0IoDBI6IuIqeXdH0ukKX_x9LAdLVfk,625
35
44
  stcrpy/tcr_metrics/__init__.py,sha256=vy80DujN4kMr_rlFPWtLY48mIbsXK_SfTwzzgfQIY_E,73
36
45
  stcrpy/tcr_metrics/constants.py,sha256=uocAA2RM1JSE6n0gq1RDnZIcd3JROi9wDvJ-J4yc_IY,406
46
+ stcrpy/tcr_metrics/tcr_dockq.py,sha256=G4Px1lT5OjSirdIgMQ0X7G2Nj_1DJCD0Dkjclv0QMlQ,17320
37
47
  stcrpy/tcr_metrics/tcr_interface_rmsd.py,sha256=Gsv6XDO6r4N9dCo--CyOkneni1gVf2X1Ws2y6vXuj2M,9605
38
48
  stcrpy/tcr_metrics/tcr_rmsd.py,sha256=cbK20z1ELjeEecPxZF3vQ4uDlorrAAb5C8D8YtTpwz4,6811
39
49
  stcrpy/tcr_ml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
50
  stcrpy/tcr_ml/geometry_predictor.py,sha256=D0w_Rx2PIwPQORyglvlugwI4wGg4xZyXUxRtIOEYyK0,38
41
51
  stcrpy/tcr_processing/AGchain.py,sha256=iVpiNMXOz3tnBIK0CYhq1EUdObMckGO6vkilwWQhjmU,2455
42
- stcrpy/tcr_processing/Chemical_components.py,sha256=8xcUg9HBya2_--hdAtMonECZeyl684wTvCUkrmUdPcE,2020304
43
- stcrpy/tcr_processing/Entity.py,sha256=5WZN5vcEFQfFxZCXcqr9exrA5vOlrH0I82LRNgPe3KY,11198
52
+ stcrpy/tcr_processing/Chemical_components.py,sha256=KuJiV-SnATz1cWoZnZmamNaivOhPhrsIdjKK-VCtKgE,2020298
53
+ stcrpy/tcr_processing/Entity.py,sha256=kY8oLVnmWOMMzj4Gj7ouE-IaxLHrlchX1Y2EAu8Sstw,11180
44
54
  stcrpy/tcr_processing/Fragment.py,sha256=QW_6sPWi2HX7mq5dLLwvA-7Q9oiBHCS0p02WgFLDisA,1908
45
55
  stcrpy/tcr_processing/Holder.py,sha256=y_q2NF0YiCf9CuXWlIm9-EWpD5CJj95o2wUCYDyEHOA,549
46
- stcrpy/tcr_processing/MHC.py,sha256=OfOb9aMKqcP9jM0TRFZo_ZQDB1idqCaJhn9rlBaAKA4,11736
56
+ stcrpy/tcr_processing/MHC.py,sha256=bKNs_dOiTUpqBZnTDlvGjWHH8TaVQrRo-ieYYPULXFY,26299
47
57
  stcrpy/tcr_processing/MHCchain.py,sha256=rT0FCkKD2pbZm-VTvcLwCdS5UOy-ibxnNcsgREIxgSs,4357
48
58
  stcrpy/tcr_processing/Model.py,sha256=K8wTrSCECHyqJX9811wU9MxgdGDVyCXraQ4-rOOa08U,1230
49
59
  stcrpy/tcr_processing/Select.py,sha256=VuVSgmqHBPD1DGRumM44HDzcQ-aQOwg_6zQHM3Ulm0U,3543
50
- stcrpy/tcr_processing/TCR.py,sha256=JR_CQovd7syVLd4k9tPYaFiwXpjP5dl51_-4hI2Phno,17206
60
+ stcrpy/tcr_processing/TCR.py,sha256=dcMdw2WEGa-BUZUqAAe0Dw4U2Qyh1xYUaS4N7E7J2AQ,33220
51
61
  stcrpy/tcr_processing/TCRIO.py,sha256=tSJLVN6MG6gyT26PreVJOz5DqD1e-VsBjBpu3mBBPKg,1501
52
- stcrpy/tcr_processing/TCRParser.py,sha256=D97-MTdbasEdtMR-mT6Up5tY2LK1LRgALVRrTearJJs,51162
62
+ stcrpy/tcr_processing/TCRParser.py,sha256=Raz2W9tygpK3knzjIVCfqUyCGSFA7yhh4UBT5wuWLM4,56134
53
63
  stcrpy/tcr_processing/TCRStructure.py,sha256=yFiUeo02KXIBBEljy0mebv5ol5uCQRTy-Sfs94Rt6Hc,3810
54
64
  stcrpy/tcr_processing/TCRchain.py,sha256=2CdP_JX5LG7nM8Lsuhcevf7SyRaQUZlizt3ntEYVPjg,4769
55
65
  stcrpy/tcr_processing/__init__.py,sha256=BefeJ0jefteeOzWrkOMvxLCvzd0aJDhXWrdTm7zlP94,94
56
- stcrpy/tcr_processing/annotate.py,sha256=aa3Bu_5UXC8H3C7jibBk0YapynhwPAP-Dx69cmGXRw8,17163
66
+ stcrpy/tcr_processing/annotate.py,sha256=MGk4UzLGtGL_J_ySPYoUTszOfzQjsWCpeeaXrza7Bfo,17438
57
67
  stcrpy/tcr_processing/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
58
- stcrpy/tcr_processing/utils/common.py,sha256=SmIbvf9-cH_T4ENBRNdAmhZ4buzRHDN-QY0csVB3pCo,1865
59
- stcrpy/tcr_processing/utils/constants.py,sha256=k16xeH8gMBdgdXy6h829kJ2qIHx2-sDnwv66zlf1aIA,8138
60
- stcrpy/tcr_processing/utils/region_definitions.py,sha256=pWE3xJAhnGtLHWxvJOHaU5Y5spcHuZwPzcIDCsH920I,18806
68
+ stcrpy/tcr_processing/utils/common.py,sha256=5e-dP6jPlaFZEErusakdQYpyj79E3WvEGF5-sG0LitI,1869
69
+ stcrpy/tcr_processing/utils/constants.py,sha256=AHReE1QkeqTnhjvQI7nrPqLF4XdtzHV9C1w1uHTiVcM,8151
70
+ stcrpy/tcr_processing/utils/region_definitions.py,sha256=t3HT6FHFlGqS_DdV2xWLdTxhIUEjWHsf_0_f-06w94E,19198
71
+ stcrpy/tcr_processing/utils/symmetry_mates.py,sha256=9U29IHs63llBzV9xBNCmv4BKTIachwO7T3orHywDn48,3262
61
72
  stcrpy/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
62
73
  stcrpy/utils/error_stream.py,sha256=BLHJnf-tWU41MXh6sHSZgkTFOMiWZsCgwM2qIKBnwr4,247
63
- stcrpy-1.0.0.dist-info/licenses/LICENCE,sha256=G1FnVDsfeYoveKTu9Xaqukcm-4xZ4mzakjLpFMnNfJ0,1507
64
- stcrpy-1.0.0.dist-info/licenses/stcrpy/tcr_geometry/TCRCoM_LICENCE,sha256=93k_qqF0rgpyWEmxpcl2sbZS3CK1dkGrIuvJtKsBlCA,7844
65
- stcrpy-1.0.0.dist-info/METADATA,sha256=CEw0KlbcZz8peJ1s8nIrxet6yllG1XU0ZLx3cD0DD-Y,5785
66
- stcrpy-1.0.0.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
67
- stcrpy-1.0.0.dist-info/top_level.txt,sha256=kAkgyHyGW-_YswJpcuA3pSqzuQVbUggFUmZg2OTx_B8,16
68
- stcrpy-1.0.0.dist-info/RECORD,,
74
+ stcrpy-1.0.5.dist-info/licenses/LICENCE,sha256=G1FnVDsfeYoveKTu9Xaqukcm-4xZ4mzakjLpFMnNfJ0,1507
75
+ stcrpy-1.0.5.dist-info/licenses/stcrpy/tcr_geometry/TCRCoM_LICENCE,sha256=93k_qqF0rgpyWEmxpcl2sbZS3CK1dkGrIuvJtKsBlCA,7844
76
+ stcrpy-1.0.5.dist-info/METADATA,sha256=6rbC0l_TnXd9UMykkNKAlQCsy4VuBBud4DPQEXh-FtY,10512
77
+ stcrpy-1.0.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
78
+ stcrpy-1.0.5.dist-info/top_level.txt,sha256=kAkgyHyGW-_YswJpcuA3pSqzuQVbUggFUmZg2OTx_B8,16
79
+ stcrpy-1.0.5.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.0.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5