stcrpy 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. stcrpy-1.0.0/LICENCE +28 -0
  2. stcrpy-1.0.0/PKG-INFO +173 -0
  3. stcrpy-1.0.0/README.md +145 -0
  4. stcrpy-1.0.0/examples/__init__.py +0 -0
  5. stcrpy-1.0.0/examples/egnn.py +425 -0
  6. stcrpy-1.0.0/setup.cfg +4 -0
  7. stcrpy-1.0.0/setup.py +31 -0
  8. stcrpy-1.0.0/stcrpy/__init__.py +5 -0
  9. stcrpy-1.0.0/stcrpy/tcr_datasets/__init__.py +0 -0
  10. stcrpy-1.0.0/stcrpy/tcr_datasets/tcr_graph_dataset.py +499 -0
  11. stcrpy-1.0.0/stcrpy/tcr_datasets/tcr_selector.py +0 -0
  12. stcrpy-1.0.0/stcrpy/tcr_datasets/tcr_structure_dataset.py +0 -0
  13. stcrpy-1.0.0/stcrpy/tcr_datasets/utils.py +350 -0
  14. stcrpy-1.0.0/stcrpy/tcr_formats/__init__.py +0 -0
  15. stcrpy-1.0.0/stcrpy/tcr_formats/tcr_formats.py +114 -0
  16. stcrpy-1.0.0/stcrpy/tcr_formats/tcr_haddock.py +556 -0
  17. stcrpy-1.0.0/stcrpy/tcr_geometry/TCRCoM.py +350 -0
  18. stcrpy-1.0.0/stcrpy/tcr_geometry/TCRCoM_LICENCE +168 -0
  19. stcrpy-1.0.0/stcrpy/tcr_geometry/TCRDock.py +261 -0
  20. stcrpy-1.0.0/stcrpy/tcr_geometry/TCRGeom.py +450 -0
  21. stcrpy-1.0.0/stcrpy/tcr_geometry/TCRGeomFiltering.py +273 -0
  22. stcrpy-1.0.0/stcrpy/tcr_geometry/__init__.py +0 -0
  23. stcrpy-1.0.0/stcrpy/tcr_geometry/reference_data/__init__.py +0 -0
  24. stcrpy-1.0.0/stcrpy/tcr_geometry/reference_data/dock_reference_1_imgt_numbered.pdb +6549 -0
  25. stcrpy-1.0.0/stcrpy/tcr_geometry/reference_data/dock_reference_2_imgt_numbered.pdb +6495 -0
  26. stcrpy-1.0.0/stcrpy/tcr_geometry/reference_data/reference_A.pdb +31 -0
  27. stcrpy-1.0.0/stcrpy/tcr_geometry/reference_data/reference_B.pdb +31 -0
  28. stcrpy-1.0.0/stcrpy/tcr_geometry/reference_data/reference_D.pdb +31 -0
  29. stcrpy-1.0.0/stcrpy/tcr_geometry/reference_data/reference_G.pdb +31 -0
  30. stcrpy-1.0.0/stcrpy/tcr_geometry/reference_data/reference_data.py +104 -0
  31. stcrpy-1.0.0/stcrpy/tcr_interactions/PLIPParser.py +147 -0
  32. stcrpy-1.0.0/stcrpy/tcr_interactions/TCRInteractionProfiler.py +433 -0
  33. stcrpy-1.0.0/stcrpy/tcr_interactions/TCRpMHC_PLIP_Model_Parser.py +133 -0
  34. stcrpy-1.0.0/stcrpy/tcr_interactions/__init__.py +0 -0
  35. stcrpy-1.0.0/stcrpy/tcr_interactions/utils.py +170 -0
  36. stcrpy-1.0.0/stcrpy/tcr_methods/__init__.py +0 -0
  37. stcrpy-1.0.0/stcrpy/tcr_methods/tcr_batch_operations.py +223 -0
  38. stcrpy-1.0.0/stcrpy/tcr_methods/tcr_methods.py +150 -0
  39. stcrpy-1.0.0/stcrpy/tcr_methods/tcr_reformatting.py +18 -0
  40. stcrpy-1.0.0/stcrpy/tcr_metrics/__init__.py +2 -0
  41. stcrpy-1.0.0/stcrpy/tcr_metrics/constants.py +39 -0
  42. stcrpy-1.0.0/stcrpy/tcr_metrics/tcr_interface_rmsd.py +237 -0
  43. stcrpy-1.0.0/stcrpy/tcr_metrics/tcr_rmsd.py +179 -0
  44. stcrpy-1.0.0/stcrpy/tcr_ml/__init__.py +0 -0
  45. stcrpy-1.0.0/stcrpy/tcr_ml/geometry_predictor.py +3 -0
  46. stcrpy-1.0.0/stcrpy/tcr_processing/AGchain.py +89 -0
  47. stcrpy-1.0.0/stcrpy/tcr_processing/Chemical_components.py +48915 -0
  48. stcrpy-1.0.0/stcrpy/tcr_processing/Entity.py +301 -0
  49. stcrpy-1.0.0/stcrpy/tcr_processing/Fragment.py +58 -0
  50. stcrpy-1.0.0/stcrpy/tcr_processing/Holder.py +24 -0
  51. stcrpy-1.0.0/stcrpy/tcr_processing/MHC.py +449 -0
  52. stcrpy-1.0.0/stcrpy/tcr_processing/MHCchain.py +149 -0
  53. stcrpy-1.0.0/stcrpy/tcr_processing/Model.py +37 -0
  54. stcrpy-1.0.0/stcrpy/tcr_processing/Select.py +145 -0
  55. stcrpy-1.0.0/stcrpy/tcr_processing/TCR.py +532 -0
  56. stcrpy-1.0.0/stcrpy/tcr_processing/TCRIO.py +47 -0
  57. stcrpy-1.0.0/stcrpy/tcr_processing/TCRParser.py +1230 -0
  58. stcrpy-1.0.0/stcrpy/tcr_processing/TCRStructure.py +148 -0
  59. stcrpy-1.0.0/stcrpy/tcr_processing/TCRchain.py +160 -0
  60. stcrpy-1.0.0/stcrpy/tcr_processing/__init__.py +3 -0
  61. stcrpy-1.0.0/stcrpy/tcr_processing/annotate.py +480 -0
  62. stcrpy-1.0.0/stcrpy/tcr_processing/utils/__init__.py +0 -0
  63. stcrpy-1.0.0/stcrpy/tcr_processing/utils/common.py +67 -0
  64. stcrpy-1.0.0/stcrpy/tcr_processing/utils/constants.py +367 -0
  65. stcrpy-1.0.0/stcrpy/tcr_processing/utils/region_definitions.py +782 -0
  66. stcrpy-1.0.0/stcrpy/utils/__init__.py +0 -0
  67. stcrpy-1.0.0/stcrpy/utils/error_stream.py +12 -0
  68. stcrpy-1.0.0/stcrpy.egg-info/PKG-INFO +173 -0
  69. stcrpy-1.0.0/stcrpy.egg-info/SOURCES.txt +82 -0
  70. stcrpy-1.0.0/stcrpy.egg-info/dependency_links.txt +1 -0
  71. stcrpy-1.0.0/stcrpy.egg-info/requires.txt +11 -0
  72. stcrpy-1.0.0/stcrpy.egg-info/top_level.txt +2 -0
  73. stcrpy-1.0.0/test/test_annotations.py +17 -0
  74. stcrpy-1.0.0/test/test_tcr_datasets.py +34 -0
  75. stcrpy-1.0.0/test/test_tcr_formats.py +77 -0
  76. stcrpy-1.0.0/test/test_tcr_geometry.py +259 -0
  77. stcrpy-1.0.0/test/test_tcr_geometry_filters.py +249 -0
  78. stcrpy-1.0.0/test/test_tcr_interactions.py +365 -0
  79. stcrpy-1.0.0/test/test_tcr_methods.py +18 -0
  80. stcrpy-1.0.0/test/test_tcr_metrics.py +111 -0
  81. stcrpy-1.0.0/test/test_tcr_processing.py +172 -0
  82. stcrpy-1.0.0/test/test_tcr_sequence_operations.py +16 -0
stcrpy-1.0.0/LICENCE ADDED
@@ -0,0 +1,28 @@
1
+ BSD 3-Clause License
2
+
3
+ Copyright (c) 2024, University of Oxford
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ 1. Redistributions of source code must retain the above copyright notice, this
9
+ list of conditions and the following disclaimer.
10
+
11
+ 2. Redistributions in binary form must reproduce the above copyright notice,
12
+ this list of conditions and the following disclaimer in the documentation
13
+ and/or other materials provided with the distribution.
14
+
15
+ 3. Neither the name of the copyright holder nor the names of its
16
+ contributors may be used to endorse or promote products derived from
17
+ this software without specific prior written permission.
18
+
19
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
stcrpy-1.0.0/PKG-INFO ADDED
@@ -0,0 +1,173 @@
1
+ Metadata-Version: 2.4
2
+ Name: stcrpy
3
+ Version: 1.0.0
4
+ Summary: Set of methods to parse, annotate, and calculate features of TCR structures
5
+ Maintainer: Nele Quast
6
+ Maintainer-email: quast@stats.ox.ac.uk
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENCE
9
+ License-File: stcrpy/tcr_geometry/TCRCoM_LICENCE
10
+ Requires-Dist: biopython
11
+ Requires-Dist: numpy==1.26.4
12
+ Requires-Dist: lxml
13
+ Requires-Dist: openbabel-wheel==3.1.1.21
14
+ Requires-Dist: rdkit
15
+ Requires-Dist: anarci-mhc
16
+ Requires-Dist: pandas
17
+ Requires-Dist: matplotlib
18
+ Requires-Dist: scipy
19
+ Requires-Dist: requests
20
+ Requires-Dist: scikit-learn
21
+ Dynamic: description
22
+ Dynamic: description-content-type
23
+ Dynamic: license-file
24
+ Dynamic: maintainer
25
+ Dynamic: maintainer-email
26
+ Dynamic: requires-dist
27
+ Dynamic: summary
28
+
29
+
30
+
31
+ <img src="./stcrpy_logo.png" alt="drawing" width="300"/>
32
+
33
+
34
+ # STCRpy
35
+ [![stcrpy installation](https://github.com/npqst/STCRpy/actions/workflows/conda-workflow.yml/badge.svg)](https://github.com/npqst/STCRpy/actions/workflows/conda-workflow.yml)
36
+ [![stcrpy unittests](https://github.com/npqst/STCRpy/actions/workflows/unittest-workflow.yml/badge.svg)](https://github.com/npqst/STCRpy/actions/workflows/unittest-workflow.yml)
37
+ [![stcrpy_docs](https://readthedocs.org/projects/stcrpy/badge/?version=latest)](https://stcrpy.readthedocs.io/en/latest/)
38
+
39
+
40
+ Structural TCR python (STCRpy) is a software suite for analysing and processing T-cell receptor structures.
41
+
42
+ Please feel free to reach out with any comments or feedback.
43
+
44
+ Under review, please cite:
45
+
46
+ **Quast, N., Deane, C., & Raybould, M. (2025). STCRpy: a software suite for TCR:pMHC structure parsing, interaction profiling, and machine learning dataset preparation. BioRxiv. https://doi.org/10.1101/2025.04.25.650667**
47
+
48
+ <img src="./stcrpy_main_fig.png" alt="drawing" width="1500"/>
49
+
50
+
51
+
52
+ # Installation
53
+
54
+ ## TL;DR installation
55
+ ```
56
+ pip install stcrpy
57
+ pip install plip
58
+ conda install -c conda-forge pymol-open-source numpy -y
59
+ ANARCI --build_models # this step will take a few minutes
60
+ ```
61
+
62
+ ## Step by step installation
63
+ We recommend installing STCRpy in a [conda](https://www.anaconda.com/docs/getting-started/miniconda/install#macos-linux-installation) (or [mamba](https://mamba.readthedocs.io/en/latest/installation/mamba-installation.html)) environment using python 3.9 to 3.12:
64
+ ```
65
+ conda create -n stcrpy_env python==3.12 -y
66
+ conda activate stcrpy_env
67
+ ```
68
+
69
+ The core functionality of STCRpy can be installed as follows:
70
+ ```
71
+ pip install stcrpy
72
+ ```
73
+
74
+ After installing stcrpy, the anarci HMM models must be built to enable annotation.
75
+ ```
76
+ ANARCI --build_models # this step will take a few minutes
77
+ ```
78
+
79
+ To enable interaction profiling, install PLIP (Adasme et al., 2021):
80
+ ```
81
+ pip install plip
82
+ ```
83
+
84
+ To enable pymol visualisations, install pymol open source locally within the environment. Unfortunately, pymol currently needs to be installed even if you already have a pymol version. Be sure to install pymol within a managed conda (or mamba) environment to prevent interference with any existing versions.
85
+ ```
86
+ conda install -c conda-forge pymol-open-source numpy -y
87
+ ```
88
+
89
+ To generate pytorch and pytorch-geometric compatible datasets:
90
+ ```
91
+ pip install torch --index-url https://download.pytorch.org/whl/cpu
92
+ pip install torch_geometric
93
+ ```
94
+ Note that this installs the CPU version of pytorch; for GPU / CUDA versions, install according to the [pytorch installation docs](https://pytorch.org/get-started/locally/).
95
+
96
+ The EGNN example also uses `einops`. To install:
97
+ ```
98
+ pip install einops
99
+ ```
100
+
101
+ # Documentation
102
+ STCRpy [documentation](https://stcrpy.readthedocs.io/en/latest/) is hosted on ReadtheDocs.
103
+
104
+ # Examples
105
+ STCRpy generates and operates on TCR structure objects. The majority of the API can be accessed through functions of the format: `tcr.some_stcrpy_function()`. TCR objects are associated with their MHC and antigen if these are presented in the structure.
106
+
107
+ A notebook with examples can be found under [examples/STCRpy_examples.ipynb](./examples/STCRpy_examples.ipynb)
108
+
109
+ First import STCRpy:
110
+ ```
111
+ import stcrpy
112
+ ```
113
+
114
+ ### To fetch a TCR structure from STCRDab or the PDB:
115
+ ```
116
+ tcr = stcrpy.fetch_TCR("8gvb")
117
+ ```
118
+ This will return a TCR structure object, or, if there are multiple copies of TCR crystal structures in the PDB file, will return a list containing TCR structure objects. It may be useful to unpack the list into distinct objects, or use python generators to operate on the lists.
119
+
120
+ ### To load a TCR structure from a PDB or MMCIF file:
121
+ ```
122
+ tcr = stcrpy.load_TCR("filename.{pdb, cif}")
123
+ ```
124
+
125
+ ### To load multiple TCR structures from a list of files at once:
126
+ ```
127
+ multiple_tcrs = stcrpy.load_TCRs([file_1, file_2, file_3])
128
+ ```
129
+
130
+ ### To save a TCR object to PDB or MMCIF files:
131
+ ```
132
+ tcr.save(filename.{pdb, cif}) # save the TCR and its associated MHC and antigen
133
+ tcr.save(filename.{pdb, cif}, TCR_only=True) # save the TCR only
134
+ ```
135
+
136
+ ### To calculate the TCR to pMHC geometry:
137
+ ```
138
+ tcr.calculate_geometry() # change the 'mode' keyword argument to change the geometry calculation method. See paper / documentation for details.
139
+ ```
140
+
141
+ ### To score the TCR to pMHC geometry:
142
+ ```
143
+ tcr.score_docking_geometry()
144
+ ```
145
+
146
+ ### To profile interactions:
147
+ ```
148
+ tcr.profile_peptide_interactions() # interaction profiling parameters can be adjusted, see documentation for details
149
+ ```
150
+
151
+ ### To visualise interactions:
152
+ ```
153
+ tcr.visualise_interactions()
154
+ ```
155
+
156
+ ### To run full analysis on a set of TCR structures:
157
+ ```
158
+ from stcrpy.tcr_methods.tcr_batch_operations import analyse_tcrs
159
+ germlines_and_alleles_df, geometries_df, interactions_df = analyse_tcrs(list_or_dict_of_files)
160
+ ```
161
+
162
+ ### To generate graph datasets:
163
+ ```
164
+ dataset = TCRGraphDataset(
165
+ root=PATH_TO_DATASET,
166
+ data_paths=PATH_TO_TCR_FILES
167
+ )
168
+ ```
169
+
170
+
171
+
172
+
173
+
stcrpy-1.0.0/README.md ADDED
@@ -0,0 +1,145 @@
1
+
2
+
3
+ <img src="./stcrpy_logo.png" alt="drawing" width="300"/>
4
+
5
+
6
+ # STCRpy
7
+ [![stcrpy installation](https://github.com/npqst/STCRpy/actions/workflows/conda-workflow.yml/badge.svg)](https://github.com/npqst/STCRpy/actions/workflows/conda-workflow.yml)
8
+ [![stcrpy unittests](https://github.com/npqst/STCRpy/actions/workflows/unittest-workflow.yml/badge.svg)](https://github.com/npqst/STCRpy/actions/workflows/unittest-workflow.yml)
9
+ [![stcrpy_docs](https://readthedocs.org/projects/stcrpy/badge/?version=latest)](https://stcrpy.readthedocs.io/en/latest/)
10
+
11
+
12
+ Structural TCR python (STCRpy) is a software suite for analysing and processing T-cell receptor structures.
13
+
14
+ Please feel free to reach out with any comments or feedback.
15
+
16
+ Under review, please cite:
17
+
18
+ **Quast, N., Deane, C., & Raybould, M. (2025). STCRpy: a software suite for TCR:pMHC structure parsing, interaction profiling, and machine learning dataset preparation. BioRxiv. https://doi.org/10.1101/2025.04.25.650667**
19
+
20
+ <img src="./stcrpy_main_fig.png" alt="drawing" width="1500"/>
21
+
22
+
23
+
24
+ # Installation
25
+
26
+ ## TL;DR installation
27
+ ```
28
+ pip install stcrpy
29
+ pip install plip
30
+ conda install -c conda-forge pymol-open-source numpy -y
31
+ ANARCI --build_models # this step will take a few minutes
32
+ ```
33
+
34
+ ## Step by step installation
35
+ We recommend installing STCRpy in a [conda](https://www.anaconda.com/docs/getting-started/miniconda/install#macos-linux-installation) (or [mamba](https://mamba.readthedocs.io/en/latest/installation/mamba-installation.html)) environment using python 3.9 to 3.12:
36
+ ```
37
+ conda create -n stcrpy_env python==3.12 -y
38
+ conda activate stcrpy_env
39
+ ```
40
+
41
+ The core functionality of STCRpy can be installed as follows:
42
+ ```
43
+ pip install stcrpy
44
+ ```
45
+
46
+ After installing stcrpy, the anarci HMM models must be built to enable annotation.
47
+ ```
48
+ ANARCI --build_models # this step will take a few minutes
49
+ ```
50
+
51
+ To enable interaction profiling, install PLIP (Adasme et al., 2021):
52
+ ```
53
+ pip install plip
54
+ ```
55
+
56
+ To enable pymol visualisations, install pymol open source locally within the environment. Unfortunately, pymol currently needs to be installed even if you already have a pymol version. Be sure to install pymol within a managed conda (or mamba) environment to prevent interference with any existing versions.
57
+ ```
58
+ conda install -c conda-forge pymol-open-source numpy -y
59
+ ```
60
+
61
+ To generate pytorch and pytorch-geometric compatible datasets:
62
+ ```
63
+ pip install torch --index-url https://download.pytorch.org/whl/cpu
64
+ pip install torch_geometric
65
+ ```
66
+ Note that this installs the CPU version of pytorch; for GPU / CUDA versions, install according to the [pytorch installation docs](https://pytorch.org/get-started/locally/).
67
+
68
+ The EGNN example also uses `einops`. To install:
69
+ ```
70
+ pip install einops
71
+ ```
72
+
73
+ # Documentation
74
+ STCRpy [documentation](https://stcrpy.readthedocs.io/en/latest/) is hosted on ReadtheDocs.
75
+
76
+ # Examples
77
+ STCRpy generates and operates on TCR structure objects. The majority of the API can be accessed through functions of the format: `tcr.some_stcrpy_function()`. TCR objects are associated with their MHC and antigen if these are presented in the structure.
78
+
79
+ A notebook with examples can be found under [examples/STCRpy_examples.ipynb](./examples/STCRpy_examples.ipynb)
80
+
81
+ First import STCRpy:
82
+ ```
83
+ import stcrpy
84
+ ```
85
+
86
+ ### To fetch a TCR structure from STCRDab or the PDB:
87
+ ```
88
+ tcr = stcrpy.fetch_TCR("8gvb")
89
+ ```
90
+ This will return a TCR structure object, or, if there are multiple copies of TCR crystal structures in the PDB file, will return a list containing TCR structure objects. It may be useful to unpack the list into distinct objects, or use python generators to operate on the lists.
91
+
92
+ ### To load a TCR structure from a PDB or MMCIF file:
93
+ ```
94
+ tcr = stcrpy.load_TCR("filename.{pdb, cif}")
95
+ ```
96
+
97
+ ### To load multiple TCR structures from a list of files at once:
98
+ ```
99
+ multiple_tcrs = stcrpy.load_TCRs([file_1, file_2, file_3])
100
+ ```
101
+
102
+ ### To save a TCR object to PDB or MMCIF files:
103
+ ```
104
+ tcr.save(filename.{pdb, cif}) # save the TCR and its associated MHC and antigen
105
+ tcr.save(filename.{pdb, cif}, TCR_only=True) # save the TCR only
106
+ ```
107
+
108
+ ### To calculate the TCR to pMHC geometry:
109
+ ```
110
+ tcr.calculate_geometry() # change the 'mode' keyword argument to change the geometry calculation method. See paper / documentation for details.
111
+ ```
112
+
113
+ ### To score the TCR to pMHC geometry:
114
+ ```
115
+ tcr.score_docking_geometry()
116
+ ```
117
+
118
+ ### To profile interactions:
119
+ ```
120
+ tcr.profile_peptide_interactions() # interaction profiling parameters can be adjusted, see documentation for details
121
+ ```
122
+
123
+ ### To visualise interactions:
124
+ ```
125
+ tcr.visualise_interactions()
126
+ ```
127
+
128
+ ### To run full analysis on a set of TCR structures:
129
+ ```
130
+ from stcrpy.tcr_methods.tcr_batch_operations import analyse_tcrs
131
+ germlines_and_alleles_df, geometries_df, interactions_df = analyse_tcrs(list_or_dict_of_files)
132
+ ```
133
+
134
+ ### To generate graph datasets:
135
+ ```
136
+ dataset = TCRGraphDataset(
137
+ root=PATH_TO_DATASET,
138
+ data_paths=PATH_TO_TCR_FILES
139
+ )
140
+ ```
141
+
142
+
143
+
144
+
145
+
File without changes