stcrpy 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/__init__.py +0 -0
- examples/egnn.py +425 -0
- stcrpy/__init__.py +5 -0
- stcrpy/tcr_datasets/__init__.py +0 -0
- stcrpy/tcr_datasets/tcr_graph_dataset.py +499 -0
- stcrpy/tcr_datasets/tcr_selector.py +0 -0
- stcrpy/tcr_datasets/tcr_structure_dataset.py +0 -0
- stcrpy/tcr_datasets/utils.py +350 -0
- stcrpy/tcr_formats/__init__.py +0 -0
- stcrpy/tcr_formats/tcr_formats.py +114 -0
- stcrpy/tcr_formats/tcr_haddock.py +556 -0
- stcrpy/tcr_geometry/TCRCoM.py +350 -0
- stcrpy/tcr_geometry/TCRCoM_LICENCE +168 -0
- stcrpy/tcr_geometry/TCRDock.py +261 -0
- stcrpy/tcr_geometry/TCRGeom.py +450 -0
- stcrpy/tcr_geometry/TCRGeomFiltering.py +273 -0
- stcrpy/tcr_geometry/__init__.py +0 -0
- stcrpy/tcr_geometry/reference_data/__init__.py +0 -0
- stcrpy/tcr_geometry/reference_data/dock_reference_1_imgt_numbered.pdb +6549 -0
- stcrpy/tcr_geometry/reference_data/dock_reference_2_imgt_numbered.pdb +6495 -0
- stcrpy/tcr_geometry/reference_data/reference_A.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_B.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_D.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_G.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_data.py +104 -0
- stcrpy/tcr_interactions/PLIPParser.py +147 -0
- stcrpy/tcr_interactions/TCRInteractionProfiler.py +433 -0
- stcrpy/tcr_interactions/TCRpMHC_PLIP_Model_Parser.py +133 -0
- stcrpy/tcr_interactions/__init__.py +0 -0
- stcrpy/tcr_interactions/utils.py +170 -0
- stcrpy/tcr_methods/__init__.py +0 -0
- stcrpy/tcr_methods/tcr_batch_operations.py +223 -0
- stcrpy/tcr_methods/tcr_methods.py +150 -0
- stcrpy/tcr_methods/tcr_reformatting.py +18 -0
- stcrpy/tcr_metrics/__init__.py +2 -0
- stcrpy/tcr_metrics/constants.py +39 -0
- stcrpy/tcr_metrics/tcr_interface_rmsd.py +237 -0
- stcrpy/tcr_metrics/tcr_rmsd.py +179 -0
- stcrpy/tcr_ml/__init__.py +0 -0
- stcrpy/tcr_ml/geometry_predictor.py +3 -0
- stcrpy/tcr_processing/AGchain.py +89 -0
- stcrpy/tcr_processing/Chemical_components.py +48915 -0
- stcrpy/tcr_processing/Entity.py +301 -0
- stcrpy/tcr_processing/Fragment.py +58 -0
- stcrpy/tcr_processing/Holder.py +24 -0
- stcrpy/tcr_processing/MHC.py +449 -0
- stcrpy/tcr_processing/MHCchain.py +149 -0
- stcrpy/tcr_processing/Model.py +37 -0
- stcrpy/tcr_processing/Select.py +145 -0
- stcrpy/tcr_processing/TCR.py +532 -0
- stcrpy/tcr_processing/TCRIO.py +47 -0
- stcrpy/tcr_processing/TCRParser.py +1230 -0
- stcrpy/tcr_processing/TCRStructure.py +148 -0
- stcrpy/tcr_processing/TCRchain.py +160 -0
- stcrpy/tcr_processing/__init__.py +3 -0
- stcrpy/tcr_processing/annotate.py +480 -0
- stcrpy/tcr_processing/utils/__init__.py +0 -0
- stcrpy/tcr_processing/utils/common.py +67 -0
- stcrpy/tcr_processing/utils/constants.py +367 -0
- stcrpy/tcr_processing/utils/region_definitions.py +782 -0
- stcrpy/utils/__init__.py +0 -0
- stcrpy/utils/error_stream.py +12 -0
- stcrpy-1.0.0.dist-info/METADATA +173 -0
- stcrpy-1.0.0.dist-info/RECORD +68 -0
- stcrpy-1.0.0.dist-info/WHEEL +5 -0
- stcrpy-1.0.0.dist-info/licenses/LICENCE +28 -0
- stcrpy-1.0.0.dist-info/licenses/stcrpy/tcr_geometry/TCRCoM_LICENCE +168 -0
- stcrpy-1.0.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
ATOM 1 CA GLY A 16 -12.766 -6.146 4.154 0.00 0.00 C
|
|
2
|
+
ATOM 2 CA GLU A 17 -12.984 -3.096 1.985 0.00 0.00 C
|
|
3
|
+
ATOM 3 CA ASN A 18 -10.777 -2.216 -0.940 0.00 0.00 C
|
|
4
|
+
ATOM 4 CA THR A 20 -6.622 2.515 -3.475 0.00 0.00 C
|
|
5
|
+
ATOM 5 CA ASN A 22 -2.375 6.893 -5.615 0.00 0.00 C
|
|
6
|
+
ATOM 6 CA ASN A 38 10.871 1.660 -12.510 0.00 0.00 C
|
|
7
|
+
ATOM 7 CA LEU A 39 8.338 2.153 -9.768 0.00 0.00 C
|
|
8
|
+
ATOM 8 CA GLN A 40 8.338 0.459 -6.382 0.00 0.00 C
|
|
9
|
+
ATOM 9 CA TRP A 41 5.923 0.253 -3.477 0.00 0.00 C
|
|
10
|
+
ATOM 10 CA TYR A 42 7.139 -0.195 0.076 0.00 0.00 C
|
|
11
|
+
ATOM 11 CA ARG A 43 5.258 -0.682 3.297 0.00 0.00 C
|
|
12
|
+
ATOM 12 CA GLN A 44 6.468 0.441 6.681 0.00 0.00 C
|
|
13
|
+
ATOM 13 CA ASN A 45 4.955 -0.914 9.806 0.00 0.00 C
|
|
14
|
+
ATOM 14 CA HIS A 52 9.234 -4.199 -0.523 0.00 0.00 C
|
|
15
|
+
ATOM 15 CA LYS A 85 2.672 9.293 -15.022 0.00 0.00 C
|
|
16
|
+
ATOM 16 CA SER A 86 0.797 7.515 -12.294 0.00 0.00 C
|
|
17
|
+
ATOM 17 CA SER A 88 -1.761 3.077 -8.014 0.00 0.00 C
|
|
18
|
+
ATOM 18 CA LEU A 89 -1.639 0.408 -5.358 0.00 0.00 C
|
|
19
|
+
ATOM 19 CA LEU A 90 -4.860 -1.486 -4.764 0.00 0.00 C
|
|
20
|
+
ATOM 20 CA SER A 94 -8.077 -5.060 3.718 0.00 0.00 C
|
|
21
|
+
ATOM 21 CA ARG A 95 -7.018 -6.656 6.969 0.00 0.00 C
|
|
22
|
+
ATOM 22 CA ALA A 96 -5.916 -5.018 10.209 0.00 0.00 C
|
|
23
|
+
ATOM 23 CA ALA A 97 -2.373 -6.175 9.542 0.00 0.00 C
|
|
24
|
+
ATOM 24 CA ASP A 98 -2.346 -3.975 6.457 0.00 0.00 C
|
|
25
|
+
ATOM 25 CA THR A 99 -2.284 -0.874 8.638 0.00 0.00 C
|
|
26
|
+
ATOM 26 CA ALA A 100 0.968 0.902 7.897 0.00 0.00 C
|
|
27
|
+
ATOM 27 CA SER A 101 2.587 3.693 5.962 0.00 0.00 C
|
|
28
|
+
ATOM 28 CA TYR A 102 2.875 3.032 2.230 0.00 0.00 C
|
|
29
|
+
ATOM 29 CA PHE A 103 5.639 4.573 0.152 0.00 0.00 C
|
|
30
|
+
ATOM 30 CA CYS A 104 5.899 4.893 -3.569 0.00 0.00 C
|
|
31
|
+
TER
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
ATOM 1 CA THR B 5 7.680 4.957 6.318 0.00 0.00 C
|
|
2
|
+
ATOM 2 CA GLN B 6 4.157 4.013 5.323 0.00 0.00 C
|
|
3
|
+
ATOM 3 CA PRO B 8 -1.597 4.163 7.655 0.00 0.00 C
|
|
4
|
+
ATOM 4 CA LYS B 9 -0.563 0.753 8.980 0.00 0.00 C
|
|
5
|
+
ATOM 5 CA PHE B 10 -4.125 -0.534 8.872 0.00 0.00 C
|
|
6
|
+
ATOM 6 CA GLN B 11 -7.228 0.591 7.033 0.00 0.00 C
|
|
7
|
+
ATOM 7 CA VAL B 12 -10.787 -0.634 6.761 0.00 0.00 C
|
|
8
|
+
ATOM 8 CA LYS B 14 -16.366 0.265 2.522 0.00 0.00 C
|
|
9
|
+
ATOM 9 CA THR B 15 -16.732 -0.673 -1.121 0.00 0.00 C
|
|
10
|
+
ATOM 10 CA GLY B 16 -16.733 2.365 -3.364 0.00 0.00 C
|
|
11
|
+
ATOM 11 CA GLN B 22 1.267 7.341 1.834 0.00 0.00 C
|
|
12
|
+
ATOM 12 CA CYS B 23 4.911 6.380 1.563 0.00 0.00 C
|
|
13
|
+
ATOM 13 CA ALA B 24 7.861 8.498 2.594 0.00 0.00 C
|
|
14
|
+
ATOM 14 CA TYR B 38 13.247 5.051 -7.303 0.00 0.00 C
|
|
15
|
+
ATOM 15 CA MET B 39 10.086 4.125 -5.461 0.00 0.00 C
|
|
16
|
+
ATOM 16 CA SER B 40 7.830 1.118 -5.919 0.00 0.00 C
|
|
17
|
+
ATOM 17 CA TRP B 41 4.421 -0.118 -4.828 0.00 0.00 C
|
|
18
|
+
ATOM 18 CA TYR B 42 3.741 -3.827 -4.381 0.00 0.00 C
|
|
19
|
+
ATOM 19 CA ARG B 43 0.720 -5.770 -3.219 0.00 0.00 C
|
|
20
|
+
ATOM 20 CA GLN B 44 0.883 -9.078 -1.429 0.00 0.00 C
|
|
21
|
+
ATOM 21 CA ARG B 51 3.180 -8.202 -7.285 0.00 0.00 C
|
|
22
|
+
ATOM 22 CA LEU B 52 4.411 -4.907 -8.681 0.00 0.00 C
|
|
23
|
+
ATOM 23 CA ILE B 53 1.744 -2.252 -9.110 0.00 0.00 C
|
|
24
|
+
ATOM 24 CA ASP B 86 6.783 11.037 -1.067 0.00 0.00 C
|
|
25
|
+
ATOM 25 CA THR B 99 -8.080 -6.335 0.175 0.00 0.00 C
|
|
26
|
+
ATOM 26 CA SER B 100 -4.378 -6.994 0.577 0.00 0.00 C
|
|
27
|
+
ATOM 27 CA VAL B 101 -1.115 -5.893 2.125 0.00 0.00 C
|
|
28
|
+
ATOM 28 CA TYR B 102 0.511 -2.993 0.305 0.00 0.00 C
|
|
29
|
+
ATOM 29 CA PHE B 103 4.257 -2.495 0.489 0.00 0.00 C
|
|
30
|
+
ATOM 30 CA CYS B 104 6.223 0.500 -0.633 0.00 0.00 C
|
|
31
|
+
TER
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
ATOM 1 CA GLY D 16 -12.766 -6.146 4.154 0.00 0.00 C
|
|
2
|
+
ATOM 2 CA GLU D 17 -12.984 -3.096 1.985 0.00 0.00 C
|
|
3
|
+
ATOM 3 CA ASN D 18 -10.777 -2.216 -0.940 0.00 0.00 C
|
|
4
|
+
ATOM 4 CA THR D 20 -6.622 2.515 -3.475 0.00 0.00 C
|
|
5
|
+
ATOM 5 CA ASN D 22 -2.375 6.893 -5.615 0.00 0.00 C
|
|
6
|
+
ATOM 6 CA ASN D 38 10.871 1.660 -12.510 0.00 0.00 C
|
|
7
|
+
ATOM 7 CA LEU D 39 8.338 2.153 -9.768 0.00 0.00 C
|
|
8
|
+
ATOM 8 CA GLN D 40 8.338 0.459 -6.382 0.00 0.00 C
|
|
9
|
+
ATOM 9 CA TRP D 41 5.923 0.253 -3.477 0.00 0.00 C
|
|
10
|
+
ATOM 10 CA TYR D 42 7.139 -0.195 0.076 0.00 0.00 C
|
|
11
|
+
ATOM 11 CA ARG D 43 5.258 -0.682 3.297 0.00 0.00 C
|
|
12
|
+
ATOM 12 CA GLN D 44 6.468 0.441 6.681 0.00 0.00 C
|
|
13
|
+
ATOM 13 CA ASN D 45 4.955 -0.914 9.806 0.00 0.00 C
|
|
14
|
+
ATOM 14 CA HIS D 52 9.234 -4.199 -0.523 0.00 0.00 C
|
|
15
|
+
ATOM 15 CA LYS D 85 2.672 9.293 -15.022 0.00 0.00 C
|
|
16
|
+
ATOM 16 CA SER D 86 0.797 7.515 -12.294 0.00 0.00 C
|
|
17
|
+
ATOM 17 CA SER D 88 -1.761 3.077 -8.014 0.00 0.00 C
|
|
18
|
+
ATOM 18 CA LEU D 89 -1.639 0.408 -5.358 0.00 0.00 C
|
|
19
|
+
ATOM 19 CA LEU D 90 -4.860 -1.486 -4.764 0.00 0.00 C
|
|
20
|
+
ATOM 20 CA SER D 94 -8.077 -5.060 3.718 0.00 0.00 C
|
|
21
|
+
ATOM 21 CA ARG D 95 -7.018 -6.656 6.969 0.00 0.00 C
|
|
22
|
+
ATOM 22 CA ALA D 96 -5.916 -5.018 10.209 0.00 0.00 C
|
|
23
|
+
ATOM 23 CA ALA D 97 -2.373 -6.175 9.542 0.00 0.00 C
|
|
24
|
+
ATOM 24 CA ASP D 98 -2.346 -3.975 6.457 0.00 0.00 C
|
|
25
|
+
ATOM 25 CA THR D 99 -2.284 -0.874 8.638 0.00 0.00 C
|
|
26
|
+
ATOM 26 CA ALA D 100 0.968 0.902 7.897 0.00 0.00 C
|
|
27
|
+
ATOM 27 CA SER D 101 2.587 3.693 5.962 0.00 0.00 C
|
|
28
|
+
ATOM 28 CA TYR D 102 2.875 3.032 2.230 0.00 0.00 C
|
|
29
|
+
ATOM 29 CA PHE D 103 5.639 4.573 0.152 0.00 0.00 C
|
|
30
|
+
ATOM 30 CA CYS D 104 5.899 4.893 -3.569 0.00 0.00 C
|
|
31
|
+
TER
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
ATOM 1 CA THR G 5 7.680 4.957 6.318 0.00 0.00 C
|
|
2
|
+
ATOM 2 CA GLN G 6 4.157 4.013 5.323 0.00 0.00 C
|
|
3
|
+
ATOM 3 CA PRO G 8 -1.597 4.163 7.655 0.00 0.00 C
|
|
4
|
+
ATOM 4 CA LYS G 9 -0.563 0.753 8.980 0.00 0.00 C
|
|
5
|
+
ATOM 5 CA PHE G 10 -4.125 -0.534 8.872 0.00 0.00 C
|
|
6
|
+
ATOM 6 CA GLN G 11 -7.228 0.591 7.033 0.00 0.00 C
|
|
7
|
+
ATOM 7 CA VAL G 12 -10.787 -0.634 6.761 0.00 0.00 C
|
|
8
|
+
ATOM 8 CA LYS G 14 -16.366 0.265 2.522 0.00 0.00 C
|
|
9
|
+
ATOM 9 CA THR G 15 -16.732 -0.673 -1.121 0.00 0.00 C
|
|
10
|
+
ATOM 10 CA GLY G 16 -16.733 2.365 -3.364 0.00 0.00 C
|
|
11
|
+
ATOM 11 CA GLN G 22 1.267 7.341 1.834 0.00 0.00 C
|
|
12
|
+
ATOM 12 CA CYS G 23 4.911 6.380 1.563 0.00 0.00 C
|
|
13
|
+
ATOM 13 CA ALA G 24 7.861 8.498 2.594 0.00 0.00 C
|
|
14
|
+
ATOM 14 CA TYR G 38 13.247 5.051 -7.303 0.00 0.00 C
|
|
15
|
+
ATOM 15 CA MET G 39 10.086 4.125 -5.461 0.00 0.00 C
|
|
16
|
+
ATOM 16 CA SER G 40 7.830 1.118 -5.919 0.00 0.00 C
|
|
17
|
+
ATOM 17 CA TRP G 41 4.421 -0.118 -4.828 0.00 0.00 C
|
|
18
|
+
ATOM 18 CA TYR G 42 3.741 -3.827 -4.381 0.00 0.00 C
|
|
19
|
+
ATOM 19 CA ARG G 43 0.720 -5.770 -3.219 0.00 0.00 C
|
|
20
|
+
ATOM 20 CA GLN G 44 0.883 -9.078 -1.429 0.00 0.00 C
|
|
21
|
+
ATOM 21 CA ARG G 51 3.180 -8.202 -7.285 0.00 0.00 C
|
|
22
|
+
ATOM 22 CA LEU G 52 4.411 -4.907 -8.681 0.00 0.00 C
|
|
23
|
+
ATOM 23 CA ILE G 53 1.744 -2.252 -9.110 0.00 0.00 C
|
|
24
|
+
ATOM 24 CA ASP G 86 6.783 11.037 -1.067 0.00 0.00 C
|
|
25
|
+
ATOM 25 CA THR G 99 -8.080 -6.335 0.175 0.00 0.00 C
|
|
26
|
+
ATOM 26 CA SER G 100 -4.378 -6.994 0.577 0.00 0.00 C
|
|
27
|
+
ATOM 27 CA VAL G 101 -1.115 -5.893 2.125 0.00 0.00 C
|
|
28
|
+
ATOM 28 CA TYR G 102 0.511 -2.993 0.305 0.00 0.00 C
|
|
29
|
+
ATOM 29 CA PHE G 103 4.257 -2.495 0.489 0.00 0.00 C
|
|
30
|
+
ATOM 30 CA CYS G 104 6.223 0.500 -0.633 0.00 0.00 C
|
|
31
|
+
TER
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
# Per-chain reference data, keyed by single-letter chain type.
# Each entry holds three 3-component rows.
# NOTE(review): presumably two direction vectors followed by a point -- TODO confirm.
# NOTE(review): here "G" repeats the "A" rows and "D" repeats the "B" rows, whereas
# the bundled reference_D.pdb / reference_G.pdb coordinates repeat reference_A.pdb /
# reference_B.pdb respectively -- confirm which pairing is intended.
reference_positions = dict(
    A=[
        [0.170468, 0.120233, -0.978000],
        [-0.460937, 0.886969, 0.028699],
        [5.223556, 2.001083, 1.418880],
    ],
    B=[
        [0.595917, 0.758482, -0.263794],
        [-0.056280, 0.367130, 0.928465],
        [2.455201, -3.709282, -1.446316],
    ],
    G=[
        [0.170468, 0.120233, -0.978000],
        [-0.460937, 0.886969, 0.028699],
        [5.223556, 2.001083, 1.418880],
    ],
    D=[
        [0.595917, 0.758482, -0.263794],
        [-0.056280, 0.367130, 0.928465],
        [2.455201, -3.709282, -1.446316],
    ],
)

# Absolute path of the reference PDB shipped next to this module, per chain type.
reference_pdb_paths = {
    cid: os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "reference_" + cid + ".pdb"
    )
    for cid in "ABGD"
}


# Three scaling factors per chain type; only "A" and "B" are defined here.
# NOTE(review): no "G"/"D" entries -- confirm callers never look them up.
reference_origin_scaling = dict(
    A=[3.0, -1.0, 1.0],
    B=[-5.0, 0.5, 1.0],
)

# Residue numbers per chain type; they match the CA records of the bundled
# reference_A.pdb / reference_B.pdb. Only "A" and "B" are defined here.
reference_residues = dict(
    A=[
        16, 17, 18, 20, 22,
        38, 39, 40, 41, 42, 43, 44, 45,
        52,
        85, 86, 88, 89, 90,
        94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
    ],
    B=[
        5, 6, 8, 9, 10, 11, 12, 14, 15, 16,
        22, 23, 24,
        38, 39, 40, 41, 42, 43, 44,
        51, 52, 53,
        86,
        99, 100, 101, 102, 103, 104,
    ],
)
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
import typing
|
|
2
|
+
import warnings
|
|
3
|
+
import os
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
from . import utils as plip_utils
|
|
8
|
+
from .TCRpMHC_PLIP_Model_Parser import TCRpMHC_PLIP_Model_Parser
|
|
9
|
+
|
|
10
|
+
class PLIPParser:
    """Parse the PLIP-profiled interactions of a TCR-pMHC complex into a DataFrame."""

    def parse_complex(
        self,
        complex: "plip.structure.preparation.PDBComplex",
        tcr_pmhc_complex: typing.Union["abTCR", "gdTCR"] = None,
        renumbering=None,
        domain_assignment=None,
    ) -> pd.DataFrame:
        """
        Parses PLIP profiled interactions and maps them back onto a stcrpy TCR object.

        Args:
            complex (plip.structure.preparation.PDBComplex): Analysed PLIP complex.
                Every interaction in ``interaction_sets[...].all_itypes`` is parsed.
            tcr_pmhc_complex (typing.Union["abTCR", "gdTCR"], optional): TCR object
                used to map ligand atoms back to ligand residues. When given, the
                columns ``ligand_residue`` and ``ligand_number`` are added.
                Defaults to None.
            renumbering (dict, optional): Mapping ``chain id -> {(" ", number, " "):
                original residue id}``. When given and at least one interaction was
                found, the column ``original_numbering`` is added. Defaults to None.
            domain_assignment (dict, optional): Mapping ``domain name -> chain id``.
                When given, the column ``domain`` is added. Defaults to None.

        Returns:
            pd.DataFrame: One row per parsed interaction.
        """
        all_interactions = []
        for interaction_set in complex.interaction_sets.values():
            for interaction in interaction_set.all_itypes:
                try:
                    parsed = plip_utils.parse_interaction(interaction)
                except NotImplementedError as e:
                    # Unsupported interaction types are reported and skipped,
                    # they must not abort the whole profile.
                    warnings.warn(str(e))
                    continue
                all_interactions.append(parsed)

        interactions_df = self._interactions_to_dataframe(all_interactions)
        if renumbering is not None and len(interactions_df) > 0:
            self._renumber_interactions(interactions_df, renumbering)
        if tcr_pmhc_complex is not None:
            # Use the returned frame so the ligand columns are present even
            # when no interactions were found.
            interactions_df = self._map_amino_acids_to_ligands(
                interactions_df, tcr_pmhc_complex
            )
        if domain_assignment is not None:
            self._assign_domains_to_chains(interactions_df, domain_assignment)

        return interactions_df

    def _renumber_interactions(self, interactions_df, renumbering):
        """Add an ``original_numbering`` column to ``interactions_df`` in place.

        For every row, ``renumbering[protein_chain][(" ", protein_number, " ")]``
        is looked up and its elements 1 and 2 are joined into a string label
        (element 2 is stripped of whitespace first).
        NOTE(review): the looked-up value is presumably a Biopython-style residue
        id ``(hetero flag, number, insertion code)`` -- confirm against the caller.

        Args:
            interactions_df (pd.DataFrame): Frame with ``protein_chain`` and
                ``protein_number`` columns.
            renumbering (dict): Mapping ``chain id -> {plip residue id: original
                residue id}``.

        Returns:
            pd.DataFrame: The same ``interactions_df`` object.

        Raises:
            KeyError: If a chain or residue has no entry in ``renumbering``.
        """

        def original_label(chain_id, plip_number):
            # Single lookup per row; the label is "<number><suffix>".
            residue_id = renumbering[chain_id][(" ", plip_number, " ")]
            return str(residue_id[1]) + residue_id[2].strip()

        interactions_df["original_numbering"] = [
            original_label(chain_id, plip_number)
            for chain_id, plip_number in zip(
                interactions_df["protein_chain"], interactions_df["protein_number"]
            )
        ]
        return interactions_df

    def _assign_domains_to_chains(self, interactions_df, domains):
        """Add a ``domain`` column to ``interactions_df`` in place.

        Args:
            interactions_df (pd.DataFrame): Frame with a ``protein_chain`` column.
            domains (dict): Mapping ``domain name -> chain id``; it is inverted
                here, so chains missing from it get ``None``.
        """
        chain_to_domain_mapping = {
            chain_id: domain for domain, chain_id in domains.items()
        }
        interactions_df["domain"] = interactions_df.protein_chain.apply(
            lambda chain_id: chain_to_domain_mapping.get(chain_id)
        )

    def _interactions_to_dataframe(self, interaction_list: list) -> pd.DataFrame:
        """Build a DataFrame from parsed interactions.

        Args:
            interaction_list (list): Objects exposing ``to_tuple()``, whose fields
                follow the column order below.

        Returns:
            pd.DataFrame: One row per interaction; an empty frame with the same
            columns when there are no interactions.
        """
        columns = [
            "type",
            "protein_atom",
            "protein_chain",
            "protein_residue",
            "protein_number",
            "ligand_atom",
            "distance",
            "angle",
            "plip_id",
        ]

        interactions_as_tuples = [
            interaction.to_tuple() for interaction in interaction_list
        ]
        # Transpose the row tuples into one value tuple per column.
        column_values = list(zip(*interactions_as_tuples))
        if not column_values:
            return pd.DataFrame(columns=columns)
        return pd.DataFrame(
            {name: column_values[i] for i, name in enumerate(columns)}
        )

    def _map_amino_acids_to_ligands(
        self,
        interactions_df: pd.DataFrame,
        tcr_pmhc_complex: typing.Union["abTCR", "gdTCR"],
    ) -> pd.DataFrame:
        """Add ``ligand_residue`` and ``ligand_number`` columns in place.

        The complex is written to temporary files by ``TCRpMHC_PLIP_Model_Parser``
        to obtain a mapping keyed by ``ligand_atom[0][0]``. Those files are
        removed before returning, also when the mapping fails.

        Args:
            interactions_df (pd.DataFrame): Frame with a ``ligand_atom`` column.
            tcr_pmhc_complex: TCR object handed to the model parser.

        Returns:
            pd.DataFrame: The same ``interactions_df`` object with the two
            columns added (empty columns when the frame has no rows).
        """
        parser = TCRpMHC_PLIP_Model_Parser()
        _, tcr_mhc_pdb, ligand_pdb, ligand_sdf = parser.parse_tcr_pmhc_complex(
            tcr_pmhc_complex, delete_tmp_files=False, renumber=False
        )
        try:
            coordinate_mapping = parser.map_amino_acids_to_ligands(
                ligand_pdb, ligand_sdf
            )
            if len(interactions_df) > 0:
                ligand_residues = interactions_df.apply(
                    lambda x: coordinate_mapping[x.ligand_atom[0][0]], axis=1
                )
                (
                    interactions_df["ligand_residue"],
                    interactions_df["ligand_number"],
                ) = map(list, zip(*ligand_residues))
            else:
                # Keep the column schema consistent for empty results.
                for column in ("ligand_residue", "ligand_number"):
                    interactions_df[column] = pd.Series(dtype=object)
        finally:
            # delete temp files needed for renumbering
            for tmp_path in (tcr_mhc_pdb, ligand_pdb, ligand_sdf):
                os.remove(tmp_path)

        return interactions_df
|