stcrpy 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/__init__.py +0 -0
- examples/egnn.py +425 -0
- stcrpy/__init__.py +5 -0
- stcrpy/tcr_datasets/__init__.py +0 -0
- stcrpy/tcr_datasets/tcr_graph_dataset.py +499 -0
- stcrpy/tcr_datasets/tcr_selector.py +0 -0
- stcrpy/tcr_datasets/tcr_structure_dataset.py +0 -0
- stcrpy/tcr_datasets/utils.py +350 -0
- stcrpy/tcr_formats/__init__.py +0 -0
- stcrpy/tcr_formats/tcr_formats.py +114 -0
- stcrpy/tcr_formats/tcr_haddock.py +556 -0
- stcrpy/tcr_geometry/TCRCoM.py +350 -0
- stcrpy/tcr_geometry/TCRCoM_LICENCE +168 -0
- stcrpy/tcr_geometry/TCRDock.py +261 -0
- stcrpy/tcr_geometry/TCRGeom.py +450 -0
- stcrpy/tcr_geometry/TCRGeomFiltering.py +273 -0
- stcrpy/tcr_geometry/__init__.py +0 -0
- stcrpy/tcr_geometry/reference_data/__init__.py +0 -0
- stcrpy/tcr_geometry/reference_data/dock_reference_1_imgt_numbered.pdb +6549 -0
- stcrpy/tcr_geometry/reference_data/dock_reference_2_imgt_numbered.pdb +6495 -0
- stcrpy/tcr_geometry/reference_data/reference_A.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_B.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_D.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_G.pdb +31 -0
- stcrpy/tcr_geometry/reference_data/reference_data.py +104 -0
- stcrpy/tcr_interactions/PLIPParser.py +147 -0
- stcrpy/tcr_interactions/TCRInteractionProfiler.py +433 -0
- stcrpy/tcr_interactions/TCRpMHC_PLIP_Model_Parser.py +133 -0
- stcrpy/tcr_interactions/__init__.py +0 -0
- stcrpy/tcr_interactions/utils.py +170 -0
- stcrpy/tcr_methods/__init__.py +0 -0
- stcrpy/tcr_methods/tcr_batch_operations.py +223 -0
- stcrpy/tcr_methods/tcr_methods.py +150 -0
- stcrpy/tcr_methods/tcr_reformatting.py +18 -0
- stcrpy/tcr_metrics/__init__.py +2 -0
- stcrpy/tcr_metrics/constants.py +39 -0
- stcrpy/tcr_metrics/tcr_interface_rmsd.py +237 -0
- stcrpy/tcr_metrics/tcr_rmsd.py +179 -0
- stcrpy/tcr_ml/__init__.py +0 -0
- stcrpy/tcr_ml/geometry_predictor.py +3 -0
- stcrpy/tcr_processing/AGchain.py +89 -0
- stcrpy/tcr_processing/Chemical_components.py +48915 -0
- stcrpy/tcr_processing/Entity.py +301 -0
- stcrpy/tcr_processing/Fragment.py +58 -0
- stcrpy/tcr_processing/Holder.py +24 -0
- stcrpy/tcr_processing/MHC.py +449 -0
- stcrpy/tcr_processing/MHCchain.py +149 -0
- stcrpy/tcr_processing/Model.py +37 -0
- stcrpy/tcr_processing/Select.py +145 -0
- stcrpy/tcr_processing/TCR.py +532 -0
- stcrpy/tcr_processing/TCRIO.py +47 -0
- stcrpy/tcr_processing/TCRParser.py +1230 -0
- stcrpy/tcr_processing/TCRStructure.py +148 -0
- stcrpy/tcr_processing/TCRchain.py +160 -0
- stcrpy/tcr_processing/__init__.py +3 -0
- stcrpy/tcr_processing/annotate.py +480 -0
- stcrpy/tcr_processing/utils/__init__.py +0 -0
- stcrpy/tcr_processing/utils/common.py +67 -0
- stcrpy/tcr_processing/utils/constants.py +367 -0
- stcrpy/tcr_processing/utils/region_definitions.py +782 -0
- stcrpy/utils/__init__.py +0 -0
- stcrpy/utils/error_stream.py +12 -0
- stcrpy-1.0.0.dist-info/METADATA +173 -0
- stcrpy-1.0.0.dist-info/RECORD +68 -0
- stcrpy-1.0.0.dist-info/WHEEL +5 -0
- stcrpy-1.0.0.dist-info/licenses/LICENCE +28 -0
- stcrpy-1.0.0.dist-info/licenses/stcrpy/tcr_geometry/TCRCoM_LICENCE +168 -0
- stcrpy-1.0.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Created on 10 May 2017
|
|
3
|
+
@author: leem
|
|
4
|
+
Based on the ABDB.AbPDB.AntibodyStructure class.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from Bio import SeqUtils
|
|
8
|
+
from .Entity import Entity
|
|
9
|
+
from .TCR import TCR
|
|
10
|
+
from .MHC import MHC
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class TCRStructure(Entity):
    """
    The TCRStructure class contains a collection of models.

    As used by the accessors below, iterating the structure yields models,
    iterating a model yields "holders" (TCR objects, MHC objects, or a
    holder whose id is "TCRchain"), and iterating a holder yields chains.
    """

    def __init__(self, identifier):
        """
        Create an empty structure with an empty header.

        Arguments:
            o identifier - id forwarded to Entity.__init__
        """
        # The level is assigned before Entity.__init__ runs, as in the
        # original implementation.
        self.level = "TS"
        Entity.__init__(self, identifier)
        self.header = {}

    def __repr__(self):
        return "<Structure id=%s>" % self.get_id()

    def _sort(self, m1, m2):
        """Sort models.

        This sorting function sorts the Model instances in the Structure instance.
        The sorting is done based on the model id, which is a simple int that
        reflects the order of the models in the PDB file.

        Arguments:
            o m1, m2 - Model instances

        Returns -1, 0 or 1 (cmp-style) depending on how the two ids compare.
        """
        return (m1.get_id() > m2.get_id()) - (m1.get_id() < m2.get_id())

    def _set_numbering_scheme(self, scheme=None):
        """
        Set the numbering scheme used (stored as self.numbering_scheme).
        """
        self.numbering_scheme = scheme

    # Public
    def set_header(self, header):
        """
        Set the header as the parsed header dictionary from biopython
        """
        self.header = header

    def get_header(self):
        """
        Return the header stored by set_header (an empty dict by default).
        """
        return self.header

    def get_models(self):
        """
        Iterate over the models contained in this structure.
        """
        yield from self

    def get_holders(self):
        """
        Iterate over the children of every model (the "holders").
        """
        for model in self.get_models():
            yield from model

    def get_TCRs(self):
        """
        Get any instance of the TCR object.
        Hierarchy:
            TCRStructure
                |
                |______ TCR
                |
                |______ MHC
        """
        for holder in self.get_holders():
            if isinstance(holder, TCR):
                yield holder

    def get_TCRchains(self):
        """Gets all TCR chains"""
        for holder in self.get_holders():
            # Chains live either in the holder whose id is "TCRchain" or
            # inside a TCR object; both are iterated the same way.
            if holder.id == "TCRchain" or isinstance(holder, TCR):
                yield from holder

    def get_MHCs(self):
        """
        Get any instance of the MHC object.
        Hierarchy:
            TCRStructure
                |
                |______ TCR
                |
                |______ MHC
        """
        for holder in self.get_holders():
            if isinstance(holder, MHC):
                yield holder

    def get_antigens(self):
        """
        This gets the 'antigen' chains in the structure,
        that have been assigned to a TCR or an MHC.

        Each antigen is yielded once, the first time it is encountered,
        even if several holders reference it.
        """
        seen = set()
        for holder in self.get_holders():
            if isinstance(holder, (MHC, TCR)) or holder.id == "TCRchain":
                for antigen in holder.antigen:
                    if antigen not in seen:
                        seen.add(antigen)
                        yield antigen

    def get_unpaired_TCRchains(self):
        """
        This gets the TCR chains that are not paired
        (the children of the holder whose id is "TCRchain").
        """
        for holder in self.get_holders():
            if holder.id == "TCRchain":
                yield from holder

    def get_chains(self):
        """
        Iterate over the children of every holder in every model.
        """
        for holder in self.get_holders():
            yield from holder

    def get_residues(self):
        """
        Iterate over every residue of every chain from get_chains().
        """
        for chain in self.get_chains():
            yield from chain

    def get_atoms(self):
        """
        Iterate over every atom of every residue from get_residues().
        """
        for residue in self.get_residues():
            yield from residue

    def get_seq(self, model=0):
        """
        Return the one-letter sequence of a model as a single string.

        One sequence is built per child of self[model] and the sequences
        are joined with "/". A child with no recognised residues
        contributes an empty segment.

        Arguments:
            o model - id of the model to read (default 0)

        Residue names are capitalised before the lookup because the
        Biopython three-to-one table is keyed by "Ala"-style codes, whereas
        residue names read from PDB files are upper case ("ALA"). Residues
        whose name is not in the table (water, ligands, unknown residues)
        are skipped instead of raising KeyError.
        """
        three_to_one = SeqUtils.IUPACData.protein_letters_3to1
        segments = []
        # NOTE(review): assumes every child of the model provides
        # get_residues() - confirm against the holder classes.
        for holder in self[model]:
            letters = []
            for residue in holder.get_residues():
                letter = three_to_one.get(residue.resname.capitalize())
                if letter is not None:
                    letters.append(letter)
            segments.append("".join(letters))
        return "/".join(segments)
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
from Bio.PDB import Chain
|
|
2
|
+
from Bio import SeqUtils
|
|
3
|
+
from .utils.region_definitions import get_region
|
|
4
|
+
from .Entity import Entity
|
|
5
|
+
from .Fragment import Fragment
|
|
6
|
+
|
|
7
|
+
# Fragment names for each chain type letter (B, A, D, G), in sequence order:
# framework ("fw") and CDR ("cdr") names alternate, e.g. fwb1, cdrb1, ..., fwb4.
regions = {
    chain_type: [
        "%s%s%d" % (kind, chain_type.lower(), index)
        for kind, index in (
            ("fw", 1),
            ("cdr", 1),
            ("fw", 2),
            ("cdr", 2),
            ("fw", 3),
            ("cdr", 3),
            ("fw", 4),
        )
    ]
    for chain_type in ("B", "A", "D", "G")
}
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class TCRchain(Chain.Chain, Entity):
    """
    A class to hold a TCR chain.

    Besides the residues, a chain keeps lists of associated MHC and antigen
    objects, the ids of unnumbered residues, a residue-key -> one-letter
    sequence map, and an "engineered" flag. chain_type and fragments only
    exist after analyse() (or set_chain_type / _init_fragments) has run.
    """

    def __init__(self, identifier):
        """
        Initialise both base classes with the identifier and reset all
        per-chain bookkeeping to empty values.
        """
        Chain.Chain.__init__(self, identifier)
        Entity.__init__(self, identifier)
        self.level = "C"
        self.mhc = []
        self.antigen = []
        self.unnumbered = []
        self.sequence = {}
        self.residue_order = {}
        self.engineered = False

    def __repr__(self):
        return "<TCRchain %s type: %s>" % (self.id, self.chain_type)

    def _add_mhc(self, mhc=None):
        """Append an MHC to this chain's mhc list (duplicates are kept)."""
        self.mhc.append(mhc)

    def _add_antigen(self, antigen=None):
        """Append an antigen unless it is already in the antigen list."""
        if antigen in self.antigen:
            return
        self.antigen.append(antigen)

    def is_bound(self):
        """
        Check whether there is an antigen bound to the TCR
        """
        return bool(self.get_antigen())

    def analyse(self, chain_type):
        """
        Set the chain type, create the fragment containers, annotate the
        residues and atoms, then build the sequence map - in that order.
        """
        self.set_chain_type(chain_type)
        self._init_fragments()
        self.annotate_children()
        self.set_sequence()

    def set_chain_type(self, chain_type):
        """
        Set the chain type to B, A, D, or G
        """
        self.chain_type = chain_type

    def set_sequence(self):
        """
        Fill self.sequence and self.residue_order from the chain's residues.

        Keys are chain_type + residue number + insertion code with
        surrounding whitespace stripped. Values are the one-letter code in
        self.sequence and a running index in self.residue_order. Residues
        whose capitalised name is not in the three-to-one table are skipped
        and do not consume an index.
        """
        # TODO: change this to use our chemical components.
        three_to_one = SeqUtils.IUPACData.protein_letters_3to1
        position = 0
        for residue in self:
            three_letter = residue.get_resname().capitalize()
            if three_letter not in three_to_one:
                # skip the residue if the code is not recognised - e.g. UNK
                continue
            _hetflag, resseq, icode = residue.get_id()
            key = (self.chain_type + str(resseq) + str(icode)).strip()
            self.sequence[key] = three_to_one[three_letter]
            self.residue_order[key] = position
            position += 1

    def set_engineered(self, engineered):
        """Store the truthiness of `engineered` as the engineered flag."""
        self.engineered = bool(engineered)

    def add_unnumbered(self, residue):
        """Record the id of a residue that was not numbered."""
        self.unnumbered.append(residue.id)

    def _get_region(self, residue):
        """
        Return the region of a residue, or "?" when the residue has no
        truthy imgt_numbered attribute.
        """
        if not getattr(residue, "imgt_numbered", False):
            return "?"
        return get_region((residue.id[1], residue.id[2]), self.chain_type)

    def annotate_children(self):
        """
        Tag every residue and atom with chain_type and region, and add each
        residue whose region is not "?" to the fragment of that name.
        """
        for residue in self:
            residue.chain_type = self.chain_type
            residue.region = self._get_region(residue)
            for atom in residue:
                atom.chain_type = self.chain_type
                atom.region = residue.region

            if residue.region == "?":
                continue
            self.fragments.child_dict[residue.region].add(residue)

    def _init_fragments(self):
        """
        Create the "Fragments" entity, parent it to this chain, and add one
        empty Fragment per region name listed for the chain type.
        """
        self.fragments = Entity("Fragments")
        self.fragments.set_parent(self)
        for region_name in regions[self.chain_type]:
            self.fragments.add(Fragment(region_name))

    def is_engineered(self):
        """Return the engineered flag."""
        return self.engineered

    def get_MHC(self):
        """Return the list of MHCs attached to this chain."""
        return self.mhc

    def get_antigen(self):
        """Return the list of antigens attached to this chain."""
        return self.antigen

    def get_fragments(self):
        """Iterate over all fragments of this chain."""
        yield from self.fragments

    def get_CDRs(self):
        """Iterate over the fragments whose id starts with "cdr" (any case)."""
        for fragment in self.fragments:
            if fragment.id.lower().startswith("cdr"):
                yield fragment

    def get_frameworks(self):
        """
        Obtain framework regions from a TCRChain object.
        """
        for fragment in self.get_fragments():
            if "fw" in fragment.id:
                yield fragment

    def get_sequence(self, type=dict):
        """
        Return the chain sequence, building it first if it is empty.

        type is dict (default): the key -> one-letter mapping itself.
        type is str: the one-letter codes joined in residue order.
        anything else: a list of (key, one-letter) pairs in residue order.
        """
        if not self.sequence:
            self.set_sequence()
        if type is dict:
            return self.sequence

        in_order = sorted(
            self.sequence.items(), key=lambda item: self.residue_order[item[0]]
        )
        if type is str:
            return "".join(letter for _key, letter in in_order)
        return in_order

    def get_unnumbered(self):
        """Iterate over the residues recorded through add_unnumbered."""
        for residue_id in self.unnumbered:
            yield self.child_dict[residue_id]

    def get_germline_assignments(self):
        """
        Return self.xtra["genetic_origin"].

        NOTE(review): xtra is not set in this class; presumably it comes
        from a base class and is filled elsewhere - confirm.
        """
        return self.xtra["genetic_origin"]
|