stcrpy 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. examples/__init__.py +0 -0
  2. examples/egnn.py +425 -0
  3. stcrpy/__init__.py +5 -0
  4. stcrpy/tcr_datasets/__init__.py +0 -0
  5. stcrpy/tcr_datasets/tcr_graph_dataset.py +499 -0
  6. stcrpy/tcr_datasets/tcr_selector.py +0 -0
  7. stcrpy/tcr_datasets/tcr_structure_dataset.py +0 -0
  8. stcrpy/tcr_datasets/utils.py +350 -0
  9. stcrpy/tcr_formats/__init__.py +0 -0
  10. stcrpy/tcr_formats/tcr_formats.py +114 -0
  11. stcrpy/tcr_formats/tcr_haddock.py +556 -0
  12. stcrpy/tcr_geometry/TCRCoM.py +350 -0
  13. stcrpy/tcr_geometry/TCRCoM_LICENCE +168 -0
  14. stcrpy/tcr_geometry/TCRDock.py +261 -0
  15. stcrpy/tcr_geometry/TCRGeom.py +450 -0
  16. stcrpy/tcr_geometry/TCRGeomFiltering.py +273 -0
  17. stcrpy/tcr_geometry/__init__.py +0 -0
  18. stcrpy/tcr_geometry/reference_data/__init__.py +0 -0
  19. stcrpy/tcr_geometry/reference_data/dock_reference_1_imgt_numbered.pdb +6549 -0
  20. stcrpy/tcr_geometry/reference_data/dock_reference_2_imgt_numbered.pdb +6495 -0
  21. stcrpy/tcr_geometry/reference_data/reference_A.pdb +31 -0
  22. stcrpy/tcr_geometry/reference_data/reference_B.pdb +31 -0
  23. stcrpy/tcr_geometry/reference_data/reference_D.pdb +31 -0
  24. stcrpy/tcr_geometry/reference_data/reference_G.pdb +31 -0
  25. stcrpy/tcr_geometry/reference_data/reference_data.py +104 -0
  26. stcrpy/tcr_interactions/PLIPParser.py +147 -0
  27. stcrpy/tcr_interactions/TCRInteractionProfiler.py +433 -0
  28. stcrpy/tcr_interactions/TCRpMHC_PLIP_Model_Parser.py +133 -0
  29. stcrpy/tcr_interactions/__init__.py +0 -0
  30. stcrpy/tcr_interactions/utils.py +170 -0
  31. stcrpy/tcr_methods/__init__.py +0 -0
  32. stcrpy/tcr_methods/tcr_batch_operations.py +223 -0
  33. stcrpy/tcr_methods/tcr_methods.py +150 -0
  34. stcrpy/tcr_methods/tcr_reformatting.py +18 -0
  35. stcrpy/tcr_metrics/__init__.py +2 -0
  36. stcrpy/tcr_metrics/constants.py +39 -0
  37. stcrpy/tcr_metrics/tcr_interface_rmsd.py +237 -0
  38. stcrpy/tcr_metrics/tcr_rmsd.py +179 -0
  39. stcrpy/tcr_ml/__init__.py +0 -0
  40. stcrpy/tcr_ml/geometry_predictor.py +3 -0
  41. stcrpy/tcr_processing/AGchain.py +89 -0
  42. stcrpy/tcr_processing/Chemical_components.py +48915 -0
  43. stcrpy/tcr_processing/Entity.py +301 -0
  44. stcrpy/tcr_processing/Fragment.py +58 -0
  45. stcrpy/tcr_processing/Holder.py +24 -0
  46. stcrpy/tcr_processing/MHC.py +449 -0
  47. stcrpy/tcr_processing/MHCchain.py +149 -0
  48. stcrpy/tcr_processing/Model.py +37 -0
  49. stcrpy/tcr_processing/Select.py +145 -0
  50. stcrpy/tcr_processing/TCR.py +532 -0
  51. stcrpy/tcr_processing/TCRIO.py +47 -0
  52. stcrpy/tcr_processing/TCRParser.py +1230 -0
  53. stcrpy/tcr_processing/TCRStructure.py +148 -0
  54. stcrpy/tcr_processing/TCRchain.py +160 -0
  55. stcrpy/tcr_processing/__init__.py +3 -0
  56. stcrpy/tcr_processing/annotate.py +480 -0
  57. stcrpy/tcr_processing/utils/__init__.py +0 -0
  58. stcrpy/tcr_processing/utils/common.py +67 -0
  59. stcrpy/tcr_processing/utils/constants.py +367 -0
  60. stcrpy/tcr_processing/utils/region_definitions.py +782 -0
  61. stcrpy/utils/__init__.py +0 -0
  62. stcrpy/utils/error_stream.py +12 -0
  63. stcrpy-1.0.0.dist-info/METADATA +173 -0
  64. stcrpy-1.0.0.dist-info/RECORD +68 -0
  65. stcrpy-1.0.0.dist-info/WHEEL +5 -0
  66. stcrpy-1.0.0.dist-info/licenses/LICENCE +28 -0
  67. stcrpy-1.0.0.dist-info/licenses/stcrpy/tcr_geometry/TCRCoM_LICENCE +168 -0
  68. stcrpy-1.0.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,480 @@
1
+ """
2
+ Created on 10 May 2017
3
+ @author: leem
4
+
5
+ Implementation to call anarci (built-in to STrDab) to annotate structures.
6
+ """
7
+ import sys
8
+ import warnings
9
+
10
+ from Bio.PDB.Polypeptide import aa1, aa3 # to allow me to return "X" if not found.
11
+
12
# Lookup table from three-letter residue names (aa3) to one-letter codes (aa1).
to_one_letter_code = {three: one for three, one in zip(aa3, aa1)}
13
+
14
+ # Import TCRDB's constants and common functions.
15
+ from .utils.constants import TCR_CHAINS
16
+
17
+
18
def call_anarci(
    seq,
    allow={
        "B",
        "A",
        "D",
        "G",
        "GA1",
        "GA2",
        "GA1L",
        "GA2L",
        "GA",
        "GB",
        "B2M",
        "MH1",
        "MR1",
        "MR2",
    },
):
    """
    Number a sequence with the ANARCI program.

    @param seq: An amino acid sequence that you wish to number.
    @type seq: C{str}
    @param allow: Chain types handed to ANARCI as its ``allow`` argument; also used
                  here to decide whether a single-domain result is accepted.

    @return: (numbering, chain type, germline info) with gap positions ("-") removed.
             For the paired chain types "BA", "GD", "AB" and "DG" the numbering is a
             list with one gap-free numbering per domain.
             (False, False, False) is returned when nothing was numbered, or when the
             chain type is neither accepted nor one of the paired types. Chain types
             containing "MR" are never accepted by the single-domain branch.
    """
    from anarci import number as anarci_number

    def without_gaps(domain):
        # Keep only positions that carry an actual residue.
        return [(position, aa) for position, aa in domain if aa != "-"]

    numbering, chain_type, germline_info = anarci_number(
        seq, allow=allow, assign_germline=True
    )

    if not numbering:
        return False, False, False

    # Single-domain result of an accepted (non-"MR") chain type.
    if "MR" not in chain_type and chain_type in allow:
        return without_gaps(numbering), chain_type, germline_info

    # Paired result: clean each domain's numbering separately.
    if chain_type in ("BA", "GD", "AB", "DG"):
        return (
            [without_gaps(domain) for domain in numbering],
            chain_type,
            germline_info,
        )

    return False, False, False
62
+
63
+
64
def annotate(chain):
    """
    Annotate the sequence of a chain object.

    The chain's sequence is extracted, numbered with call_anarci and the resulting
    numbering is mapped back onto the chain's original residue ids.

    @param chain: chain object accepted by extract_sequence.

    @return: a 4-tuple (aligned_numbering, chain_type, germline_info, scTCR):
             - aligned_numbering: for chain types whose letters are "BA" or "GD" in
               any order, a two-element list of dictionaries (one per domain);
               otherwise a single dictionary keyed by original residue id, or False
               when the chain could not be numbered;
             - chain_type and germline_info exactly as returned by call_anarci;
             - scTCR: True only for the "BA"/"GD" case.
    """
    sequence_list, sequence_str = extract_sequence(chain)
    numbering, chain_type, germline_info = call_anarci(sequence_str)

    # Sort the characters of the chain type in reverse so that the comparison below
    # does not depend on their order (e.g. "AB" and "BA" both become "BA").
    chtype = "".join(sorted(chain_type, reverse=True)) if chain_type else False

    scTCR = False
    if chtype in ("BA", "GD"):
        # Two domains on one chain: keep one numbering dictionary per domain.
        aligned_numbering = align_scTCR_numbering(
            numbering, sequence_list, sequence_str
        )
        scTCR = True
    elif chtype in ("DC1", "RM1"):
        # "DC1"/"RM1" are the reverse-sorted letters of "CD1"/"MR1". The two-domain
        # alignment is reused, then both dictionaries are merged into one and the
        # unnumbered gaps between them are filled in.
        per_domain = align_scTCR_numbering(numbering, sequence_list, sequence_str)
        per_domain[0].update(per_domain[1])
        aligned_numbering = cleanup_scTCR_numbering(per_domain[0], sequence_list)
    else:
        # Single domain (or nothing numbered): align original residue ids directly.
        aligned_numbering = align_numbering(numbering, sequence_list)

    return aligned_numbering, chain_type, germline_info, scTCR
104
+
105
+
106
def extract_sequence(
    chain, selection=False, return_warnings=False, ignore_hets=False, backbone=False
):
    """
    Get the amino acid sequence of the chain.

    Residues are read in the order given by chain.get_list(), so this works provided
    the residues in the chain are in the correct order.

    HETATM residues (residue.id[0] != " ") are treated as follows:
      - a residue name that cannot be converted into a one-letter code is always skipped;
      - a residue name that can be converted is kept, unless ignore_hets is set, in
        which case it is skipped and a warning is either collected
        (return_warnings=True) or written to stderr (return_warnings=False).

    @param chain: chain whose residues expose .id, .get_resname(), .child_dict and .parent.id
    @param selection: a selection object to select certain residues (its accept(residue) is consulted)
    @param return_warnings: Flag to return a list of warnings or not
    @param ignore_hets: Flag to leave convertible HETATM residues out of the sequence
    @param backbone: Flag whether to only show residues with a complete backbone (N, C, CA, O);
                     residues with an incomplete backbone are written as "-".
    @return: The sequence in a resid:aa tuple list and the sequence as a string;
             when return_warnings is set, the list of warnings is appended as a third item.
    """
    backbone_atoms = ("N", "C", "CA", "O")
    sequence_list = []
    # Not called "warnings": that name belongs to the module imported at file level.
    het_warnings = []

    for residue in chain.get_list():
        resname = residue.get_resname()

        if residue.id[0] != " ":
            # HETATM record: only amino acids we can name are candidates at all.
            if resname not in to_one_letter_code:
                continue
            if ignore_hets:
                message = (
                    "Warning: HETATM residue %s at position %s (PDB numbering) found in chain %s.\n"
                    "Not including it in structure's sequence."
                    % (
                        resname,
                        str(residue.id[1]) + residue.id[2].strip(),
                        residue.parent.id,
                    )
                )
                if return_warnings:
                    het_warnings.append(message)
                else:
                    sys.stderr.write(message + "\n")
                continue

        if selection and not selection.accept(residue):
            continue

        # With backbone=True a residue missing any backbone atom becomes a gap;
        # in every other case the one-letter code is used ("X" if unknown).
        if backbone and not all(atom in residue.child_dict for atom in backbone_atoms):
            letter = "-"
        else:
            letter = to_one_letter_code.get(resname, "X")
        sequence_list.append((residue.id, letter))

    sequence_str = "".join([r[1] for r in sequence_list])
    if return_warnings:
        return sequence_list, sequence_str, het_warnings
    return sequence_list, sequence_str
190
+
191
+
192
def interpret(x):
    """
    Interpret an annotation of the form <chain letter><number>[<insertion letter>].

    For example "<c>100A" becomes (100, "A") and "<c>100" becomes (100, " "), where
    <c> must be a member of TCR_CHAINS (checked with an assertion).
    """
    assert x[0] in TCR_CHAINS, x
    position = x[1:]
    try:
        # No insertion code: everything after the chain letter is the number.
        return (int(position), " ")
    except ValueError:
        # Otherwise the final character is taken as the insertion code.
        return (int(position[:-1]), position[-1])
201
+
202
+
203
def align_numbering(numbering, sequence_list, alignment_dict=None):
    """
    Align the sequence that has been numbered to the sequence you input.
    The numbered sequence should be "in" the input sequence.
    If not, supply an alignment dictionary.(align sequences and use get_alignment_dict(ali1,ali2))

    @param numbering: list of ((number, insertion), aa) entries, or a falsy value.
    @param sequence_list: list of (original residue id, aa) entries.
    @param alignment_dict: optional mapping from index in the input sequence to index in
                           the numbered sequence. When empty or None it is computed with
                           pairwise_alignment / get_alignment_dict.
    @return: False when numbering is falsy; otherwise a dictionary keyed by original
             residue id whose value is
             - "" for residues before the first aligned residue,
             - the (number, insertion) of the aligned numbering entry,
             - (n, " ") for residues after the numbered stretch, counting up from the
               last number in numbering plus one.
    @raise Exception: when the two sequences could not be aligned.
    @raise AssertionError: when an aligned residue follows a trailing one, or the
                           alignment dictionary pairs two different letters.
    """
    if not numbering:
        return False

    numbered_sequence = "".join([r[1] for r in numbering])
    input_sequence = "".join([r[1] for r in sequence_list])

    if not alignment_dict:
        try:
            numbered_sequence_ali, input_sequence_ali = pairwise_alignment(
                numbered_sequence, input_sequence
            )
            alignment_dict = get_alignment_dict(
                input_sequence_ali, numbered_sequence_ali
            )
        except Exception as err:
            # Same exception type and text as before, now chained to the root cause.
            raise Exception(
                "Could not align numbered sequence to aligned sequence:"
                + " "
                + str(numbered_sequence)
                + " "
                + str(input_sequence)
            ) from err

    # First number handed out to residues that follow the numbered stretch.
    first_trailing_number = numbering[-1][0][0] + 1

    aligned_numbering = {}
    n = -1  # stays -1 until the first aligned residue has been seen
    after_flag = False
    for i, letter in enumerate(input_sequence):
        residue_id = sequence_list[i][0]
        if i in alignment_dict:
            # during
            assert (
                after_flag is False
            ), "Extra residue in structure than expected from provided sequence"
            assert (
                letter == numbered_sequence[alignment_dict[i]]
            ), "alignment dictionary failed"
            aligned_numbering[residue_id] = numbering[alignment_dict[i]][0]
            n = first_trailing_number
        elif n > -1:
            # after
            after_flag = True
            aligned_numbering[residue_id] = (n, " ")
            n += 1
        else:
            # before numbering
            aligned_numbering[residue_id] = ""

    return aligned_numbering
255
+
256
+
257
def align_scTCR_numbering(numbering, sequence_list, sequence_str):
    """
    Align the sequence that has been numbered to a scTCR structure.

    Each numbered domain is aligned to the full structure sequence separately. Only
    residues that take part in the alignment are written to the result; residues of
    the structure outside the aligned stretch get no entry.

    @param numbering: numbered list of residues; this is usually a two-element list/tuple,
                      each element being a list of ((number, insertion), aa) entries
    @param sequence_list: list of residues (e.g. from a structure) in its original numbering
    @param sequence_str: string form of sequence_list
    @return: False when numbering is falsy; otherwise a two-element list of dictionaries
             (one per domain) mapping original residue id to (number, insertion).
    @raise Exception: when a domain could not be aligned to the structure sequence.
    @raise AssertionError: when an aligned pair of positions holds different letters.
    """
    if not numbering:
        return False

    numbered_sequence = ["".join([r[1] for r in n]) for n in numbering]
    input_sequence = sequence_str

    aligned_numbering = [{}, {}]

    for ii, a_sequence in enumerate(numbered_sequence):
        # Align each of the joined sequences from the numbering into the target
        # structure sequence in "sequence_str"
        try:
            a_sequence_ali, input_sequence_ali = pairwise_alignment(
                a_sequence, input_sequence
            )
            alignment_dict = get_alignment_dict(input_sequence_ali, a_sequence_ali)
        except Exception as err:
            # Same exception type and text as before, now chained to the root cause.
            raise Exception(
                "Could not align numbered sequence to aligned sequence"
                + "\n"
                + str(numbered_sequence)
                + "\n"
                + str(input_sequence)
            ) from err

        # The loop only visits aligned positions, so there is no "before" or
        # "after" case to handle here (those branches could never run).
        for structure_index, numbered_index in alignment_dict.items():
            assert (
                input_sequence[structure_index] == a_sequence[numbered_index]
            ), "alignment dictionary failed"
            aligned_numbering[ii][sequence_list[structure_index][0]] = numbering[ii][
                numbered_index
            ][0]

    return aligned_numbering
315
+
316
+
317
def cleanup_scTCR_numbering(numbering_dict, sequence_list):
    """
    Close the gaps left in a merged two-domain numbering.

    Residues lying between numbered stretches (e.g. a linker between two domains)
    have no entry after align_scTCR_numbering. Each such residue is given the number
    of its nearest numbered neighbour on the left plus its distance from it, with a
    blank insertion code, so that it keeps its place during structural parsing.

    @param numbering_dict: numbered dictionary from align_scTCR_numbering (updated in place)
    @param sequence_list : sequence list from the structure for alignment.
    @return: the same numbering_dict object, with the gaps filled in.
    """
    positions = [entry[0] for entry in sequence_list]

    # Index in sequence_list of the largest key present in numbering_dict; nothing
    # beyond it is touched.
    last_index = positions.index(max(numbering_dict))

    for index in range(1, len(positions)):
        if index > last_index:
            break
        if positions[index] in numbering_dict:
            continue

        # Walk left until a numbered residue is found.
        # NOTE(review): if nothing to the left is numbered this index goes negative
        # and wraps around to the end of the list - confirm that callers always
        # have a numbered residue on the left.
        left_index = index
        while positions[left_index] not in numbering_dict:
            left_index -= 1
        left_value = numbering_dict[positions[left_index]]

        # Walk right until a numbered residue is found.
        right_index = index
        while positions[right_index] not in numbering_dict:
            right_index += 1

        # Number everything strictly between the two bounds.
        gap = positions[left_index + 1 : right_index]
        for offset, missing_key in enumerate(gap, start=1):
            numbering_dict[missing_key] = (left_value[0] + offset, " ")

    return numbering_dict
378
+
379
+
380
def get_alignment_dict(ali1, ali2):
    """
    Map indices of sequence 1 (key) onto the indices of sequence 2 they align with.

    ali1:  ----bcde-f---       seq1: bcdef
    ali2:  ---abcd--f---       seq2: abcdf

    alignment_dict={
        0:1,
        1:2,
        2:3,
        4:4
        }

    Indices are positions in the ungapped sequences. A position aligned against a gap
    is left out of the dictionary.
    e.g 1 in alignment_dict --> True
    e.g 3 in alignment_dict --> False
    """
    assert len(ali1) == len(
        ali2
    ), "aligned sequences must be same lengths (including gaps)"

    alignment_dict = {}
    next1 = 0  # index the next residue of sequence 1 will get
    next2 = 0  # index the next residue of sequence 2 will get
    for char1, char2 in zip(ali1, ali2):
        has1 = char1 != "-"
        has2 = char2 != "-"
        if has1 and has2:
            alignment_dict[next1] = next2
        if has1:
            next1 += 1
        if has2:
            next2 += 1
    return alignment_dict
414
+
415
+
416
def pairwise_alignment(seq1, seq2, exact=False):
    """
    Align two sequences, returning them as two gapped strings of equal length.

    The cheap substring check in easy_alignment is tried first; only when neither
    sequence contains the other is Biopython's pairwise2 global alignment used.

    @param seq1: first sequence (string)
    @param seq2: second sequence (string)
    @param exact: use globalms with explicit scores instead of globalxx
    @return: (aligned seq1, aligned seq2), or (False, False) when no alignment was produced.
    """
    s1_aln, s2_aln = easy_alignment(seq1, seq2)
    if s1_aln:
        return s1_aln, s2_aln

    # Imported only when actually needed, so the fast path above does not depend on it.
    with warnings.catch_warnings():  # prevents pairwise2 deprecation warning from being raised
        warnings.simplefilter("ignore")
        from Bio.pairwise2 import align

    if exact:
        # globalms takes (match, mismatch, open, extend): match score 1, mismatch 0,
        # gap opening -1, gap extension -1.001.
        # NOTE(review): an earlier comment here gave the two gap values the other
        # way round - confirm which was intended.
        alignment = align.globalms(seq1, seq2, 1, 0, -1, -1.001)
    else:
        alignment = align.globalxx(seq1, seq2)

    if not alignment:
        return False, False

    # Take the first alignment returned; its first two fields are the aligned sequences.
    aligned_seqs = alignment[0]
    return aligned_seqs[0], aligned_seqs[1]
440
+
441
+
442
def easy_alignment(seq1, seq2):
    """
    Align two sequences by checking whether one is contained in the other.

    The contained sequence is padded with "-" on both sides to the length of the
    containing one; gaps already present in the inputs are left as they are.

    @return: (aligned seq1, aligned seq2), or (False, False) when neither sequence
             contains the other.
    """
    assert (
        type(seq1) is str and type(seq2) is str
    ), "Sequences must be strings for easy_alignment"

    def pad_into(inner, outer):
        # Pad inner with "-" at its first occurrence in outer; None if absent.
        start = outer.find(inner)
        if start == -1:
            return None
        return inner.rjust(start + len(inner), "-").ljust(len(outer), "-")

    padded = pad_into(seq1, seq2)
    if padded is not None:
        return padded, seq2

    padded = pad_into(seq2, seq1)
    if padded is not None:
        return seq1, padded

    # Can't align them
    return False, False
463
+
464
+
465
def validate_sequence(seq):
    """
    Check that a sequence is short enough and made only of known amino acid letters.

    @param seq: sequence to check; compared case-insensitively against aa1.
    @return: True when the sequence passes both checks.
    @raise AssertionError: when the sequence is longer than 10000 characters or
                           contains a letter that is not in aa1.
    """
    if len(seq) > 10000:
        raise AssertionError("Sequence too long.")

    upper_seq = seq.upper()
    for letter in upper_seq:
        if letter not in aa1:
            raise AssertionError(
                "Unknown amino acid letter found in sequence: " + upper_seq
            )
    return True
477
+
478
+
479
if __name__ == "__main__":
    # Nothing is executed when this module is run as a script.
    pass
File without changes
@@ -0,0 +1,67 @@
1
+ import math
2
+ import warnings
3
+ import numpy as np
4
+
5
+
6
def fastcross(v, w):
    """
    Cross product of two 3-component vectors, written out component by component.

    Intended as a quicker alternative to NumPy's own cross product.
    Only indices 0, 1 and 2 of each argument are read.

    @return: numpy array with the three components of v x w.
    """
    vx, vy, vz = v[0], v[1], v[2]
    wx, wy, wz = w[0], w[1], w[2]
    x = vy * wz - vz * wy
    y = vz * wx - vx * wz
    z = vx * wy - vy * wx
    return np.array([x, y, z])
15
+
16
+
17
def fastnorm(A):
    """Euclidean norm of the iterable A, computed with plain Python arithmetic."""
    squared_total = 0
    for component in A:
        squared_total += component**2
    return math.sqrt(squared_total)
20
+
21
+
22
def identity(seq1, seq2, positions=None):
    """
    Find the matched sequence identity between two aligned sequences.
    Can accept lists/strings, but this assumes that the two sequences are of the same length.

    Positions where either sequence has a gap ("-"), or which are missing from either
    dictionary, are not counted at all.

    @param seq1: Dictionary with key as the position and value as the single letter amino acid code. or an aligned list or string
    @param seq2: Dictionary with key as the position and value as the single letter amino acid code. or an aligned list or string
    @param positions: positions to compare. Only used when both sequences are
                      dictionaries (default: the union of their keys); for lists and
                      strings every index is compared and this argument is ignored.
    @return: matches divided by compared positions, as a float; 0.0 when no position
             could be compared.
    @raise AssertionError: when two non-dictionary sequences differ in length.
    """
    if isinstance(seq1, dict) and isinstance(seq2, dict):
        if not positions:
            positions = set(seq1) | set(seq2)
    else:
        assert len(seq1) == len(seq2), "Use two aligned sequences."
        positions = range(len(seq1))

    compared = 0
    matched = 0
    for p in positions:
        try:
            residue1 = seq1[p]
            residue2 = seq2[p]
        except KeyError:
            # Position present in only one of the dictionaries.
            continue
        if residue1 == "-" or residue2 == "-":
            continue
        compared += 1
        if residue1 == residue2:
            matched += 1

    if compared == 0:
        return 0.0
    return matched / compared
57
+
58
+
59
def angle(v1, v2):
    """
    Return the angle between two vectors, in radians.

    The cosine is the dot product divided by the product of the two norms. Rounding
    can push it marginally outside [-1, 1], where arccos is undefined, so each
    out-of-range side is mapped to the angle at its own limit: 0 for values above 1
    (vectors pointing the same way) and pi for values below -1 (opposite ways).
    """
    cos_theta = np.dot(v1, v2) / (fastnorm(v1) * fastnorm(v2))
    if cos_theta > 1:
        return 0.0
    if cos_theta < -1:
        return np.pi
    return np.arccos(cos_theta)