treesak 1.53.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. TreeSAK/ALE.py +63 -0
  2. TreeSAK/ALE1.py +268 -0
  3. TreeSAK/ALE2.py +168 -0
  4. TreeSAK/ALE2RTC.py +30 -0
  5. TreeSAK/ALE3.py +205 -0
  6. TreeSAK/ALE4.py +636 -0
  7. TreeSAK/ALE5.py +210 -0
  8. TreeSAK/ALE6.py +401 -0
  9. TreeSAK/ALE7.py +126 -0
  10. TreeSAK/ALE_backup.py +1081 -0
  11. TreeSAK/AssessCVG.py +128 -0
  12. TreeSAK/AssessMarker.py +306 -0
  13. TreeSAK/AssessMarkerDeltaLL.py +257 -0
  14. TreeSAK/AssessMarkerPA.py +317 -0
  15. TreeSAK/AssessPB.py +113 -0
  16. TreeSAK/BMGE.jar +0 -0
  17. TreeSAK/BMGE.py +49 -0
  18. TreeSAK/C60SR4.nex +127 -0
  19. TreeSAK/CompareMCMC.py +138 -0
  20. TreeSAK/ConcateMSA.py +111 -0
  21. TreeSAK/ConvertMSA.py +135 -0
  22. TreeSAK/Dir.rb +82 -0
  23. TreeSAK/ExtractMarkerSeq.py +263 -0
  24. TreeSAK/FastRoot.py +1175 -0
  25. TreeSAK/FastRoot_backup.py +1122 -0
  26. TreeSAK/FigTree.py +34 -0
  27. TreeSAK/GTDB_tree.py +76 -0
  28. TreeSAK/GeneTree.py +142 -0
  29. TreeSAK/KEGG_Luo17.py +807 -0
  30. TreeSAK/LcaToLeaves.py +66 -0
  31. TreeSAK/MarkerRef2Tree.py +616 -0
  32. TreeSAK/MarkerRef2Tree_backup.py +628 -0
  33. TreeSAK/MarkerSeq2Tree.py +299 -0
  34. TreeSAK/MarkerSeq2Tree_backup.py +259 -0
  35. TreeSAK/ModifyTopo.py +116 -0
  36. TreeSAK/Newick_tree_plotter.py +79 -0
  37. TreeSAK/OMA.py +170 -0
  38. TreeSAK/OMA2.py +212 -0
  39. TreeSAK/OneLineAln.py +50 -0
  40. TreeSAK/PB.py +155 -0
  41. TreeSAK/PMSF.py +115 -0
  42. TreeSAK/PhyloBiAssoc.R +84 -0
  43. TreeSAK/PhyloBiAssoc.py +167 -0
  44. TreeSAK/PlotMCMC.py +41 -0
  45. TreeSAK/PlotMcmcNode.py +152 -0
  46. TreeSAK/PlotMcmcNode_old.py +252 -0
  47. TreeSAK/RootTree.py +101 -0
  48. TreeSAK/RootTreeGTDB.py +371 -0
  49. TreeSAK/RootTreeGTDB214.py +288 -0
  50. TreeSAK/RootTreeGTDB220.py +300 -0
  51. TreeSAK/SequentialDating.py +16 -0
  52. TreeSAK/SingleAleHGT.py +157 -0
  53. TreeSAK/SingleLinePhy.py +50 -0
  54. TreeSAK/SliceMSA.py +142 -0
  55. TreeSAK/SplitScore.py +21 -0
  56. TreeSAK/SplitScore1.py +177 -0
  57. TreeSAK/SplitScore1OMA.py +148 -0
  58. TreeSAK/SplitScore2.py +608 -0
  59. TreeSAK/TaxaCountStats.R +256 -0
  60. TreeSAK/TaxonTree.py +47 -0
  61. TreeSAK/TreeSAK_config.py +32 -0
  62. TreeSAK/VERSION +164 -0
  63. TreeSAK/VisHPD95.R +45 -0
  64. TreeSAK/VisHPD95.py +200 -0
  65. TreeSAK/__init__.py +0 -0
  66. TreeSAK/ale_parser.py +74 -0
  67. TreeSAK/ale_splitter.py +63 -0
  68. TreeSAK/alignment_pruner.pl +1471 -0
  69. TreeSAK/assessOG.py +45 -0
  70. TreeSAK/batch_itol.py +171 -0
  71. TreeSAK/catfasta2phy.py +140 -0
  72. TreeSAK/cogTree.py +185 -0
  73. TreeSAK/compare_trees.R +30 -0
  74. TreeSAK/compare_trees.py +255 -0
  75. TreeSAK/dating.py +264 -0
  76. TreeSAK/dating_ss.py +361 -0
  77. TreeSAK/deltall.py +82 -0
  78. TreeSAK/do_rrtc.rb +464 -0
  79. TreeSAK/fa2phy.py +42 -0
  80. TreeSAK/filter_rename_ar53.py +118 -0
  81. TreeSAK/format_leaf_name.py +70 -0
  82. TreeSAK/gap_stats.py +38 -0
  83. TreeSAK/get_SCG_tree.py +742 -0
  84. TreeSAK/get_arCOG_seq.py +97 -0
  85. TreeSAK/global_functions.py +222 -0
  86. TreeSAK/gnm_leaves.py +43 -0
  87. TreeSAK/iTOL.py +791 -0
  88. TreeSAK/iTOL_gene_tree.py +80 -0
  89. TreeSAK/itol_msa_stats.py +56 -0
  90. TreeSAK/keep_highest_rrtc.py +37 -0
  91. TreeSAK/koTree.py +194 -0
  92. TreeSAK/label_gene_tree_by_gnm.py +34 -0
  93. TreeSAK/label_tree.R +75 -0
  94. TreeSAK/label_tree.py +121 -0
  95. TreeSAK/mad.py +708 -0
  96. TreeSAK/mcmc2tree.py +58 -0
  97. TreeSAK/mcmcTC copy.py +92 -0
  98. TreeSAK/mcmcTC.py +104 -0
  99. TreeSAK/mcmctree_vs_reltime.R +44 -0
  100. TreeSAK/mcmctree_vs_reltime.py +252 -0
  101. TreeSAK/merge_pdf.py +32 -0
  102. TreeSAK/pRTC.py +56 -0
  103. TreeSAK/parse_mcmctree.py +198 -0
  104. TreeSAK/parse_reltime.py +141 -0
  105. TreeSAK/phy2fa.py +37 -0
  106. TreeSAK/plot_distruibution_th.py +165 -0
  107. TreeSAK/prep_mcmctree_ctl.py +92 -0
  108. TreeSAK/print_leaves.py +32 -0
  109. TreeSAK/pruneMSA.py +63 -0
  110. TreeSAK/recode.py +73 -0
  111. TreeSAK/remove_bias.R +112 -0
  112. TreeSAK/rename_leaves.py +78 -0
  113. TreeSAK/replace_clade.py +55 -0
  114. TreeSAK/root_with_out_group.py +84 -0
  115. TreeSAK/run_TaxaCountStats_R_s1.py +455 -0
  116. TreeSAK/subsample_drep_gnms.py +74 -0
  117. TreeSAK/subset.py +69 -0
  118. TreeSAK/subset_tree_stupid_old_way.py +193 -0
  119. TreeSAK/supertree.py +330 -0
  120. TreeSAK/tmp_1.py +19 -0
  121. TreeSAK/tmp_2.py +19 -0
  122. TreeSAK/tmp_3.py +120 -0
  123. TreeSAK/tmp_4.py +43 -0
  124. TreeSAK/tmp_5.py +12 -0
  125. TreeSAK/weighted_rand.rb +23 -0
  126. treesak-1.53.3.data/scripts/TreeSAK +955 -0
  127. treesak-1.53.3.dist-info/LICENSE +674 -0
  128. treesak-1.53.3.dist-info/METADATA +27 -0
  129. treesak-1.53.3.dist-info/RECORD +131 -0
  130. treesak-1.53.3.dist-info/WHEEL +5 -0
  131. treesak-1.53.3.dist-info/top_level.txt +1 -0
TreeSAK/mad.py ADDED
@@ -0,0 +1,708 @@
1
"""
MAD phylogenetic rooting
- Please cite DOI:10.1038/s41559-017-0193

Usage:
    mad filename [-mnsptfgvh]

Where the file contains tree(s) in NEWICK format.
Rooted tree(s) will be written to 'filename.rooted',
rooting statistics to screen.

Flags:
    -m: Multiple trees in input file, output file is verbose and
        contains '>' (info) and '<' (error) lines in addition to
        NEWICK strings.
        Default allows exactly one tree, and output is pure NEWICK.
    -n: Like -m, but pure newick. Only one rooted tree per input tree,
        errors as a lone ';' in a line.
    -s: Statistics - append MAD statistics to output NEWICK strings.
    -p: Polytomies - flat polytomies in rooted NEWICK (if present).
        Default is a binary tree with polytomies arbitrarily resolved
        into zero-length branches.
    -t: Tiny - retain branch lengths smaller than 10^-6.
        Default contracts these to 0.0, thereby creating polytomies.
    -f | -g: Figtree - rooted trees formatted for viewing AD scores in figtree.
        Important: after loading in figtree, please check the option
        'Appearance>Gradient' manually, otherwise the branch colors will
        be misleading. -f reports ancestor deviations (AD) only for nodes,
        while -g reports also within-branch maximizing positions and AD values.
    -v: Version.
    -h: Help.

Please report bugs to giddy.landan@gmail.com .
"""
# The usage text above must be the first statement of the module: only then
# does Python bind it to ``__doc__``, which the help path prints.
from sys import argv, stderr, version, exit
from re import sub
38
+
39
+ #v2.2 27-Mar-2018
40
+ #=====================
41
+ #---- init
42
+
43
def mad(flags):
    """Root the tree(s) of a NEWICK file by minimal ancestor deviation (MAD).

    This first part of the function parses the command line, sets constants,
    reads the input file, opens ``<filename>.rooted`` for writing and runs
    the deferred imports.

    Args:
        flags: Ignored. The flag table is rebuilt from ``sys.argv`` below
            (every argument starting with '-' contributes its upper-cased
            letters), so whatever the caller passes in is overwritten.

    Exits (via ``sys.exit``) on -v, -h, conflicting flags, a missing or
    unreadable input file, an empty input, or an unwritable output file.
    """
    # ``startswith`` instead of ``a[0]``: an empty argv entry must not raise.
    fstr = "".join([a.upper() for a in argv if a.startswith('-')])
    flags = {f: fstr.count(f) for f in "MNSPTFGVHD"}

    if flags['V']:
        # Raw string: '\s' and '\[' are invalid escapes in a normal literal.
        v = sub(r'\s+[^\[]+', ' ', version, count=1)
        print('mad 2.2\npython ' + v)
        exit()
    if flags['G'] and flags['F']:
        print('Flags -g and -f are mutually exclusive.')
        exit()
    if flags['M'] and flags['N']:
        print('Flags -m and -n are mutually exclusive.')
        exit()
    if len(argv) < 2 or flags['H']:
        print(__doc__)
        exit()
    fstr = [a for a in argv if not a.startswith('-')]
    # argv[0] (the program name) plus exactly one file name.
    if len(fstr) != 2:
        exit('Expecting exactly one filename')
    print("\nMAD phylogenetic rooting")
    fn = fstr[1]
    #-------- consts
    minlen = 1e-15 if flags['T'] else 1e-6
    madtol = 1.0001
    gdbug_level = flags['D']
    at = "#^$%"  # stand-in for '@' in labels; '@' is used as an internal marker
    if flags['M'] or flags['N']:
        err = "Error: "
    else:
        err = "Error analyzing file '" + fn + "':\n\t"
    bug = err + "Oops... Please report this error to giddy.landan@gmail.com ."

    class madError(Exception):
        pass

    #------- short circuit init - deferred imports
    try:
        with open(fn) as x:
            nwkstr = x.read()
    except FileNotFoundError:
        exit("\nFile not found: " + fn)
    except Exception:
        # Not a bare ``except``: Ctrl-C and SystemExit must pass through.
        exit("\nError reading file: " + fn)
    blackspace = str.maketrans('', '', '\t\n\r\f\v')
    mnwk = nwkstr.translate(blackspace).split(';')
    if mnwk[-1] == '':
        mnwk.pop()
    nnwk = len(mnwk)
    if nnwk == 0:
        exit(err + "No NEWICK string found.")
    if nnwk > 1 and not flags['M'] and not flags['N']:
        exit(err + "Expecting exactly one string terminated with ';'. (For multiple inputs, use -m.)")
    #---
    ofn = fn + ".rooted"
    try:
        fh = open(ofn, 'w')
    except Exception:
        exit("\nError opening output file: " + ofn)
    #----------------
    #------ imports
    import re
    import numpy as np
    from copy import deepcopy
    from statistics import stdev, mean
    from inspect import stack
103
+ #----- functions
104
+ #========================================
105
+ #===== write to output file
106
def writeout(s):
    """Write ``s`` followed by a newline to the open output file ``fh``.

    If the write fails, the file is closed and the program exits with a
    message naming ``ofn``.

    NOTE(review): indentation is lost in this listing; this helper appears
    to be nested inside ``mad()``, where ``fh``/``ofn`` are local names. The
    former ``global fh,ofn`` forced a module-level lookup and would raise
    NameError there. Both names are only read, so no declaration is needed
    and normal scoping finds them in either layout.
    """
    try:
        fh.write(s + "\n")
    except Exception:
        fh.close()
        exit("\nError writing to file: " + ofn)
#<-- writeout
114
+ #========================================
115
+
116
+ #========================================
117
+ #===== deal outputs
118
def madlog(s):
    """Dispatch an info, warning or error message to screen and output file.

    Messages starting with '>' are info lines, messages starting with
    'Error' are errors (prefixed '<<< '), everything else is prefixed '>> '.
    With -m, messages are also written to the output file. When the debug
    level is at least 1, the caller's file name and line number are added.

    Raises:
        madError: for every message that starts with 'Error'. With -n a
            lone ';' is written to the output file first.

    NOTE(review): the former ``global fh,ofn,gdbug_level,flags`` was dropped;
    the names are only read, and a module-level lookup fails if this helper
    is nested inside ``mad()`` (confirm against the real indentation).
    """
    is_error = s.startswith('Error')
    if s.startswith('>'):
        pref = ''
    elif is_error:
        pref = '<<< '
    else:
        pref = '>> '
    if gdbug_level < 1:
        pos = ''
    else:
        sat = stack()[1][1:3]
        pos = "{}:{} ".format(sat[0], sat[1])
    if pref == '' and flags['M']:
        writeout(s.strip())
    else:
        # A callable replacement inserts the prefix verbatim; a plain string
        # would treat backslashes in ``pos`` (e.g. Windows paths) as escapes.
        s1 = re.sub('(?m)^', lambda _m: pref + pos, s.strip())
        if s1.startswith('<'):
            s1 += "\n"
        if flags['M']:
            writeout(s1)
        print(s1)
    if is_error:
        if flags['N']:
            writeout(';')
        raise madError
#<-- madlog
140
+ #========================================
141
+
142
+ #========================================
143
+ #===== debug messages
144
def gdbug(lev, *args):
    """Print debug values to stderr when ``lev`` does not exceed the debug level.

    Each value in ``args`` is printed on its own line, followed by the
    caller's file name and line number.

    NOTE(review): the former ``global gdbug_level`` was dropped; the name is
    only read, and a module-level lookup fails if this helper is nested
    inside ``mad()`` (confirm against the real indentation).
    """
    if lev > gdbug_level:
        return
    sat = stack()[1][1:3]
    print("---\n", file=stderr)
    # Plain loop: the original built a throwaway list just for the printing.
    for x in args:
        print(x, file=stderr)
    print("<-- {}:{}".format(sat[0], sat[1]), file=stderr)
#<-- gdbug
153
+ #========================================
154
+
155
+ #========================================
156
+ #===== branch lengths conversion and sanity
157
def str2len(s):
    """Convert a branch-length string to a float and check it is finite.

    A non-numeric string, or an infinite/NaN value, is reported through
    ``madlog`` with an ``err``-prefixed message.
    """
    try:
        length = float(s)
    except ValueError:
        madlog(err + "Corrupt NEWICK format: invalid branch length (...'" + s + "'...).")
    finite = np.isfinite(length)
    if not finite:
        madlog(err + "Cowardly refusing to root trees with infinite or undefined branch lengths (...'" + s + "'...).")
    return length
#<-- str2len
164
+ #========================================
165
+
166
+ #========================================
167
+ #======= convert NEWICK string, fills: labs,blen,nodes,trip + various scalars
168
def nwk2tree(nwkstr):
    """Parse one NEWICK string into the array representation used by MAD.

    The innermost '(...)' group is repeatedly replaced by an '@<node>@'
    marker until no group is left; groups with more than two members are
    first rewritten as nested pairs joined by a zero-length branch.

    Args:
        nwkstr: a single NEWICK tree terminated with ';'.

    Returns:
        Tuple ``(notu, nnode, rootnode, nodes, blen, trip, labs, prec, tlen)``:
        number of OTUs, number of nodes, index of the root node, the
        ``nnode x 3`` table of child0/child1/parent indices (-1 when unset),
        per-node branch lengths, the ``nnode x nnode`` relation matrix
        (0/1 = reached through child 0/1, 2 = elsewhere, -1 = self),
        node labels, the longest branch-length string seen, and the summed
        branch length.

    Raises:
        madError: through ``madlog`` for every malformed or unsupported input.
    """
    nc = [nwkstr.count(x) for x in ';,:()']
    if nc[0] != 1 or nwkstr[-1] != ';':
        madlog(err + "Corrupt NEWICK format - expecting exactly one string terminated with ';'.")
    if nc[3] != nc[4]:
        madlog(err + "Corrupt NEWICK format - unbalanced ().")
    notu = nc[1] + 1
    if notu < 3:
        madlog(err + "Cowardly refusing to root trees with less than 3 OTUs.")
    if nc[2] > 2*notu - 2:
        madlog(err + "Corrupt NEWICK format - too many ':' | too few ','.")
    if nc[3] > notu - 1:
        madlog(err + "Corrupt NEWICK format - too many '()' | too few ','.")
    nnode = notu*2 - 1
    rootnode = nnode - 1
    # int32 rather than int16: node indices exceed 32767 above 16384 OTUs.
    nodes = np.full((nnode, 3), -1, np.int32)
    trip = np.full((nnode, nnode), 2, np.int8)
    blen = [0]*nnode
    labs = ['']*nnode
    # Raw string: '\(' is an invalid escape sequence in a normal literal.
    cld_splt = re.compile(r'\(([^()]*)\)')
    node_splt = re.compile('[:@]')
    iotu, tlen, ntiny, prec = [0]*4
    badbsp = []
    inode = notu
    s2 = nwkstr.replace("@", at).strip()
    # Pre-set so the tail check below also works when the string has no '('.
    c3 = ['', '', s2]
    while s2.startswith("("):
        c3 = cld_splt.split(s2, 1)
        if len(c3) != 3:
            madlog(err + "Corrupt NEWICK format: () do not balance.")
        b2 = c3[1].split(',')
        if len(b2) < 2:
            madlog(err + "Corrupt NEWICK format: empty, singleton or unbalanced group ().")
        elif len(b2) > 2:
            # Polytomy: pair off the first two members and parse again.
            b2[1] = "({},{}):0.0".format(b2[0], b2[1])
            z = str.join(',', b2[1:])
            s2 = "{}({}){}".format(c3[0], z, c3[2])
            continue
        for i in [0, 1]:
            b = b2[i]
            # ``startswith`` so an empty clause gets the 'malformed' error
            # below instead of an IndexError.
            if not b.startswith("@"):
                z = b.split(":")
                if len(z) != 2:
                    madlog(err + "Corrupt NEWICK format: malformed 'node_label:branch_length' clouse (...'" + b + "'...).")
                if labs.count(z[0]) > 0:
                    madlog(err + "Cowardly refusing to root trees with duplicate OTU names ('" + z[0] + "').")
                b = "@{}@{}".format(iotu, b)
                trip[iotu, iotu] = -1
                iotu += 1
            ll = node_splt.split(b)
            if len(ll) != 4:
                madlog(err + "Corrupt NEWICK format: malformed 'node_label:branch_length' clouse (...'" + ll[2].replace(at, "@") + "'...).")
            jnode = int(ll[1])
            labs[jnode] = ll[2]
            x = str2len(ll[3])
            if abs(x) < minlen and x != 0 and not flags['T']:
                x = 0
                ntiny += 1
            if x < 0:
                madlog(err + "Cowardly refusing to root trees with negative branch lengths.")
            blen[jnode] = x
            tlen += x
            prec = max(prec, len(ll[3]))
            nodes[jnode, 2] = inode
            nodes[inode, i] = jnode
            trip[inode, trip[jnode, :] < 2] = i
        #<--i 0:1
        trip[inode, inode] = -1
        s2 = "{}@{}@{}".format(c3[0], inode, c3[2])
        inode += 1
    #<--inode loop
    n = sum([c3[2].count(x) for x in '(),:'])
    if n > 0:
        madlog(err + "Corrupt NEWICK format: () balanced out before end of string, tail contains more '(),:'.\n" +
               "\t(Possible cause - missing ';' between two trees).")
    if inode != nnode or iotu != notu:
        # A leftover ``print(s2); exit()`` used to sit here: it ended the
        # run with status 0 and made this report unreachable.
        madlog(bug + ' (Unidentified parsing error.)\nInput string: ' + nwkstr)
    if ntiny > 0 and not flags['T']:
        madlog("Warning: {} tiny branch lengths (<10^-6) were converted to 0. (Override with '-t'.)".format(ntiny))
    n = sum([x > 0 for x in blen])
    if n < 3:
        madlog(err + "Cowardly refusing to root trees with less than 3 positive branch lengths.")
    if tlen == 0:
        madlog(err + "Cowardly refusing to root zero-lengthed trees.")
    slen = np.sort(blen)
    n = sum(np.diff(slen[slen > 0]) == 0)
    if n > 0:
        madlog("Warning: Trees with repeating branch lengths are suspicious ({} repeating values).".format(n))
    if len(badbsp) > 0:
        madlog("Warning: Non-numeric or negative bootstrap values ignored. ({}).".format(badbsp))
    return (notu, nnode, rootnode, nodes, blen, trip, labs, prec, tlen)
#<--nwk2tree()
262
+ #========================================
263
+
264
+
265
+ #========================================
266
+ #-------- Pre-processing:
267
def pwdist_preproc():
    """Build pairwise distances and per-node distance lists for the AD step.

    Reads ``nnode``, ``notu``, ``nodes``, ``trip`` and ``blen`` from the
    enclosing scope.

    Returns:
        Tuple ``(enotu, npair, dist, dij, n2n)``: effective OTU count after
        squeezing zero-distance tips, number of effective OTU pairs, the
        ``nnode x nnode`` distance matrix, for every node four lists of
        distances to OTUs (relation 0, 1, 2, and "below the node"), and the
        map from each node to the first node at zero distance from it.
    """
    #======= pairwise distances
    dist = np.full((nnode, nnode), 0.0, np.float64)
    for parent in range(notu, nnode):
        for side in (0, 1):
            child = nodes[parent, side]
            for below in np.flatnonzero(trip[child, :] < 2):
                dist[parent, below] = dist[child, below] + blen[child]
                dist[below, parent] = dist[parent, below]
        side0 = np.flatnonzero(trip[parent, :] == 0)
        side1 = np.flatnonzero(trip[parent, :] == 1)
        for k0 in side0:
            for k1 in side1:
                dist[k0, k1] = dist[parent, k0] + dist[parent, k1]
                dist[k1, k0] = dist[k0, k1]
    #======= tip polytomies
    etrip = deepcopy(trip)
    n2n = list(range(nnode))
    ntipp = 0
    for i in range(nnode):
        if n2n[i] < i:
            continue
        same_place = np.flatnonzero(dist[i, :] == 0)
        if len(same_place) > 1:
            for j in same_place[1:]:
                n2n[j] = i
                if i < notu and j < notu:
                    # 69 matches none of the relation codes tested below,
                    # so the redundant tip drops out of every list.
                    etrip[j, :] = 69
                    etrip[:, j] = 69
                    ntipp += 1
    enotu = notu - ntipp
    npair = enotu*(enotu - 1)/2
    if ntipp > 0:
        madlog("Warning: Squeezing tip polytomies ({} OTUs, {} redundant tips, {} effective OTUs).".format(notu, ntipp, enotu))
    if enotu < 3:
        madlog(err + "Cowardly refusing to root trees with less than 3 effctive OTUs.")
    #======= preproc in lists
    dij = [[]]*nnode
    for i in range(nnode):
        relation = etrip[i, :notu]
        picks = [np.flatnonzero(relation == code) for code in (0, 1, 2)]
        picks.append(np.flatnonzero(relation < 2))
        dij[i] = [dist[i, idx].flatten().tolist() for idx in picks]
    return (enotu, npair, dist, dij, n2n)
#<-- pwdist_preproc()
314
+ #========================================
315
+
316
+ #========================================
317
+ #-------- branch AD
318
def ancestor_deviations():
    """Compute ancestor-deviation scores for every node and every branch.

    Reads ``nnode``, ``notu``, ``enotu``, ``npair``, ``dij``, ``n2n``,
    ``trip``, ``nodes``, ``blen``, ``minlen`` and ``nwkstr`` from the
    enclosing scope.

    Returns:
        Tuple ``(ad, nad, rlen, rlen2, ccv, depth)`` of per-node lists:
        branch AD, node AD, the two parts into which the candidate root
        position splits the branch, the stdev/mean ratio of root-to-tip
        distances, and the largest root-to-tip distance.
    """
    #-------- node deviations triplets
    dsum = [[]]*nnode
    for node in range(nnode):
        dsum[node] = [0]*3
        if node < notu:
            continue
        for side in (0, 1, 2):
            side_a = (side + 1) % 3
            side_b = (side + 2) % 3
            acc = 0
            for da in dij[node][side_a]:
                for db in dij[node][side_b]:
                    span = da + db
                    if span > 0:
                        acc += (2*da/span - 1)**2
            dsum[node][side] = acc
    #<-- for i
    #-------- node ad
    nad = [-1]*nnode
    ibrn = range(notu, nnode)
    for node in range(nnode):
        if n2n[node] < node:
            # Same position as an earlier node: reuse its score.
            nad[node] = nad[n2n[node]]
            continue
        if node < notu:
            nomin = enotu - 1
        else:
            nomin = sum(dsum[node])
        for k in ibrn:
            if k != node:
                nomin += dsum[k][trip[k, node]]
        nad[node] = (nomin/npair)**0.5
        if nad[node] > 1.0:
            madlog(bug + ' (Node AD out of range.)\nInput tree is: ' + nwkstr)
    #-------- transversing pairs and branch ad
    ad = [10**6]*nnode
    rlen = [-1]*nnode
    rlen2 = [-1]*nnode
    ccv = [-1]*nnode
    depth = [-1]*nnode
    polyroots = [-1]*nnode
    for i in range(nnode - 1):
        if blen[i] == 0:
            rlen[i] = 0
            rlen2[i] = 0
            ad[i] = nad[i] + 1
            continue
        j = nodes[i, 2]
        denom = 0
        nomin = 0
        for below in dij[i][3]:
            for above in dij[i][2]:
                span = below + above
                weight = span**-2
                denom += weight
                nomin += weight*(above - below)
        rho = nomin/(2*denom)
        r1 = min(max(0, rho), blen[i])
        r2 = blen[i] - r1
        rn = -1
        # Snap a root position within ``minlen`` of an end onto that node.
        if r1 < minlen and r1 < r2:
            r1, r2, rn = 0, blen[i], i
        elif r2 < minlen and r2 < r1:
            r1, r2, rn = blen[i], 0, j
        rlen[i] = r1
        rlen2[i] = r2
        if rn > -1 and polyroots[n2n[rn]] > -1:
            ad[i] = nad[rn] + 1
            continue
        nomin = 0
        for below in dij[i][3]:
            for above in dij[i][2]:
                span = below + above
                nomin += (2*(below + r1)/span - 1)**2
        for k in ibrn:
            if trip[i, k] == 2:
                nomin += dsum[k][trip[k, i]]
            else:
                nomin += dsum[k][trip[k, j]]
        ad[i] = (nomin/npair)**0.5
        if ad[i] > 1.0:
            madlog(bug + ' (AD out of range.)\nInput tree is: ' + nwkstr)
        r2t = [d + r1 for d in dij[i][3]] + [d - r1 for d in dij[i][2]]
        ccv[i] = stdev(r2t)/mean(r2t)
        depth[i] = max(r2t)
        if rn > -1:
            polyroots[n2n[rn]] = i
    return (ad, nad, rlen, rlen2, ccv, depth)
#<-- ancestor_deviations()
422
+ #========================================
423
+
424
+ #========================================
425
+ #======= convert to NEWICK string
426
def tree2nwk():
    """Serialise the current tree arrays into a rooted NEWICK string.

    Walks the tree from ``rootnode`` with an explicit stack and assembles
    each node's text once its children are done. With -f, node AD values
    are embedded as annotations; with -g, branches are also split at their
    candidate root position and annotated with the branch AD.

    Returns:
        The NEWICK text ``(<child 0 subtree>,<parent-side subtree>)``,
        without a trailing ';'.

    Raises:
        madError: through ``madlog`` if a node is stacked twice, or if the
            written branch lengths do not add up to ``tlen``.

    NOTE(review): the former ``global labs`` was dropped; ``labs`` is only
    read, and a module-level lookup fails if this helper is nested inside
    ``mad()`` (confirm against the real indentation).
    """
    nstck = [rootnode, nodes[rootnode, 0], nodes[rootnode, 2]]
    nwk = [[]]*nnode
    # Until a node's text is ready, nwk[node] holds [came_from, child, child].
    nwk[nstck[1]] = [rootnode]
    nwk[nstck[2]] = [rootnode]
    blens = [prec.format(x) for x in blen]
    if flags['F']:
        nads = ["[&AD={:#5.3f},ADS={:#5.3f}]:".format(x, x) for x in nad]
    elif flags['G']:
        bads = ["[&AD={:#5.3f},ADS={:#5.3f}]:".format(x, x) for x in ad]
        nads = ["[&AD={:#5.3f},ADS={:#5.3f}]:".format(x, x) for x in nad]
        blens1 = [prec.format(x) for x in rlen]
        blens2 = [prec.format(x) for x in rlen2]
    else:
        nads = [":"]*nnode
    clen = 0
    while len(nstck) > 1:
        k = nstck[-1]
        if nstck.count(k) > 1:
            madlog(bug)
        if k < notu:
            # Leaf: emit label and branch length.
            if flags['G']:
                if rlen[k] == 0:
                    nwk[k] = "{}{}{}".format(labs[k], nads[k], blens2[k])
                elif rlen2[k] == 0:
                    nwk[k] = "{}{}{}".format(labs[k], nads[k], blens1[k])
                else:
                    nwk[k] = "({}{}{}){}{}".format(labs[k], nads[k], blens1[k], bads[k], blens2[k])
            else:
                nwk[k] = "{}{}{}".format(labs[k], nads[k], blens[k])
            nstck.pop()
            clen += blen[k]
            continue
        elif len(nwk[k]) == 1:
            # First visit: stack every neighbour except the one we came from.
            for b in [0, 1, 2]:
                if nodes[k, b] == nwk[k][0]:
                    continue
                c = nodes[k, b]
                nwk[c] = [k]
                nstck.append(c)
                nwk[k].append(c)
            continue
        else:
            # Second visit: both sub-trees are text now, join them.
            f, c, d = nwk[k]
            nwk[k] = "{},{}".format(nwk[c], nwk[d])
            nwk[c], nwk[d] = ["", ""]
            # The branch towards ``f`` is stored on whichever end is the child.
            if trip[k, f] == 2:
                k1 = k
            else:
                k1 = f
            if flags['G']:
                if blen[k1] == 0:
                    pass
                elif rlen[k1] == 0:
                    nwk[k] = "({}){}{}".format(nwk[k], nads[k], blens2[k1])
                elif rlen2[k1] == 0:
                    nwk[k] = "({}){}{}".format(nwk[k], nads[k], blens1[k1])
                elif k1 == k:
                    nwk[k] = "(({}){}{}){}{}".format(nwk[k], nads[k], blens1[k1], bads[k1], blens2[k1])
                else:
                    nwk[k] = "(({}){}{}){}{}".format(nwk[k], nads[k], blens2[k1], bads[k1], blens1[k1])
            elif blen[k1] > 0 or not flags['P']:
                nwk[k] = "({}){}{}".format(nwk[k], nads[k], blens[k1])
            nstck.pop()
            clen += blen[k1]
    #<--while stck
    tol = (tlen - clen)/tlen
    newnwk = "({},{})".format(nwk[nodes[rootnode, 0]], nwk[nodes[rootnode, 2]])
    if abs(tol) > 0.0000001:
        madlog(bug +
               " tlen {} clen {} tol {}".format(tlen, clen, tol))
    return newnwk
#<--tree2nwk()
514
+ #========================================
515
+
516
+
517
+ #========================================
518
+ #====== fimd minimal values, generate AI, reroot
519
def mad_output():
    """Pick the minimal-AD root position(s), re-root the tree and write it out.

    Every branch whose AD is within ``madtol`` of the minimum is a root
    candidate. For each candidate (with -n only the one with the smallest
    CCV) the root node is re-attached on that branch, the tree is serialised
    with ``tree2nwk`` and appended to the output; the arrays are then reset
    to the detached state for the next candidate. Statistics go to the screen.

    NOTE(review): the former ``global nodes,blen,trip,labs,ofn,rlen,rlen2,ad,nad``
    fails with NameError if this helper is nested inside ``mad()`` (confirm
    against the real indentation). The only rebinding was the snapshot
    restore at the end of the loop; it is now done in place on the same
    objects, so no scope declaration is needed in either layout.
    NOTE(review): the figtree settings text is reproduced as shown in this
    listing, where any leading whitespace has been lost.
    """
    #--------- find mads
    mad = min(ad)
    if mad < 0.001:
        madlog("Warning: MAD=={:.5g} is too good to be true.".format(mad))
    roots = [i for i, x in enumerate(ad) if x <= (mad*madtol)]
    nroots = len(roots)
    if nroots == 1:
        ai = mad/sorted(ad)[1]
    else:
        ai = 1.0
    gdbug(1, mad, roots, ai)
    #----- output:
    #---- detach rootnode
    a, b = list(nodes[rootnode, 0:2])
    nodes[a, 2] = b
    nodes[b, 2] = a
    blen[b] = blen[a]
    rlen[b] = rlen2[a]
    rlen2[b] = rlen[a]
    ad[b] = ad[a]
    ad[rootnode] = mad
    nad[rootnode] = mad
    live = (labs, blen, nodes, trip, ad, nad, rlen, rlen2)
    if nroots > 1:
        saved = [deepcopy(v) for v in live]
    rooted = ""
    for r in range(nroots):
        if flags['N'] and nroots > 1:
            r = np.argmin([ccv[j] for j in roots])
        i = roots[r]
        if i == rootnode:
            madlog(bug + ' (rootnode in roots.)')

        # reattach rootnode at inferred root
        j = nodes[i, 2]
        k = trip[j, i]
        nodes[i, 2] = rootnode
        nodes[j, k] = rootnode
        nodes[rootnode, 0] = i
        nodes[rootnode, 1] = -1
        nodes[rootnode, 2] = j
        trip[i, rootnode] = 2
        trip[j, rootnode] = k
        trip[rootnode, i] = 0
        trip[rootnode, j] = 2
        if k == 2:
            k1 = j
        else:
            k1 = rootnode
        blen[k1] = rlen2[i]  # new branch length
        blen[i] = rlen[i]    # new branch length
        rlen[rootnode] = 0
        rlen2[rootnode] = rlen2[i]
        rlen[k1] = 0
        rlen2[k1] = rlen2[i]
        rlen2[i] = 0
        if blen[k1] == 0 or blen[i] == 0:
            madlog("Warning: Root is polytomous.")
        #------- make newick string
        rootstr = tree2nwk().replace(at, "@")
        s = "[MAD={:#5.3f}_AI={:#5.3f}_CCV={:#.3g}%_N={}/{}]".format(mad, ai, ccv[i]*100, r + 1, nroots)
        madlog(">> " + s)
        if flags['S']:
            rootstr += s
        if flags['F'] or flags['G']:
            s = '[&AD={:#5.3f},ADS="MAD={:#5.3f}",STT="CCV={:#.3g}%"]:{:.3g})[&ADS="AI={:#5.3f}"]'.format(mad, mad, ccv[i]*100, depth[i]*0.1, ai)
            rootstr = 'tree tree_{} = [&R] ({}{}'.format(r + 1, rootstr, s)
        rooted += rootstr + ";"
        if flags['N']:
            break
        rooted += "\n"
        if r < nroots - 1:
            # Restore in place: every element is a scalar or string, so slice
            # assignment copies the snapshot and leaves ``saved`` untouched.
            for target, snapshot in zip(live, saved):
                target[:] = snapshot
    #<--r loop
    #--------
    rccv = ", ".join(["{:#.3g}%".format(ccv[i]*100) for i in roots])
    sttstr = ("\nMinimal ancestor deviation, MAD = {:#5.3f}\n".format(mad) +
              " Ambiguity index, AI = {:#5.3f}\n".format(ai) +
              " Clock CV, CCV = {}".format(rccv))
    print(sttstr)
    if flags['M']:
        if nroots > 1:
            writeout(">> Tied root positions, {} rooted trees:".format(nroots))
        else:
            writeout(">> Rooted tree:")
    if flags['F'] or flags['G']:
        rooted = ('#NEXUS\nbegin trees;\n' + rooted +
                  'end;\n'
                  'begin figtree;\n'
                  'set appearance.branchColorAttribute="AD";\n'
                  'set appearance.branchLineWidth=6.0;\n'
                  'set colour.scheme.ad="AD:HSBContinuous{{false,false,0,1},0.5,1.0,1.0,0.9,1.0,0.75,false}";\n'
                  'set legend.attribute="AD";\n'
                  'set legend.isShown=true;\n'
                  'set nodeLabels.displayAttribute="ADS";\n'
                  'set nodeLabels.isShown=true;\n'
                  'set branchLabels.displayAttribute="STT";\n'
                  'set branchLabels.isShown=true;\n'
                  'set rectilinearLayout.alignTipLabels=true;\n'
                  'set rectilinearLayout.curvature=9571;\n'
                  'set rectilinearLayout.rootLength=0;\n')
        if flags['G']:
            rooted += ('\n'
                       'set nodeShape.size=6.0;\n'
                       'set nodeShape.isShown=true;\n')
        rooted += 'end;'
    writeout(rooted)
    if nroots > 1:
        if flags['N']:
            print("Tied root positions,\n{} rooted trees, but just one written to {}\n".format(nroots, ofn))
        else:
            print("Tied root positions,\n{} rooted trees written to {}\n".format(nroots, ofn))
    else:
        print("Rooted tree written to '{}'\n".format(ofn))
#<-- mad_output
639
+ #--------------------------
640
+
641
+ #--------------------------
642
+ #----- Main loop
643
# Root every NEWICK string found in the input file, one after another.
for inwk in range(nnwk):
    if flags['M'] or flags['N']:
        print("\nAnalyzing tree {} of {} from '{}'...".format(inwk + 1, nnwk, fn))
        if flags['M']:
            writeout(">Rooting of input tree #{}:".format(inwk + 1))
    else:
        print("\nAnalyzing file '{}'...".format(fn))
    nwkstr = mnwk[inwk] + ";"
    try:
        #---
        notu, nnode, rootnode, nodes, blen, trip, labs, prec, tlen = nwk2tree(nwkstr)
        #---
        gdbug(1, notu, nnode, rootnode, prec, tlen)
        gdbug(1, nodes, blen, trip, labs)
        # ``prec`` is the longest branch-length string seen. Clamp to at
        # least 1: for strings shorter than 3 characters the old expression
        # produced '{:#.-1g}', which str.format rejects. A precision of 0
        # already meant 1 for 'g', so longer inputs format as before.
        prec = "{" + ":#.{}".format(max(1, min(prec - 2, 9))) + "g}"
        #--- merge pre-existing root splits
        a, b = list(nodes[rootnode, 0:2])
        blen[a] += blen[b]
        blen[b] = 0
        #---
        enotu, npair, dist, dij, n2n = pwdist_preproc()
        #---
        ad, nad, rlen, rlen2, ccv, depth = ancestor_deviations()
        #---
        mad_output()
    except madError:
        # Already reported by madlog; go on with the next tree.
        pass
    except (SystemExit, KeyboardInterrupt):
        fh.close()
        raise
    except Exception:
        # madlog raises madError for 'Error...' messages. Raised inside this
        # handler it used to escape the loop, skipping the remaining trees
        # and the final close of the output file.
        try:
            madlog(bug + " (Unidentified error.)\n")
        except madError:
            pass
#<-- inwk loop
s = " - Please cite DOI:10.1038/s41559-017-0193\n"
if flags['M']:
    writeout(">>" + s)
print("\n" + s)
fh.close()
exit()
682
+ #ffu if __name__ == '__main__':
683
+
684
if __name__ == '__main__':
    # Local import: the module header does not import argparse, so the
    # original ``argparse.ArgumentParser()`` raised NameError.
    import argparse

    mad_parser = argparse.ArgumentParser(description='MAD phylogenetic rooting')
    mad_parser.add_argument('-i', required=False,
                            help='input file with tree(s) in NEWICK format')
    # These are on/off switches. Declared without ``store_true`` each one
    # demanded a value, and mad() then counted that value as a second file name.
    switches = (
        ('-m', 'multiple trees in input file, verbose output file'),
        ('-n', 'like -m, but pure NEWICK output, one rooted tree per input tree'),
        ('-s', 'append MAD statistics to output NEWICK strings'),
        ('-p', 'keep flat polytomies in the rooted NEWICK'),
        ('-t', 'retain branch lengths smaller than 10^-6'),
        ('-f', 'figtree output, ancestor deviations for nodes only'),
        ('-g', 'figtree output, also within-branch positions and AD values'),
        ('-v', 'print version and exit'),
    )
    for switch, text in switches:
        mad_parser.add_argument(switch, required=False, action='store_true', help=text)
    args = vars(mad_parser.parse_args())
    # mad() re-reads sys.argv itself; parsing here validates the command
    # line and provides -h.
    mad(args)
697
+
698
+ '''
699
+
700
+ -m: Multiple trees in input file, output file is verbose and contains '>' (info) and '<' (error) lines in addition to NEWICK strings. Default allows exactly one tree, and output is pure NEWICK.
701
+ -n: Like -m, but pure newick. Only one rooted tree per input tree, errors as a lone ';' in a line.
702
+ -s: Statistics - append MAD statistics to output NEWICK strings.
703
+ -p: Polytomies - flat polytomies in rooted NEWICK (if present). Default is a binary tree with polytomies arbitrarily resolved into zero-length branches.
704
+ -t: Tiny - retain branch lengths smaller than 10^-6. Default contracts these to 0.0, thereby creating polytomies.
705
+ -f | -g: Figtree - rooted trees formatted for viewing AD scores in figtree. Important: after loading in figtree, please check the option 'Appearance>Gradient' manually, otherwise the branch colors will be misleading. -f reports ancestor deviations (AD) only for nodes, while -g reports also within-branch maximizing positions and AD values.
706
+ -v: Version.
707
+
708
+ '''