treesak 1.53.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. TreeSAK/ALE.py +63 -0
  2. TreeSAK/ALE1.py +268 -0
  3. TreeSAK/ALE2.py +168 -0
  4. TreeSAK/ALE2RTC.py +30 -0
  5. TreeSAK/ALE3.py +205 -0
  6. TreeSAK/ALE4.py +636 -0
  7. TreeSAK/ALE5.py +210 -0
  8. TreeSAK/ALE6.py +401 -0
  9. TreeSAK/ALE7.py +126 -0
  10. TreeSAK/ALE_backup.py +1081 -0
  11. TreeSAK/AssessCVG.py +128 -0
  12. TreeSAK/AssessMarker.py +306 -0
  13. TreeSAK/AssessMarkerDeltaLL.py +257 -0
  14. TreeSAK/AssessMarkerPA.py +317 -0
  15. TreeSAK/AssessPB.py +113 -0
  16. TreeSAK/BMGE.jar +0 -0
  17. TreeSAK/BMGE.py +49 -0
  18. TreeSAK/C60SR4.nex +127 -0
  19. TreeSAK/CompareMCMC.py +138 -0
  20. TreeSAK/ConcateMSA.py +111 -0
  21. TreeSAK/ConvertMSA.py +135 -0
  22. TreeSAK/Dir.rb +82 -0
  23. TreeSAK/ExtractMarkerSeq.py +263 -0
  24. TreeSAK/FastRoot.py +1175 -0
  25. TreeSAK/FastRoot_backup.py +1122 -0
  26. TreeSAK/FigTree.py +34 -0
  27. TreeSAK/GTDB_tree.py +76 -0
  28. TreeSAK/GeneTree.py +142 -0
  29. TreeSAK/KEGG_Luo17.py +807 -0
  30. TreeSAK/LcaToLeaves.py +66 -0
  31. TreeSAK/MarkerRef2Tree.py +616 -0
  32. TreeSAK/MarkerRef2Tree_backup.py +628 -0
  33. TreeSAK/MarkerSeq2Tree.py +299 -0
  34. TreeSAK/MarkerSeq2Tree_backup.py +259 -0
  35. TreeSAK/ModifyTopo.py +116 -0
  36. TreeSAK/Newick_tree_plotter.py +79 -0
  37. TreeSAK/OMA.py +170 -0
  38. TreeSAK/OMA2.py +212 -0
  39. TreeSAK/OneLineAln.py +50 -0
  40. TreeSAK/PB.py +155 -0
  41. TreeSAK/PMSF.py +115 -0
  42. TreeSAK/PhyloBiAssoc.R +84 -0
  43. TreeSAK/PhyloBiAssoc.py +167 -0
  44. TreeSAK/PlotMCMC.py +41 -0
  45. TreeSAK/PlotMcmcNode.py +152 -0
  46. TreeSAK/PlotMcmcNode_old.py +252 -0
  47. TreeSAK/RootTree.py +101 -0
  48. TreeSAK/RootTreeGTDB.py +371 -0
  49. TreeSAK/RootTreeGTDB214.py +288 -0
  50. TreeSAK/RootTreeGTDB220.py +300 -0
  51. TreeSAK/SequentialDating.py +16 -0
  52. TreeSAK/SingleAleHGT.py +157 -0
  53. TreeSAK/SingleLinePhy.py +50 -0
  54. TreeSAK/SliceMSA.py +142 -0
  55. TreeSAK/SplitScore.py +21 -0
  56. TreeSAK/SplitScore1.py +177 -0
  57. TreeSAK/SplitScore1OMA.py +148 -0
  58. TreeSAK/SplitScore2.py +608 -0
  59. TreeSAK/TaxaCountStats.R +256 -0
  60. TreeSAK/TaxonTree.py +47 -0
  61. TreeSAK/TreeSAK_config.py +32 -0
  62. TreeSAK/VERSION +164 -0
  63. TreeSAK/VisHPD95.R +45 -0
  64. TreeSAK/VisHPD95.py +200 -0
  65. TreeSAK/__init__.py +0 -0
  66. TreeSAK/ale_parser.py +74 -0
  67. TreeSAK/ale_splitter.py +63 -0
  68. TreeSAK/alignment_pruner.pl +1471 -0
  69. TreeSAK/assessOG.py +45 -0
  70. TreeSAK/batch_itol.py +171 -0
  71. TreeSAK/catfasta2phy.py +140 -0
  72. TreeSAK/cogTree.py +185 -0
  73. TreeSAK/compare_trees.R +30 -0
  74. TreeSAK/compare_trees.py +255 -0
  75. TreeSAK/dating.py +264 -0
  76. TreeSAK/dating_ss.py +361 -0
  77. TreeSAK/deltall.py +82 -0
  78. TreeSAK/do_rrtc.rb +464 -0
  79. TreeSAK/fa2phy.py +42 -0
  80. TreeSAK/filter_rename_ar53.py +118 -0
  81. TreeSAK/format_leaf_name.py +70 -0
  82. TreeSAK/gap_stats.py +38 -0
  83. TreeSAK/get_SCG_tree.py +742 -0
  84. TreeSAK/get_arCOG_seq.py +97 -0
  85. TreeSAK/global_functions.py +222 -0
  86. TreeSAK/gnm_leaves.py +43 -0
  87. TreeSAK/iTOL.py +791 -0
  88. TreeSAK/iTOL_gene_tree.py +80 -0
  89. TreeSAK/itol_msa_stats.py +56 -0
  90. TreeSAK/keep_highest_rrtc.py +37 -0
  91. TreeSAK/koTree.py +194 -0
  92. TreeSAK/label_gene_tree_by_gnm.py +34 -0
  93. TreeSAK/label_tree.R +75 -0
  94. TreeSAK/label_tree.py +121 -0
  95. TreeSAK/mad.py +708 -0
  96. TreeSAK/mcmc2tree.py +58 -0
  97. TreeSAK/mcmcTC copy.py +92 -0
  98. TreeSAK/mcmcTC.py +104 -0
  99. TreeSAK/mcmctree_vs_reltime.R +44 -0
  100. TreeSAK/mcmctree_vs_reltime.py +252 -0
  101. TreeSAK/merge_pdf.py +32 -0
  102. TreeSAK/pRTC.py +56 -0
  103. TreeSAK/parse_mcmctree.py +198 -0
  104. TreeSAK/parse_reltime.py +141 -0
  105. TreeSAK/phy2fa.py +37 -0
  106. TreeSAK/plot_distruibution_th.py +165 -0
  107. TreeSAK/prep_mcmctree_ctl.py +92 -0
  108. TreeSAK/print_leaves.py +32 -0
  109. TreeSAK/pruneMSA.py +63 -0
  110. TreeSAK/recode.py +73 -0
  111. TreeSAK/remove_bias.R +112 -0
  112. TreeSAK/rename_leaves.py +78 -0
  113. TreeSAK/replace_clade.py +55 -0
  114. TreeSAK/root_with_out_group.py +84 -0
  115. TreeSAK/run_TaxaCountStats_R_s1.py +455 -0
  116. TreeSAK/subsample_drep_gnms.py +74 -0
  117. TreeSAK/subset.py +69 -0
  118. TreeSAK/subset_tree_stupid_old_way.py +193 -0
  119. TreeSAK/supertree.py +330 -0
  120. TreeSAK/tmp_1.py +19 -0
  121. TreeSAK/tmp_2.py +19 -0
  122. TreeSAK/tmp_3.py +120 -0
  123. TreeSAK/tmp_4.py +43 -0
  124. TreeSAK/tmp_5.py +12 -0
  125. TreeSAK/weighted_rand.rb +23 -0
  126. treesak-1.53.3.data/scripts/TreeSAK +955 -0
  127. treesak-1.53.3.dist-info/LICENSE +674 -0
  128. treesak-1.53.3.dist-info/METADATA +27 -0
  129. treesak-1.53.3.dist-info/RECORD +131 -0
  130. treesak-1.53.3.dist-info/WHEEL +5 -0
  131. treesak-1.53.3.dist-info/top_level.txt +1 -0
TreeSAK/dating_ss.py ADDED
@@ -0,0 +1,361 @@
1
+ import os
2
+ import argparse
3
+ import itertools
4
+ from ete3 import Tree
5
+ from Bio import AlignIO
6
+
7
+
8
+ Dating_usage = '''
9
+ ============================= Dating example commands =============================
10
+
11
+ # example commands
12
+ TreeSAK Dating_ss -deltall DeltaLL_stdout.txt -aod s11_marker_sets_by_DeltaLL -og out_group.txt -eu 27.nwk -to_test para_to_test.txt -o s12_dating_wd -c 25-50-75-100 -mmn 20 -f
13
+
14
+ ===================================================================================
15
+ '''
16
+
17
+
18
def sep_path_basename_ext(file_in):
    """Split a path into (directory, base name, extension).

    The directory is reported as '.' when ``file_in`` has no directory part.
    The extension is whatever ``os.path.splitext`` returns for the file name,
    i.e. it keeps its leading dot and is '' when there is none.
    """
    directory, leaf = os.path.split(file_in)
    stem, suffix = os.path.splitext(leaf)
    return (directory if directory != '' else '.'), stem, suffix
24
+
25
+
26
def submit_js(js):
    """Submit a job script with ``qsub`` from the script's own directory.

    The process changes into the directory holding ``js``, runs
    ``qsub <script name>`` through the shell, and then returns to the
    directory it started from.

    :param js: path to the job script; a bare file name is looked up in '.'.
    """
    import shlex

    js_dir, js_name = os.path.split(js)
    if js_dir == '':
        js_dir = '.'

    launch_dir = os.getcwd()
    os.chdir(js_dir)
    try:
        # quote the name so spaces or shell metacharacters in it are passed
        # to qsub literally instead of being interpreted by the shell
        os.system('qsub %s' % shlex.quote(js_name))
    finally:
        # always go back, even if the submit step raised, so that the
        # caller's relative paths keep resolving
        os.chdir(launch_dir)
32
+
33
+
34
def root_with_out_group(tree_file, out_group_txt, tree_file_rooted):
    """Root a tree on its outgroup and write the rooted tree to a file.

    :param tree_file: input tree, read by ete3 with ``format=1``.
    :param out_group_txt: text file with one outgroup leaf name per line;
        blank lines are ignored.
    :param tree_file_rooted: path the rooted tree is written to.
    :raises ValueError: if the outgroup file names no leaf, or its only leaf
        is not present in the tree.
    """
    with open(out_group_txt) as out_group_handle:
        out_group_set = {each_og.strip() for each_og in out_group_handle if each_og.strip()}

    if not out_group_set:
        raise ValueError('no outgroup leaf found in %s' % out_group_txt)

    tre = Tree(tree_file, format=1)

    if len(out_group_set) == 1:
        # With a single outgroup leaf there is no clade to look up, so the
        # leaf itself is used as the outgroup.
        only_og = next(iter(out_group_set))
        matched_leaves = tre.get_leaves_by_name(only_og)
        if not matched_leaves:
            raise ValueError('outgroup leaf %s not found in %s' % (only_og, tree_file))
        out_group_node = matched_leaves[0]
    else:
        out_group_node = tre.get_common_ancestor(out_group_set)

    tre.set_outgroup(out_group_node)
    tre.write(outfile=tree_file_rooted)
44
+
45
+
46
def replace_clades(main_tree, sub_tree, tree_out, quote_node_name):
    """Swap one clade of the main tree for a replacement subtree.

    The clade to replace is the common ancestor, in the main tree, of all
    leaves of ``sub_tree``. The result is written with ``format=8``.

    :param main_tree: tree providing the overall topology.
    :param sub_tree: replacement subtree, read with ``format=1``.
    :param tree_out: path of the output tree file.
    :param quote_node_name: passed to ete3 as ``quoted_node_names`` for both
        reading the subtree and writing the result.
    :raises SystemExit: (status 1) if the matching clade of the main tree
        holds a different number of leaves than the subtree.
    """
    tre_sub = Tree(sub_tree, format=1, quoted_node_names=quote_node_name)
    subtree_leaf_name_list = tre_sub.get_leaf_names()
    tre_main = Tree(main_tree)
    lca = tre_main.get_common_ancestor(subtree_leaf_name_list)

    if len(lca.get_leaf_names()) != len(subtree_leaf_name_list):
        print('LCA of subtree leaves in main tree contain extra leaves, program exited!')
        # non-zero status so calling scripts can tell this is a failure;
        # SystemExit also works where the site-provided exit() is unavailable
        raise SystemExit(1)

    lca_p = lca.up
    if lca_p is None:
        # The clade to replace is the root, i.e. the subtree spans the whole
        # main tree: the result is simply the subtree.
        tre_main = tre_sub
    else:
        lca_p.remove_child(lca)
        lca_p.add_child(tre_sub)
    tre_main.write(outfile=tree_out, format=8, quoted_node_names=quote_node_name)
61
+
62
+
63
def prep_mcmctree_ctl(ctl_para_dict, mcmctree_ctl_file):
    """Write an MCMCTree control file.

    :param ctl_para_dict: settings keyed by control-file variable name.
        ``seqfile``, ``treefile``, ``mcmcfile``, ``outfile``, ``seqtype``,
        ``usedata`` and ``clock`` are mandatory; every other variable falls
        back to the default listed below.
    :param mcmctree_ctl_file: path of the control file to create.
    :raises KeyError: if a mandatory setting is missing. The check happens
        before the file is opened, so no partial file is left behind.
    """
    required = object()  # marks variables that have no default

    # (variable, default, trailing comment) in the order they are written
    ctl_layout = (
        ('seed', '-1', ''),
        ('seqfile', required, ''),
        ('treefile', required, ''),
        ('mcmcfile', required, ''),
        ('outfile', required, ''),
        ('ndata', 1, ''),
        ('seqtype', required, '0: nucleotides; 1:codons; 2:AAs'),
        ('usedata', required, '0: no data; 1:seq like; 2:normal approximation; 3:out.BV (in.BV)'),
        ('clock', required, '1: global clock; 2: independent rates; 3: correlated rates'),
        ('RootAge', '<1.0', 'safe constraint on root age, used if no fossil for root.'),
        ('model', 0, '0:JC69, 1:K80, 2:F81, 3:F84, 4:HKY85'),
        ('alpha', 0.5, 'alpha for gamma rates at sites'),
        ('ncatG', 4, 'No. categories in discrete gamma'),
        ('cleandata', 0, 'remove sites with ambiguity data (1:yes, 0:no)?'),
        ('BDparas', '1 1 0.1', 'birth, death, sampling'),
        ('kappa_gamma', '6 2', 'gamma prior for kappa'),
        ('alpha_gamma', '1 1', 'gamma prior for alpha'),
        ('rgene_gamma', '1 50 1', 'gammaDir prior for rate for genes'),
        ('sigma2_gamma', '1 10 1', 'gammaDir prior for sigma^2 (for clock=2 or 3)'),
        ('finetune', '1: .1 .1 .1 .1 .1 .1', 'auto (0 or 1): times, musigma2, rates, mixing, paras, FossilErr'),
        ('print', 1, '0: no mcmc sample; 1: everything except branch rates 2: everything'),
        ('burnin', 50000, ''),
        ('sampfreq', 5, ''),
        ('nsample', 150000, ''),
    )

    ctl_lines = []
    for para_name, para_default, para_note in ctl_layout:
        if para_default is required:
            para_value = ctl_para_dict[para_name]
        else:
            para_value = ctl_para_dict.get(para_name, para_default)
        ctl_line = ' %s = %s' % (para_name, para_value)
        if para_note:
            ctl_line += ' * %s' % para_note
        ctl_lines.append(ctl_line + '\n')

    with open(mcmctree_ctl_file, 'w') as ctl_file_handle:
        ctl_file_handle.writelines(ctl_lines)
90
+
91
+
92
def get_parameter_combinations(para_to_test_dict):
    """Enumerate every combination of the settings under test.

    :param para_to_test_dict: maps a parameter name to the settings to try.
    :return: dict keyed by a combination label, each value being the
        ``{parameter: setting}`` dict for that combination. A label joins the
        ``<parameter><setting>`` pieces with '_' (spaces also become '_').
        Parameters and their settings are both visited in sorted order.
    """
    ordered_paras = sorted(para_to_test_dict)
    ordered_settings = [sorted(para_to_test_dict[para]) for para in ordered_paras]

    para_dod = dict()
    for chosen_settings in itertools.product(*ordered_settings):
        label_parts = [('%s%s' % (para, setting)).replace(' ', '_')
                       for para, setting in zip(ordered_paras, chosen_settings)]
        para_dod['_'.join(label_parts)] = dict(zip(ordered_paras, chosen_settings))

    return para_dod
127
+
128
+
129
def fa2phy(fasta_in, phy_out):
    """Rewrite a FASTA alignment as a PHYLIP-style file, one sequence per line.

    The first line holds the number of sequences and the alignment length.
    Each following line is a sequence id padded with spaces to the longest id
    plus two, immediately followed by the full sequence.
    """
    alignment = AlignIO.read(fasta_in, 'fasta')

    # width of the id column: longest id plus a two-space gap
    id_column_width = max((len(record.id) for record in alignment), default=0) + 2

    phy_rows = ['%s %s\n' % (len(alignment), alignment.get_alignment_length())]
    phy_rows.extend('%s%s\n' % (record.id.ljust(id_column_width), str(record.seq))
                    for record in alignment)

    with open(phy_out, 'w') as phy_handle:
        phy_handle.writelines(phy_rows)
145
+
146
+
147
def _read_deltall_values(deltall_stdout_txt):
    """Collect, per marker, the values listed in a deltaLL stdout file."""
    deltall_op_dict = dict()
    with open(deltall_stdout_txt) as deltall_stdout_handle:
        for each_line in deltall_stdout_handle:
            # skip diagnostics mixed into the stdout, and empty lines
            if each_line.startswith(('WARNING:', 'awk:')) or not each_line.strip():
                continue
            each_line_split = each_line.strip().split('\t')
            deltall_op_dict.setdefault(each_line_split[0], []).append(float(each_line_split[1]))
    return deltall_op_dict


def _rank_markers_by_deltall(deltall_op_dict):
    """Order markers by the sum of their two per-metric ranks (ascending)."""
    metric_1_dict = {m: float("{0:.2f}".format(v[0])) for m, v in deltall_op_dict.items()}
    metric_2_dict = {m: float("{0:.2f}".format(v[1])) for m, v in deltall_op_dict.items()}

    # metric 1 is ranked from high to low, metric 2 from low to high; the
    # reversed ascending sort is kept as is so that ties rank as before
    metric_1_order = [k for k, v in sorted(metric_1_dict.items(), key=lambda item: item[1])[::-1]]
    metric_2_order = [k for k, v in sorted(metric_2_dict.items(), key=lambda item: item[1])]

    metric_1_score_dict = {m: rank for rank, m in enumerate(metric_1_order, 1)}
    metric_2_score_dict = {m: rank for rank, m in enumerate(metric_2_order, 1)}

    overall_score_dict = {m: metric_1_score_dict[m] + metric_2_score_dict[m] for m in deltall_op_dict}
    return [k for k, v in sorted(overall_score_dict.items(), key=lambda item: item[1])]


def dating_ss(args):
    """Prepare MCMCTree working directories and job scripts for each DeltaLL cutoff.

    For every cutoff in ``args['c']`` that keeps at least ``args['mmn']``
    markers, the function:
      1. converts the concatenated alignment found in ``args['aod']`` to
         PHYLIP,
      2. roots the matching C60 tree on the outgroup, grafts in the
         time-constrained tree and appends the root age,
      3. writes a control file and job script for the run that produces
         out.BV,
      4. writes one dating directory, control file and job script per
         combination of the settings listed in ``args['to_test']``.

    :param args: dict with the keys ``deltall``, ``aod``, ``og``, ``eu``,
        ``o``, ``c``, ``mmn``, ``f``, ``ra``, ``qsub`` and ``to_test``.
        The ``to_test`` file has one parameter per line: its name, whitespace,
        then a comma-separated list of settings (settings may contain spaces).
    :raises SystemExit: (status 1) if the time-constrained tree is missing, or
        the output folder exists and overwriting was not requested.
    :raises ValueError: if a line of the ``to_test`` file has no settings.
    """
    import shutil

    deltall_stdout_txt = args['deltall']
    aod = args['aod']
    out_group_txt = args['og']
    eu_tree = args['eu']
    op_dir = args['o']
    deltall_keep_pct_str = args['c']
    min_marker_num = args['mmn']
    force_overwrite = args['f']
    root_age = args['ra']
    submit_job = args['qsub']
    para_to_test = args['to_test']
    js_cpu_num = 1
    quote_node_name = False

    # read in the settings to test
    para_to_test_dict = dict()
    with open(para_to_test) as para_to_test_handle:
        for each_para in para_to_test_handle:
            # split off the name only, so settings such as "1 50 1" stay whole
            each_para_split = each_para.strip().split(None, 1)
            if not each_para_split:
                continue
            if len(each_para_split) < 2:
                raise ValueError('no setting provided for %s in %s' % (each_para_split[0], para_to_test))
            para_to_test_dict[each_para_split[0]] = [i.strip() for i in each_para_split[1].split(',')]
    print('Parameters to test: %s' % para_to_test_dict)

    if os.path.isfile(eu_tree) is False:
        print('%s not found, program exited!' % eu_tree)
        raise SystemExit(1)

    deltall_keep_pct_list = [int(i) for i in deltall_keep_pct_str.split('-')]
    _, deltall_stdout_basename, _ = sep_path_basename_ext(deltall_stdout_txt)
    marker_set_id = deltall_stdout_basename.split('_DeltaLL_stdout')[0]

    # create dir
    if os.path.isdir(op_dir) is True:
        if force_overwrite is True:
            shutil.rmtree(op_dir)
        else:
            print('output folder detected, program exited!')
            raise SystemExit(1)
    os.mkdir(op_dir)

    # rank markers
    marker_list_sorted_by_deltall = _rank_markers_by_deltall(_read_deltall_values(deltall_stdout_txt))

    for each_keep_pct in deltall_keep_pct_list:
        marker_num_to_keep = round(len(marker_list_sorted_by_deltall)*each_keep_pct/100)

        if marker_num_to_keep < min_marker_num:
            print('Ignored DeltaLL cutoff at %s , the number of qualified markers (%s) less than %s' % (each_keep_pct, marker_num_to_keep, min_marker_num))
            continue

        prefix_base = '%s_DeltaLL_%s' % (marker_set_id, each_keep_pct)
        aln_concatenated = '%s_concatenated.phy' % prefix_base
        aln_concatenated_in_aod_wd_fasta = '%s_concatenated.phy.fasta' % prefix_base
        c60_tree_file_rooted_with_time_final = '%s_rooted_with_time_final.treefile' % prefix_base
        pwd_c60_tree_file = '%s/%s_iqtree_C60_PMSF/concatenated.treefile' % (aod, prefix_base)
        pwd_c60_tree_file_renamed = '%s/%s_raw.treefile' % (op_dir, prefix_base)
        pwd_c60_tree_file_rooted = '%s/%s_rooted.treefile' % (op_dir, prefix_base)
        pwd_c60_tree_file_rooted_with_time = '%s/%s_rooted_with_time.treefile' % (op_dir, prefix_base)
        pwd_aln_concatenated_in_aod_wd_fasta = '%s/%s' % (aod, aln_concatenated_in_aod_wd_fasta)
        pwd_aln_concatenated_in_op_wd_phylip = '%s/%s' % (op_dir, aln_concatenated)
        pwd_c60_tree_file_rooted_with_time_final = '%s/%s' % (op_dir, c60_tree_file_rooted_with_time_final)
        get_BV_wd = '%s/%s_get_BV_wd' % (op_dir, prefix_base)
        pwd_aln_concatenated_in_bv_wd_phylip = '%s/%s' % (get_BV_wd, aln_concatenated)

        fa2phy(pwd_aln_concatenated_in_aod_wd_fasta, pwd_aln_concatenated_in_op_wd_phylip)
        shutil.copy(pwd_c60_tree_file, pwd_c60_tree_file_renamed)

        # root genome tree with outgroup
        root_with_out_group(pwd_c60_tree_file_renamed, out_group_txt, pwd_c60_tree_file_rooted)

        # add time constraints
        replace_clades(pwd_c60_tree_file_rooted, eu_tree, pwd_c60_tree_file_rooted_with_time, quote_node_name)

        # remove "NoName" from the rooted tree with time constraints
        with open(pwd_c60_tree_file_rooted_with_time) as rooted_with_time_handle:
            tree_str = rooted_with_time_handle.readline().strip().replace('NoName', '')

        # add root age
        tree_str = tree_str.replace(';', '<%s;' % root_age)
        tre_object = Tree(tree_str, format=8, quoted_node_names=quote_node_name)
        with open(pwd_c60_tree_file_rooted_with_time_final, 'w') as final_tree_handle:
            # header line: number of leaves, then the number of trees (1)
            final_tree_handle.write('%s\t1\n' % len(tre_object.get_leaf_names()))
            final_tree_handle.write(tree_str.replace('""', '') + '\n')

        # rm tmp tree files
        os.remove(pwd_c60_tree_file_renamed)
        os.remove(pwd_c60_tree_file_rooted)
        os.remove(pwd_c60_tree_file_rooted_with_time)

        # get BV file
        os.mkdir(get_BV_wd)
        fa2phy(pwd_aln_concatenated_in_aod_wd_fasta, pwd_aln_concatenated_in_bv_wd_phylip)  # sequence in phylip format need to be in one line
        shutil.copy(pwd_c60_tree_file_rooted_with_time_final, get_BV_wd)

        get_BV_js = '%s/%s_get_BV.sh' % (op_dir, prefix_base)
        get_BV_mcmctree_ctl = '%s_get_BV_mcmctree.ctl' % prefix_base
        pwd_get_BV_mcmctree_ctl = '%s/%s' % (get_BV_wd, get_BV_mcmctree_ctl)

        get_BV_para_dict = dict()
        get_BV_para_dict['seqfile'] = aln_concatenated
        get_BV_para_dict['treefile'] = c60_tree_file_rooted_with_time_final
        get_BV_para_dict['mcmcfile'] = '%s_mcmc.txt' % prefix_base
        get_BV_para_dict['outfile'] = '%s_out.txt' % prefix_base
        get_BV_para_dict['seqtype'] = '2'
        get_BV_para_dict['usedata'] = '3'
        get_BV_para_dict['clock'] = '3'
        prep_mcmctree_ctl(get_BV_para_dict, pwd_get_BV_mcmctree_ctl)

        with open(get_BV_js, 'w') as get_BV_js_handle:
            get_BV_js_handle.write('#!/bin/bash\n#SBATCH --ntasks 1\n#SBATCH --cpus-per-task %s\n\n' % js_cpu_num)
            # abspath (rather than cwd + '/' + path) stays correct when the
            # output folder was given as an absolute path
            get_BV_js_handle.write('cd %s\n' % os.path.abspath(get_BV_wd))
            get_BV_js_handle.write('mcmctree %s\n' % get_BV_mcmctree_ctl)

        # prepare files for dating
        para_dod = get_parameter_combinations(para_to_test_dict)
        for para_combination in para_dod:
            mcmctree_ctl = '%s_%s_mcmctree.ctl' % (prefix_base, para_combination)
            current_dating_wd = '%s/%s_%s_dating_wd' % (op_dir, prefix_base, para_combination)
            pwd_mcmctree_ctl = '%s/%s' % (current_dating_wd, mcmctree_ctl)
            js_mcmctree = '%s/js_%s_%s.sh' % (op_dir, prefix_base, para_combination)
            pwd_aln_in_dating_wd = '%s/%s' % (current_dating_wd, aln_concatenated)

            # create dating wd and copy tree and alignment files into it
            os.mkdir(current_dating_wd)
            fa2phy(pwd_aln_concatenated_in_aod_wd_fasta, pwd_aln_in_dating_wd)  # sequence in phylip format need to be in one line
            shutil.copy(pwd_c60_tree_file_rooted_with_time_final, current_dating_wd)

            # NOTE: 'clock' is not set here, so it has to be one of the
            # parameters listed in the to_test file.
            current_para_dict = para_dod[para_combination]
            current_para_dict['seqfile'] = aln_concatenated
            current_para_dict['treefile'] = c60_tree_file_rooted_with_time_final
            current_para_dict['mcmcfile'] = '%s_%s_mcmc.txt' % (prefix_base, para_combination)
            current_para_dict['outfile'] = '%s_%s_out.txt' % (prefix_base, para_combination)
            current_para_dict['seqtype'] = '2'
            current_para_dict['usedata'] = '2'

            prep_mcmctree_ctl(current_para_dict, pwd_mcmctree_ctl)

            with open(js_mcmctree, 'w') as js_mcmctree_handle:
                js_mcmctree_handle.write('#!/bin/bash\n\n')
                js_mcmctree_handle.write('cd %s\n' % os.path.abspath(current_dating_wd))
                js_mcmctree_handle.write('cp ../%s_get_BV_wd/out.BV in.BV\n' % prefix_base)
                js_mcmctree_handle.write('mcmctree %s\n' % mcmctree_ctl)
            print('Job script for performing dating exported to %s' % js_mcmctree)

        # only the out.BV job is submitted; the dating jobs depend on its output
        if submit_job is True:
            submit_js(get_BV_js)
333
+
334
+
335
if __name__ == '__main__':

    # command line options, in the order they are shown in the help text
    option_specs = [
        ('-deltall', dict(required=True, help='DeltaLL stdout')),
        ('-aod', dict(required=True, help='AssessMarkerDeltaLL output dir')),
        ('-og', dict(required=True, help='outgroup leaves, one id per line')),
        ('-eu', dict(required=True, help='EU tree with time constraints')),
        ('-o', dict(required=True, help='dating wd')),
        ('-c', dict(required=False, default='25-50-75-100', help='cutoffs, default: 25-50-75-100')),
        ('-mmn', dict(required=False, default=20, type=int, help='minimal marker number, default: 20')),
        ('-ra', dict(required=False, default=45, type=int, help='root age, default: 45')),
        ('-qsub', dict(required=False, action="store_true", help='submit job scripts for getting in.BV')),
        ('-f', dict(required=False, action="store_true", help='force overwrite')),
        ('-to_test', dict(required=True, help='Settings to test')),
    ]

    parser = argparse.ArgumentParser()
    for option_flag, option_kwargs in option_specs:
        parser.add_argument(option_flag, **option_kwargs)
    args = vars(parser.parse_args())
    dating_ss(args)
351
+
352
+
353
+ '''
354
+
355
+ cd /Users/songweizhi/Desktop/dating_test
356
+ python3 /Users/songweizhi/PycharmProjects/TreeSAK/TreeSAK/dating_ss.py -deltall Marker_set_2_Betts_2018_29_arCOG_Marker2Tree_e30/s10_assess_marker_deltaLL/PA_75_DeltaLL_stdout.txt -aod Marker_set_2_Betts_2018_29_arCOG_Marker2Tree_e30/s11_marker_sets_by_DeltaLL -og out_group.txt -eu 27.nwk -o Marker_set_2_Betts_2018_29_arCOG_Marker2Tree_e30/s12_dating_wd -c 25-50-75-100 -mmn 20 -f
357
+
358
+ cd /home-user/wzsong/DateArTree
359
+ python3 dating_ss.py -deltall Marker_set_2_Betts_2018_29_arCOG_Marker2Tree_e30/s10_assess_marker_deltaLL/PA_75_DeltaLL_stdout.txt -aod Marker_set_2_Betts_2018_29_arCOG_Marker2Tree_e30/s11_marker_sets_by_DeltaLL -og out_group.txt -eu 27.nwk -o Marker_set_2_Betts_2018_29_arCOG_Marker2Tree_e30/s12_dating_wd -c 25-50-75-100 -mmn 20 -f
360
+
361
+ '''
TreeSAK/deltall.py ADDED
@@ -0,0 +1,82 @@
1
+ import argparse
2
+
3
+
4
+ deltall_usage = '''
5
+ ========================= deltall example commands =========================
6
+
7
+ TreeSAK deltall -i nohup.out -o DeltaLL_op_summary.txt
8
+
9
+ # This script was written to parse the stdout of deltaLL.rb from Sishuo Wang
10
+
11
+ ============================================================================
12
+ '''
13
+
14
+
15
def deltall(args):
    """Summarise a deltaLL stdout file as a table of per-marker ranks.

    Each data line of the input is ``<marker>\\t<value>``; the first two
    values seen for a marker are its metric 1 and metric 2 (both rounded to
    two decimals). Metric 1 is ranked from high to low and metric 2 from low
    to high; the overall score is the sum of the two ranks. Rows are written
    in ascending order of overall score.

    :param args: dict with ``i`` (deltaLL stdout file) and ``o`` (summary
        table to write).
    """
    deltall_stdout_txt = args['i']
    summary_txt = args['o']

    deltall_op_dict = dict()
    with open(deltall_stdout_txt) as deltall_stdout_handle:
        for each_line in deltall_stdout_handle:
            # skip diagnostics mixed into the stdout, and empty lines
            if each_line.startswith(('WARNING:', 'awk:')) or not each_line.strip():
                continue
            each_line_split = each_line.strip().split('\t')
            deltall_op_dict.setdefault(each_line_split[0], []).append(float(each_line_split[1]))

    metric_1_dict = {m: float("{0:.2f}".format(v[0])) for m, v in deltall_op_dict.items()}
    metric_2_dict = {m: float("{0:.2f}".format(v[1])) for m, v in deltall_op_dict.items()}

    # the reversed ascending sort is kept as is so that ties rank as before
    metric_1_order = [k for k, v in sorted(metric_1_dict.items(), key=lambda item: item[1])[::-1]]
    metric_2_order = [k for k, v in sorted(metric_2_dict.items(), key=lambda item: item[1])]

    metric_1_score_dict = {m: rank for rank, m in enumerate(metric_1_order, 1)}
    metric_2_score_dict = {m: rank for rank, m in enumerate(metric_2_order, 1)}

    overall_score_dict = {m: metric_1_score_dict[m] + metric_2_score_dict[m] for m in deltall_op_dict}
    overall_score_sorted = sorted(overall_score_dict.items(), key=lambda item: item[1])

    with open(summary_txt, 'w') as summary_txt_handle:
        summary_txt_handle.write('Marker\tmetric1\tmetric1_score\tmetric2\tmetric2_score\toverall_score\n')
        for each_marker, metric_score_overall in overall_score_sorted:
            summary_txt_handle.write('%s\t%s\t%s\t%s\t%s\t%s\n' % (each_marker,
                                                                   metric_1_dict[each_marker],
                                                                   metric_1_score_dict[each_marker],
                                                                   metric_2_dict[each_marker],
                                                                   metric_2_score_dict[each_marker],
                                                                   metric_score_overall))
73
+
74
+
75
if __name__ == '__main__':

    # command line options, in the order they are shown in the help text
    option_specs = [
        ('-i', 'input file (e.g., nohup.out)'),
        ('-o', 'output summary'),
    ]

    parser = argparse.ArgumentParser()
    for option_flag, option_help in option_specs:
        parser.add_argument(option_flag, required=True, help=option_help)
    args = vars(parser.parse_args())
    deltall(args)