treesak 1.53.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. TreeSAK/ALE.py +63 -0
  2. TreeSAK/ALE1.py +268 -0
  3. TreeSAK/ALE2.py +168 -0
  4. TreeSAK/ALE2RTC.py +30 -0
  5. TreeSAK/ALE3.py +205 -0
  6. TreeSAK/ALE4.py +636 -0
  7. TreeSAK/ALE5.py +210 -0
  8. TreeSAK/ALE6.py +401 -0
  9. TreeSAK/ALE7.py +126 -0
  10. TreeSAK/ALE_backup.py +1081 -0
  11. TreeSAK/AssessCVG.py +128 -0
  12. TreeSAK/AssessMarker.py +306 -0
  13. TreeSAK/AssessMarkerDeltaLL.py +257 -0
  14. TreeSAK/AssessMarkerPA.py +317 -0
  15. TreeSAK/AssessPB.py +113 -0
  16. TreeSAK/BMGE.jar +0 -0
  17. TreeSAK/BMGE.py +49 -0
  18. TreeSAK/C60SR4.nex +127 -0
  19. TreeSAK/CompareMCMC.py +138 -0
  20. TreeSAK/ConcateMSA.py +111 -0
  21. TreeSAK/ConvertMSA.py +135 -0
  22. TreeSAK/Dir.rb +82 -0
  23. TreeSAK/ExtractMarkerSeq.py +263 -0
  24. TreeSAK/FastRoot.py +1175 -0
  25. TreeSAK/FastRoot_backup.py +1122 -0
  26. TreeSAK/FigTree.py +34 -0
  27. TreeSAK/GTDB_tree.py +76 -0
  28. TreeSAK/GeneTree.py +142 -0
  29. TreeSAK/KEGG_Luo17.py +807 -0
  30. TreeSAK/LcaToLeaves.py +66 -0
  31. TreeSAK/MarkerRef2Tree.py +616 -0
  32. TreeSAK/MarkerRef2Tree_backup.py +628 -0
  33. TreeSAK/MarkerSeq2Tree.py +299 -0
  34. TreeSAK/MarkerSeq2Tree_backup.py +259 -0
  35. TreeSAK/ModifyTopo.py +116 -0
  36. TreeSAK/Newick_tree_plotter.py +79 -0
  37. TreeSAK/OMA.py +170 -0
  38. TreeSAK/OMA2.py +212 -0
  39. TreeSAK/OneLineAln.py +50 -0
  40. TreeSAK/PB.py +155 -0
  41. TreeSAK/PMSF.py +115 -0
  42. TreeSAK/PhyloBiAssoc.R +84 -0
  43. TreeSAK/PhyloBiAssoc.py +167 -0
  44. TreeSAK/PlotMCMC.py +41 -0
  45. TreeSAK/PlotMcmcNode.py +152 -0
  46. TreeSAK/PlotMcmcNode_old.py +252 -0
  47. TreeSAK/RootTree.py +101 -0
  48. TreeSAK/RootTreeGTDB.py +371 -0
  49. TreeSAK/RootTreeGTDB214.py +288 -0
  50. TreeSAK/RootTreeGTDB220.py +300 -0
  51. TreeSAK/SequentialDating.py +16 -0
  52. TreeSAK/SingleAleHGT.py +157 -0
  53. TreeSAK/SingleLinePhy.py +50 -0
  54. TreeSAK/SliceMSA.py +142 -0
  55. TreeSAK/SplitScore.py +21 -0
  56. TreeSAK/SplitScore1.py +177 -0
  57. TreeSAK/SplitScore1OMA.py +148 -0
  58. TreeSAK/SplitScore2.py +608 -0
  59. TreeSAK/TaxaCountStats.R +256 -0
  60. TreeSAK/TaxonTree.py +47 -0
  61. TreeSAK/TreeSAK_config.py +32 -0
  62. TreeSAK/VERSION +164 -0
  63. TreeSAK/VisHPD95.R +45 -0
  64. TreeSAK/VisHPD95.py +200 -0
  65. TreeSAK/__init__.py +0 -0
  66. TreeSAK/ale_parser.py +74 -0
  67. TreeSAK/ale_splitter.py +63 -0
  68. TreeSAK/alignment_pruner.pl +1471 -0
  69. TreeSAK/assessOG.py +45 -0
  70. TreeSAK/batch_itol.py +171 -0
  71. TreeSAK/catfasta2phy.py +140 -0
  72. TreeSAK/cogTree.py +185 -0
  73. TreeSAK/compare_trees.R +30 -0
  74. TreeSAK/compare_trees.py +255 -0
  75. TreeSAK/dating.py +264 -0
  76. TreeSAK/dating_ss.py +361 -0
  77. TreeSAK/deltall.py +82 -0
  78. TreeSAK/do_rrtc.rb +464 -0
  79. TreeSAK/fa2phy.py +42 -0
  80. TreeSAK/filter_rename_ar53.py +118 -0
  81. TreeSAK/format_leaf_name.py +70 -0
  82. TreeSAK/gap_stats.py +38 -0
  83. TreeSAK/get_SCG_tree.py +742 -0
  84. TreeSAK/get_arCOG_seq.py +97 -0
  85. TreeSAK/global_functions.py +222 -0
  86. TreeSAK/gnm_leaves.py +43 -0
  87. TreeSAK/iTOL.py +791 -0
  88. TreeSAK/iTOL_gene_tree.py +80 -0
  89. TreeSAK/itol_msa_stats.py +56 -0
  90. TreeSAK/keep_highest_rrtc.py +37 -0
  91. TreeSAK/koTree.py +194 -0
  92. TreeSAK/label_gene_tree_by_gnm.py +34 -0
  93. TreeSAK/label_tree.R +75 -0
  94. TreeSAK/label_tree.py +121 -0
  95. TreeSAK/mad.py +708 -0
  96. TreeSAK/mcmc2tree.py +58 -0
  97. TreeSAK/mcmcTC copy.py +92 -0
  98. TreeSAK/mcmcTC.py +104 -0
  99. TreeSAK/mcmctree_vs_reltime.R +44 -0
  100. TreeSAK/mcmctree_vs_reltime.py +252 -0
  101. TreeSAK/merge_pdf.py +32 -0
  102. TreeSAK/pRTC.py +56 -0
  103. TreeSAK/parse_mcmctree.py +198 -0
  104. TreeSAK/parse_reltime.py +141 -0
  105. TreeSAK/phy2fa.py +37 -0
  106. TreeSAK/plot_distruibution_th.py +165 -0
  107. TreeSAK/prep_mcmctree_ctl.py +92 -0
  108. TreeSAK/print_leaves.py +32 -0
  109. TreeSAK/pruneMSA.py +63 -0
  110. TreeSAK/recode.py +73 -0
  111. TreeSAK/remove_bias.R +112 -0
  112. TreeSAK/rename_leaves.py +78 -0
  113. TreeSAK/replace_clade.py +55 -0
  114. TreeSAK/root_with_out_group.py +84 -0
  115. TreeSAK/run_TaxaCountStats_R_s1.py +455 -0
  116. TreeSAK/subsample_drep_gnms.py +74 -0
  117. TreeSAK/subset.py +69 -0
  118. TreeSAK/subset_tree_stupid_old_way.py +193 -0
  119. TreeSAK/supertree.py +330 -0
  120. TreeSAK/tmp_1.py +19 -0
  121. TreeSAK/tmp_2.py +19 -0
  122. TreeSAK/tmp_3.py +120 -0
  123. TreeSAK/tmp_4.py +43 -0
  124. TreeSAK/tmp_5.py +12 -0
  125. TreeSAK/weighted_rand.rb +23 -0
  126. treesak-1.53.3.data/scripts/TreeSAK +955 -0
  127. treesak-1.53.3.dist-info/LICENSE +674 -0
  128. treesak-1.53.3.dist-info/METADATA +27 -0
  129. treesak-1.53.3.dist-info/RECORD +131 -0
  130. treesak-1.53.3.dist-info/WHEEL +5 -0
  131. treesak-1.53.3.dist-info/top_level.txt +1 -0
@@ -0,0 +1,255 @@
1
+ import os
2
+ import glob
3
+ import argparse
4
+ from ete3 import Tree
5
+ import multiprocessing as mp
6
+
7
+
8
# Usage text passed to argparse.ArgumentParser(usage=...) for the compare_trees command.
compare_trees_usage = '''
======================== compare_trees example command ========================

TreeSAK compare_trees -t1 tree_1.newick -t2 tree_2.newick -o op_dir
TreeSAK compare_trees -t1 tree_dir -t2 tree_dir -tx newick -dm -t 12 -o op_dir

===============================================================================
'''
16
+
17
+
18
def sep_path_basename_ext(file_in):
    """Split a file path into directory, base name and extension.

    Returns a tuple (directory, basename, extension). The directory is '.'
    when file_in has no directory component, and the extension keeps its
    leading dot (it is '' when the file name has no extension).
    """
    directory, filename = os.path.split(file_in)
    stem, suffix = os.path.splitext(filename)
    return (directory or '.'), stem, suffix
29
+
30
+
31
def check_numeric(str_in):
    """Return True if str_in can be converted to a float, otherwise False.

    Anything accepted by float() counts as numeric (this includes strings
    such as 'nan' and 'inf'). Values float() cannot handle at all, e.g. None,
    are reported as non-numeric instead of raising TypeError.
    """
    try:
        float(str_in)
    except (TypeError, ValueError):
        return False
    return True
39
+
40
+
41
def parse_mantel_stats(mantel_stats_txt):
    """Extract the Mantel statistic r from a Mantel test report.

    The file is scanned for lines containing 'Mantel statistic r: ' and the
    text following that marker is returned as a string. If several lines
    match, the last one wins. Returns 'na' when no line carries a value.
    """
    marker = 'Mantel statistic r: '
    mantel_similarity = 'na'
    # context manager so the handle is closed even if reading fails
    with open(mantel_stats_txt) as stats_handle:
        for each_line in stats_handle:
            if marker in each_line:
                # split before stripping so a marker with nothing after it
                # yields an empty value instead of an IndexError
                value = each_line.split(marker, 1)[1].strip()
                if value:
                    mantel_similarity = value
    return mantel_similarity
48
+
49
+
50
def get_matrix(query_tree_list, subject_tree_list, mantel_stats_dir, write_out_dm, output_matrix, output_matrix_distance):
    """Write the tree-vs-tree similarity matrix (and optionally a distance matrix).

    query_tree_list:        row labels (tree base names)
    subject_tree_list:      column labels (tree base names)
    mantel_stats_dir:       folder holding the <a>_vs_<b>_mantel_stats.txt files
    write_out_dm:           if True, also write the distance matrix
    output_matrix:          path of the similarity matrix (tab separated)
    output_matrix_distance: path of the distance matrix (tab separated)

    For each query/subject pair the stats file is looked up in both
    orientations; if both exist, the subject_vs_query file takes precedence.
    Missing results are written as 'na'. Distances are 1 - similarity,
    rounded to four decimals, with an exact zero written as '0'.
    """
    header_line_str = '\t' + '\t'.join(subject_tree_list) + '\n'

    distance_lol = []
    # "with" guarantees the matrix file is closed even if parsing a stats file fails
    with open(output_matrix, 'w') as output_matrix_handle:
        output_matrix_handle.write(header_line_str)
        for each_qt in query_tree_list:

            similarity_row = [each_qt]
            distance_row = [each_qt]
            for each_st in subject_tree_list:

                tree_similarity = 'na'
                # the comparison may have been run in either orientation
                for first_tree, second_tree in ((each_qt, each_st), (each_st, each_qt)):
                    stats_txt = '%s/%s_vs_%s_mantel_stats.txt' % (mantel_stats_dir, first_tree, second_tree)
                    if os.path.isfile(stats_txt):
                        tree_similarity = parse_mantel_stats(stats_txt)
                similarity_row.append(tree_similarity)

                if check_numeric(tree_similarity):
                    in_distance = float("{0:.4f}".format(1 - float(tree_similarity)))
                    distance_row.append('0' if in_distance == 0 else str(in_distance))
                else:
                    distance_row.append('na')

            distance_lol.append(distance_row)
            output_matrix_handle.write('\t'.join(similarity_row) + '\n')

    # write out distance matrix
    if write_out_dm is True:
        with open(output_matrix_distance, 'w') as output_matrix_distance_handle:
            output_matrix_distance_handle.write(header_line_str)
            for each_list in distance_lol:
                output_matrix_distance_handle.write('\t'.join(each_list) + '\n')
96
+
97
+
98
def _run_command(cmd_args, stdout_file=None):
    """Run an external program without a shell, optionally redirecting stdout to a file."""
    import subprocess  # local import: not among this module's top-level imports

    try:
        if stdout_file is None:
            subprocess.run(cmd_args)
        else:
            with open(stdout_file, 'w') as stdout_handle:
                subprocess.run(cmd_args, stdout=stdout_handle)
    except OSError as run_error:
        # e.g. the program is not installed; report it and let the other comparisons go on
        print('Failed to run %s: %s' % (cmd_args[0], run_error))


def compare_trees_worker(arg_list):
    """Run a Mantel test between two trees and store the report in tmp_dir.

    arg_list holds, in order: the path to compare_trees.R, the first tree
    file, the second tree file, the folder for intermediate/result files and
    a flag telling whether intermediate files are kept.

    The report is written to <tmp_dir>/<tree1>_vs_<tree2>_mantel_stats.txt.
    If the trees share no leaves the pair is skipped. If they do not carry
    exactly the same leaves, both trees are first reduced to the shared
    leaves with "BioSAK subset_tree".

    External programs are started with argument lists rather than shell
    strings, so paths containing spaces or shell metacharacters are safe.
    """
    compare_trees_R, tree_file_1, tree_file_2, tmp_dir, keep_tmp_file = arg_list[:5]

    _, tree1_basename, tree1_extension = sep_path_basename_ext(tree_file_1)
    _, tree2_basename, tree2_extension = sep_path_basename_ext(tree_file_2)

    op_stats = '%s/%s_vs_%s_mantel_stats.txt' % (tmp_dir, tree1_basename, tree2_basename)

    # iterating over an ete3 Tree yields its leaves
    tree1_leaf_list = [leaf.name for leaf in Tree(tree_file_1, format=1)]
    tree2_leaf_list = [leaf.name for leaf in Tree(tree_file_2, format=1)]
    shared_leaves = set(tree1_leaf_list).intersection(tree2_leaf_list)

    if len(shared_leaves) == 0:
        print('No leaves shared between %s and %s, calculation skipped!' % (tree1_basename, tree2_basename))
        return

    # identical leaf sets: compare the trees as they are
    if len(tree1_leaf_list) == len(tree2_leaf_list) == len(shared_leaves):
        _run_command(['Rscript', compare_trees_R, '-a', tree_file_1, '-b', tree_file_2], op_stats)
        return

    print('Performing Mantel test based on %s leaves shared by %s (%s) and %s (%s)' % (len(shared_leaves), tree1_basename, len(tree1_leaf_list), tree2_basename, len(tree2_leaf_list)))

    # write out shared leaves (sorted, so the file content is reproducible)
    shared_leaves_txt = '%s/%s_vs_%s_shared_leaves.txt' % (tmp_dir, tree1_basename, tree2_basename)
    with open(shared_leaves_txt, 'w') as shared_leaves_txt_handle:
        for each_shared_leaf in sorted(shared_leaves):
            shared_leaves_txt_handle.write(each_shared_leaf + '\n')

    # subset both trees to the shared leaves
    t1_subset = '%s/%s_vs_%s_%s_subset%s' % (tmp_dir, tree1_basename, tree2_basename, tree1_basename, tree1_extension)
    t2_subset = '%s/%s_vs_%s_%s_subset%s' % (tmp_dir, tree1_basename, tree2_basename, tree2_basename, tree2_extension)
    _run_command(['BioSAK', 'subset_tree', '-tree', tree_file_1, '-taxon', shared_leaves_txt, '-out', t1_subset, '-q'])
    _run_command(['BioSAK', 'subset_tree', '-tree', tree_file_2, '-taxon', shared_leaves_txt, '-out', t2_subset, '-q'])

    _run_command(['Rscript', compare_trees_R, '-a', t1_subset, '-b', t2_subset], op_stats)

    if keep_tmp_file is False:
        for each_tmp_file in (shared_leaves_txt, t1_subset, t2_subset):
            # a subset file is missing if BioSAK failed; nothing to remove then
            if os.path.isfile(each_tmp_file):
                os.remove(each_tmp_file)
156
+
157
+
158
def compare_trees(args):
    """Compare two trees (or two folders of trees) with pairwise Mantel tests.

    args is a dict with the keys:
        'o'   output directory
        't1'  tree file or folder of tree files (query)
        't2'  tree file or folder of tree files (subject)
        'tx'  extension of tree files, used when a folder is given
        'dm'  also export a distance matrix (1 - similarity)
        't'   number of worker processes
        'tmp' keep intermediate files
        'f'   overwrite the output directory if it exists

    Each query/subject pair is compared once (a pair and its reverse count as
    the same comparison). Results are collected into
    <o>/Matrix_similarity.txt and, with 'dm', <o>/Matrix_distance.txt.
    Exits if no tree file is found, or if the output directory exists and
    'f' is not set.
    """
    import shutil  # local import: not among this module's top-level imports

    op_dir = args['o']
    tree_file_1 = args['t1']
    tree_file_2 = args['t2']
    tree_file_ext = args['tx']
    export_dm = args['dm']
    num_threads = args['t']
    keep_tmp = args['tmp']
    force_create_op_dir = args['f']

    # the R script is shipped next to this module
    compare_trees_R = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'compare_trees.R')
    tmp_dir = '%s/tmp' % op_dir

    query_tree_list = []
    if os.path.isfile(tree_file_1):
        query_tree_list = [tree_file_1]
    elif os.path.isdir(tree_file_1):
        query_tree_list = glob.glob('%s/*.%s' % (tree_file_1, tree_file_ext))

    subject_tree_list = []
    if os.path.isfile(tree_file_2):
        subject_tree_list = [tree_file_2]
    elif os.path.isdir(tree_file_2):
        subject_tree_list = glob.glob('%s/*.%s' % (tree_file_2, tree_file_ext))

    # stop before touching the output folder if there is nothing to compare
    if not query_tree_list or not subject_tree_list:
        print('No tree file found for -t1 and/or -t2, program exited!')
        raise SystemExit

    # prepare arg list for compare_trees_worker, each unordered pair only once
    to_be_calculated_set = set()
    list_for_compare_trees_worker = []
    for each_query_tree in query_tree_list:
        for each_subject_tree in subject_tree_list:
            if (each_query_tree, each_subject_tree) not in to_be_calculated_set:
                list_for_compare_trees_worker.append([compare_trees_R, each_query_tree, each_subject_tree, tmp_dir, keep_tmp])
                to_be_calculated_set.add((each_query_tree, each_subject_tree))
                to_be_calculated_set.add((each_subject_tree, each_query_tree))

    print('Total pairs of trees to compare: %s' % len(list_for_compare_trees_worker))

    # create op_dir
    if os.path.isdir(op_dir):
        if force_create_op_dir is True:
            shutil.rmtree(op_dir)
        else:
            print('Output folder detected, program exited!')
            raise SystemExit
    os.makedirs(tmp_dir)  # creates op_dir as well

    # compare trees with multiprocessing
    pool = mp.Pool(processes=num_threads)
    try:
        pool.map(compare_trees_worker, list_for_compare_trees_worker)
    finally:
        # always release the worker processes, even if a comparison raised
        pool.close()
        pool.join()

    # get matrix, labelled by tree base names
    output_matrix_similarity = '%s/Matrix_similarity.txt' % op_dir
    output_matrix_distance = '%s/Matrix_distance.txt' % op_dir
    query_tree_list_basename = [sep_path_basename_ext(each_q_tree)[1] for each_q_tree in query_tree_list]
    subject_tree_list_basename = [sep_path_basename_ext(each_s_tree)[1] for each_s_tree in subject_tree_list]

    get_matrix(sorted(query_tree_list_basename), sorted(subject_tree_list_basename), tmp_dir, export_dm, output_matrix_similarity, output_matrix_distance)

    # final report
    if export_dm is True:
        print('Data matrix exported to: %s and %s' % (output_matrix_similarity, output_matrix_distance))
    else:
        print('Data matrix exported to: %s' % output_matrix_similarity)

    print('Done!')
241
+
242
+
243
if __name__ == '__main__':

    # Build the command-line interface from a table of (flag, settings) pairs,
    # then hand the parsed options to compare_trees() as a plain dict.
    compare_trees_parser = argparse.ArgumentParser(usage=compare_trees_usage)
    cli_options = [
        ('-o', dict(required=True, help='output directory')),
        ('-t1', dict(required=True, help='tree (folder) 1')),
        ('-t2', dict(required=True, help='tree (folder) 2')),
        ('-tx', dict(required=False, default='newick', help='extention of tree files, default: newick')),
        ('-dm', dict(required=False, action="store_true", help='export distance-alike matrix, obtained by subtract the similarity value from 1')),
        ('-t', dict(required=False, type=int, default=1, help='number of threads')),
        ('-tmp', dict(required=False, action="store_true", help='keep tmp files')),
        ('-f', dict(required=False, action="store_true", help='force overwrite')),
    ]
    for option_flag, option_settings in cli_options:
        compare_trees_parser.add_argument(option_flag, **option_settings)
    args = vars(compare_trees_parser.parse_args())
    compare_trees(args)
TreeSAK/dating.py ADDED
@@ -0,0 +1,264 @@
1
import argparse
import itertools
import os
import shutil
import subprocess

try:
    from distutils.spawn import find_executable
except ImportError:
    # distutils was removed from the standard library in Python 3.12;
    # shutil.which is the documented replacement for find_executable.
    find_executable = shutil.which
5
+
6
+
7
# Example commands, parameter-file format and result-assessment notes for the dating command.
dating_usage = '''
============================ dating example commands ============================

# Requirement: PAML

TreeSAK dating -i gnm.tree -m msa.phy -p topo1 -o dating_wd -f -s parameter.txt

# parameter.txt file format (tab separated)
clock 2,3
nsample 50000

# assess dating results
ESS of at least 200 is commonly recommended, although ESS higher than 100 is
also often seen in literature.

=================================================================================
'''
24
+
25
+
26
def check_dependencies(program_list):
    """Exit if any of the given programs cannot be found on the PATH.

    program_list: names of the executables that are required.

    Prints the missing programs (comma separated) and raises SystemExit when
    at least one is missing; returns None otherwise. Uses shutil.which, as
    distutils (find_executable) is no longer part of Python since 3.12.
    """
    not_detected_programs = [needed_program for needed_program in program_list
                             if shutil.which(needed_program) is None]

    if not_detected_programs:
        print('%s not found, program exited!' % ','.join(not_detected_programs))
        raise SystemExit
36
+
37
+
38
def sep_path_basename_ext(file_in):
    """Break a file path into (file name, directory, base name, extension).

    The directory is '.' when file_in has no directory component; the
    extension is returned without its leading dot.
    """
    name = os.path.basename(file_in)
    directory = os.path.dirname(file_in) or '.'
    base, dotted_ext = os.path.splitext(name)
    return name, directory, base, dotted_ext[1:]
46
+
47
+
48
def prep_mcmctree_ctl(ctl_para_dict, mcmctree_ctl_file):
    """Write a mcmctree control file.

    ctl_para_dict:     parameter name -> value. 'seqfile', 'treefile',
                       'mcmcfile', 'outfile', 'seqtype' and 'usedata' are
                       required; every other parameter falls back to the
                       default given below.
    mcmctree_ctl_file: path of the control file to write.

    Raises KeyError if a required parameter is missing. All lines are
    formatted before the file is opened, so a missing parameter no longer
    leaves a truncated control file behind.
    """
    get_para = ctl_para_dict.get
    ctl_lines = [
        ' seed = %s\n' % get_para('seed', '-1'),
        ' seqfile = %s\n' % ctl_para_dict['seqfile'],
        ' treefile = %s\n' % ctl_para_dict['treefile'],
        ' mcmcfile = %s\n' % ctl_para_dict['mcmcfile'],
        ' outfile = %s\n' % ctl_para_dict['outfile'],
        ' ndata = %s\n' % get_para('ndata', 1),
        ' seqtype = %s * 0: nucleotides; 1:codons; 2:AAs\n' % ctl_para_dict['seqtype'],
        ' usedata = %s * 0: no data; 1:seq like; 2:normal approximation; 3:out.BV (in.BV)\n' % ctl_para_dict['usedata'],
        ' clock = %s * 1: global clock; 2: independent rates; 3: correlated rates\n' % get_para('clock', 2),
        ' RootAge = %s * safe constraint on root age, used if no fossil for root.\n' % get_para('RootAge', '<1.0'),
        ' model = %s * 0:JC69, 1:K80, 2:F81, 3:F84, 4:HKY85\n' % get_para('model', 0),
        ' alpha = %s * alpha for gamma rates at sites\n' % get_para('alpha', 0.5),
        ' ncatG = %s * No. categories in discrete gamma\n' % get_para('ncatG', 4),
        ' cleandata = %s * remove sites with ambiguity data (1:yes, 0:no)?\n' % get_para('cleandata', 0),
        ' BDparas = %s * birth, death, sampling\n' % get_para('BDparas', '1 1 0.1'),
        ' kappa_gamma = %s * gamma prior for kappa\n' % get_para('kappa_gamma', '6 2'),
        ' alpha_gamma = %s * gamma prior for alpha\n' % get_para('alpha_gamma', '1 1'),
        ' rgene_gamma = %s * gammaDir prior for rate for genes\n' % get_para('rgene_gamma', '1 50 1'),
        ' sigma2_gamma = %s * gammaDir prior for sigma^2 (for clock=2 or 3)\n' % get_para('sigma2_gamma', '1 10 1'),
        ' finetune = %s * auto (0 or 1): times, musigma2, rates, mixing, paras, FossilErr\n' % get_para('finetune', '1: .1 .1 .1 .1 .1 .1'),
        ' print = %s * 0: no mcmc sample; 1: everything except branch rates 2: everything\n' % get_para('print', 1),
        ' burnin = %s\n' % get_para('burnin', 50000),
        ' sampfreq = %s\n' % get_para('sampfreq', 50),
        ' nsample = %s\n' % get_para('nsample', 10000),
    ]

    with open(mcmctree_ctl_file, 'w') as ctl_file_handle:
        ctl_file_handle.writelines(ctl_lines)
76
+
77
+
78
def get_parameter_combinations(para_to_test_dict):
    """Enumerate every combination of the parameter settings to test.

    para_to_test_dict maps a parameter name to the list of values to try.
    Parameters and their values are both processed in sorted order. The
    result maps a label to the parameter dict of that combination; the label
    joins '<name><value>' for each parameter with '_', with spaces replaced
    by '_'.
    """
    para_names = sorted(para_to_test_dict)
    value_choices = [sorted(para_to_test_dict[para_name]) for para_name in para_names]

    para_dod = dict()
    for chosen_values in itertools.product(*value_choices):
        label_parts = [('%s%s' % (para_name, para_value)).replace(' ', '_')
                       for para_name, para_value in zip(para_names, chosen_values)]
        para_dod['_'.join(label_parts)] = dict(zip(para_names, chosen_values))

    return para_dod
113
+
114
+
115
def dating(args):
    """Prepare an MCMCTree dating analysis for every combination of settings.

    args is a dict with the keys:
        'i'    input tree file
        'm'    sequence alignment file
        'o'    output directory
        'p'    prefix of the mcmc/out files written by the dating runs
        'st'   sequence type passed to mcmctree (seqtype)
        's'    settings file: one parameter per line, the name followed by a
               comma separated list of values to compare
        'srun' wrap the generated commands with "BioSAK srun"
        'f'    overwrite the output directory if it exists

    Step one runs mcmctree with usedata=3 in <o>/get_bv_wd to produce out.BV.
    Step two creates two run folders (<combination>_run1/_run2) per parameter
    combination, each with the tree, the alignment, a control file
    (usedata=2) and in.BV, and writes the commands that start the runs to
    <o>/dating_cmds.txt. The dating runs themselves are not started here.
    """
    tree_file = args['i']
    msa_file = args['m']
    op_dir = args['o']
    op_prefix = args['p']
    seq_type = args['st']
    settings_to_compare = args['s']
    wrap_with_srun = args['srun']
    force_overwrite = args['f']

    check_dependencies(['mcmctree'])

    # read in the settings to compare
    para_to_test_dict = dict()
    with open(settings_to_compare) as settings_handle:
        for each_para in settings_handle:
            each_para_split = each_para.strip().split()
            if not each_para_split:
                continue  # blank line
            if len(each_para_split) < 2:
                print('No value provided for %s in %s, line ignored!' % (each_para_split[0], settings_to_compare))
                continue
            para_to_test_dict[each_para_split[0]] = each_para_split[1].split(',')

    ####################################################################################################################

    # input files are copied into every working folder, so control files refer to them by name only
    tree_f_name = os.path.basename(tree_file)
    msa_f_name = os.path.basename(msa_file)

    get_bv_wd = '%s/get_bv_wd' % op_dir
    mcmctree_ctl_bv = '%s/mcmctree.ctl' % get_bv_wd
    get_BV_cmd_txt = '%s/get_BV_cmd.txt' % get_bv_wd
    get_bv_log_txt = '%s/log.txt' % get_bv_wd
    out_bv = '%s/out.BV' % get_bv_wd
    dating_cmds_txt = '%s/dating_cmds.txt' % op_dir

    # create output folder
    if os.path.isdir(op_dir):
        if force_overwrite is True:
            shutil.rmtree(op_dir)
        else:
            print('Output folder exist, program exited!')
            raise SystemExit

    os.makedirs(op_dir)

    ############################################# write out step 1 command #############################################

    # prepare files for getting bv file
    os.mkdir(get_bv_wd)
    shutil.copy(tree_file, get_bv_wd)
    shutil.copy(msa_file, get_bv_wd)

    get_bv_para_dict = {
        'seqfile': msa_f_name,
        'treefile': tree_f_name,
        'mcmcfile': 'mcmc.txt',
        'outfile': 'out.txt',
        'seqtype': seq_type,
        'usedata': '3',
    }
    prep_mcmctree_ctl(get_bv_para_dict, mcmctree_ctl_bv)

    # write out get bv command
    with open(get_BV_cmd_txt, 'w') as get_BV_cmd_txt_handle:
        get_BV_cmd_txt_handle.write('mcmctree\n')

    # run command to get bv file; cwd= is used instead of os.chdir so the
    # caller's working directory is never left changed
    print('Running step one command to get the BV file.')
    with open(get_bv_log_txt, 'w') as log_handle:
        subprocess.run(['mcmctree'], cwd=get_bv_wd, stdout=log_handle)
    print('Step one finished.')

    # every dating run needs in.BV, so there is no point going on without out.BV
    if not os.path.isfile(out_bv):
        print('%s not found, please check %s, program exited!' % (out_bv, get_bv_log_txt))
        raise SystemExit

    ############################################# write out step 2 command #############################################

    print('Preparing files for dating estimation')

    para_comb_dict = get_parameter_combinations(para_to_test_dict)
    print('para_comb_dict')
    print(para_comb_dict)

    with open(dating_cmds_txt, 'w') as dating_cmds_txt_handle:
        for para_comb in sorted(para_comb_dict):
            # two independent runs per parameter combination
            for run_id in ('run1', 'run2'):

                # create dir and copy tree and msa file
                current_dating_wd = '%s/%s_%s' % (op_dir, para_comb, run_id)
                os.mkdir(current_dating_wd)
                shutil.copy(tree_file, current_dating_wd)
                shutil.copy(msa_file, current_dating_wd)

                # prepare mcmctree.ctl file
                current_para_dict = para_comb_dict[para_comb].copy()
                current_para_dict['seqfile'] = msa_f_name
                current_para_dict['treefile'] = tree_f_name
                current_para_dict['mcmcfile'] = '%s_%s_%s_mcmc.txt' % (op_prefix, para_comb, run_id)
                current_para_dict['outfile'] = '%s_%s_%s_out.txt' % (op_prefix, para_comb, run_id)
                current_para_dict['seqtype'] = seq_type
                current_para_dict['usedata'] = '2'
                prep_mcmctree_ctl(current_para_dict, '%s/mcmctree.ctl' % current_dating_wd)

                # copy BV file generated in step one
                shutil.copy(out_bv, '%s/in.BV' % current_dating_wd)

                # write out command; abspath keeps it valid for relative and absolute output directories
                cmd_run = 'cd %s; mcmctree' % os.path.abspath(current_dating_wd)
                if wrap_with_srun is True:
                    cmd_run = 'BioSAK srun -c "%s"' % cmd_run
                dating_cmds_txt_handle.write(cmd_run + '\n')

    print('Job script for performing dating exported to: %s' % dating_cmds_txt)
249
+
250
+
251
if __name__ == '__main__':

    # Command-line entry point: parse the options and pass them to dating() as a dict.
    # The example text in dating_usage is shown as the usage message.
    dating_parser = argparse.ArgumentParser(usage=dating_usage)
    dating_parser.add_argument('-i', required=True, help='input tree file')
    dating_parser.add_argument('-m', required=True, help='sequence alignments')
    dating_parser.add_argument('-o', required=True, help='output directory')
    dating_parser.add_argument('-p', required=True, help='output prefix')
    dating_parser.add_argument('-s', required=True, help='settings to compare')
    dating_parser.add_argument('-st', required=False, default='2', help='sequence type, 0 for nucleotides, 1 for codons, 2 for AAs, default: 2')
    dating_parser.add_argument('-srun', required=False, action="store_true", help='wrap commands with BioSAK srun')
    dating_parser.add_argument('-f', required=False, action="store_true", help='force overwrite')
    args = vars(dating_parser.parse_args())
    dating(args)
264
+