treesak 1.51.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of treesak might be problematic. Click here for more details.
- TreeSAK/ALE.py +63 -0
- TreeSAK/ALE1.py +268 -0
- TreeSAK/ALE2.py +168 -0
- TreeSAK/ALE2RTC.py +30 -0
- TreeSAK/ALE3.py +205 -0
- TreeSAK/ALE4.py +636 -0
- TreeSAK/ALE5.py +210 -0
- TreeSAK/ALE6.py +401 -0
- TreeSAK/ALE7.py +126 -0
- TreeSAK/ALE_backup.py +1081 -0
- TreeSAK/AssessCVG.py +128 -0
- TreeSAK/AssessMarker.py +306 -0
- TreeSAK/AssessMarkerDeltaLL.py +257 -0
- TreeSAK/AssessMarkerPA.py +317 -0
- TreeSAK/AssessPB.py +130 -0
- TreeSAK/BMGE.jar +0 -0
- TreeSAK/BMGE.py +49 -0
- TreeSAK/CompareMCMC.py +138 -0
- TreeSAK/ConcateMSA.py +111 -0
- TreeSAK/ConvertMSA.py +135 -0
- TreeSAK/Dir.rb +82 -0
- TreeSAK/ExtractMarkerSeq.py +263 -0
- TreeSAK/FastRoot.py +1175 -0
- TreeSAK/FastRoot_backup.py +1122 -0
- TreeSAK/FigTree.py +34 -0
- TreeSAK/GTDB_tree.py +76 -0
- TreeSAK/GeneTree.py +142 -0
- TreeSAK/KEGG_Luo17.py +807 -0
- TreeSAK/LcaToLeaves.py +66 -0
- TreeSAK/MarkerRef2Tree.py +616 -0
- TreeSAK/MarkerRef2Tree_backup.py +628 -0
- TreeSAK/MarkerSeq2Tree.py +290 -0
- TreeSAK/MarkerSeq2Tree_backup.py +259 -0
- TreeSAK/ModifyTopo.py +116 -0
- TreeSAK/Newick_tree_plotter.py +79 -0
- TreeSAK/OMA.py +170 -0
- TreeSAK/OMA2.py +212 -0
- TreeSAK/OneLineAln.py +50 -0
- TreeSAK/PB.py +155 -0
- TreeSAK/PMSF.py +106 -0
- TreeSAK/PhyloBiAssoc.R +84 -0
- TreeSAK/PhyloBiAssoc.py +167 -0
- TreeSAK/PlotMCMC.py +41 -0
- TreeSAK/PlotMcmcNode.py +152 -0
- TreeSAK/PlotMcmcNode_old.py +252 -0
- TreeSAK/RootTree.py +101 -0
- TreeSAK/RootTreeGTDB214.py +288 -0
- TreeSAK/RootTreeGTDB220.py +300 -0
- TreeSAK/RootTreeGTDB226.py +300 -0
- TreeSAK/SequentialDating.py +16 -0
- TreeSAK/SingleAleHGT.py +157 -0
- TreeSAK/SingleLinePhy.py +50 -0
- TreeSAK/SliceMSA.py +142 -0
- TreeSAK/SplitScore.py +19 -0
- TreeSAK/SplitScore1.py +178 -0
- TreeSAK/SplitScore1OMA.py +148 -0
- TreeSAK/SplitScore2.py +597 -0
- TreeSAK/TaxaCountStats.R +256 -0
- TreeSAK/TaxonTree.py +47 -0
- TreeSAK/TreeSAK_config.py +32 -0
- TreeSAK/VERSION +158 -0
- TreeSAK/VisHPD95.R +45 -0
- TreeSAK/VisHPD95.py +200 -0
- TreeSAK/__init__.py +0 -0
- TreeSAK/ale_parser.py +74 -0
- TreeSAK/ale_splitter.py +63 -0
- TreeSAK/alignment_pruner.pl +1471 -0
- TreeSAK/assessOG.py +45 -0
- TreeSAK/catfasta2phy.py +140 -0
- TreeSAK/cogTree.py +185 -0
- TreeSAK/compare_trees.R +30 -0
- TreeSAK/compare_trees.py +255 -0
- TreeSAK/dating.py +264 -0
- TreeSAK/dating_ss.py +361 -0
- TreeSAK/deltall.py +82 -0
- TreeSAK/do_rrtc.rb +464 -0
- TreeSAK/fa2phy.py +42 -0
- TreeSAK/format_leaf_name.py +70 -0
- TreeSAK/gap_stats.py +38 -0
- TreeSAK/get_SCG_tree.py +742 -0
- TreeSAK/get_arCOG_seq.py +97 -0
- TreeSAK/global_functions.py +222 -0
- TreeSAK/gnm_leaves.py +43 -0
- TreeSAK/iTOL.py +791 -0
- TreeSAK/iTOL_gene_tree.py +80 -0
- TreeSAK/itol_msa_stats.py +56 -0
- TreeSAK/keep_highest_rrtc.py +37 -0
- TreeSAK/koTree.py +194 -0
- TreeSAK/label_tree.R +75 -0
- TreeSAK/label_tree.py +121 -0
- TreeSAK/mad.py +708 -0
- TreeSAK/mcmc2tree.py +58 -0
- TreeSAK/mcmcTC copy.py +92 -0
- TreeSAK/mcmcTC.py +104 -0
- TreeSAK/mcmctree_vs_reltime.R +44 -0
- TreeSAK/mcmctree_vs_reltime.py +252 -0
- TreeSAK/merge_pdf.py +32 -0
- TreeSAK/pRTC.py +56 -0
- TreeSAK/parse_mcmctree.py +198 -0
- TreeSAK/parse_reltime.py +141 -0
- TreeSAK/phy2fa.py +37 -0
- TreeSAK/plot_distruibution_th.py +165 -0
- TreeSAK/prep_mcmctree_ctl.py +92 -0
- TreeSAK/print_leaves.py +32 -0
- TreeSAK/pruneMSA.py +63 -0
- TreeSAK/recode.py +73 -0
- TreeSAK/remove_bias.R +112 -0
- TreeSAK/rename_leaves.py +77 -0
- TreeSAK/replace_clade.py +55 -0
- TreeSAK/root_with_out_group.py +84 -0
- TreeSAK/run_TaxaCountStats_R_s1.py +455 -0
- TreeSAK/subsample_drep_gnms.py +74 -0
- TreeSAK/subset.py +69 -0
- TreeSAK/subset_tree_stupid_old_way.py +193 -0
- TreeSAK/supertree.py +330 -0
- TreeSAK/tmp_1.py +19 -0
- TreeSAK/tmp_2.py +19 -0
- TreeSAK/tmp_3.py +120 -0
- TreeSAK/weighted_rand.rb +23 -0
- treesak-1.51.2.data/scripts/TreeSAK +950 -0
- treesak-1.51.2.dist-info/LICENSE +674 -0
- treesak-1.51.2.dist-info/METADATA +27 -0
- treesak-1.51.2.dist-info/RECORD +125 -0
- treesak-1.51.2.dist-info/WHEEL +5 -0
- treesak-1.51.2.dist-info/top_level.txt +1 -0
TreeSAK/dating.py
ADDED
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import argparse
|
|
3
|
+
import itertools
|
|
4
|
+
from distutils.spawn import find_executable
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
dating_usage = '''
|
|
8
|
+
============================ dating example commands ============================
|
|
9
|
+
|
|
10
|
+
# Requirement: PAML
|
|
11
|
+
|
|
12
|
+
TreeSAK dating -i gnm.tree -m msa.phy -p topo1 -o dating_wd -f -s parameter.txt
|
|
13
|
+
|
|
14
|
+
# parameter.txt file format (tab separated)
|
|
15
|
+
clock 2,3
|
|
16
|
+
nsample 50000
|
|
17
|
+
|
|
18
|
+
# assess dating results
|
|
19
|
+
ESS of at least 200 is commonly recommended, although ESS higher than 100 is
|
|
20
|
+
also often seen in literature.
|
|
21
|
+
|
|
22
|
+
=================================================================================
|
|
23
|
+
'''
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def check_dependencies(program_list):
    """Exit the program if a required executable cannot be found on PATH.

    Args:
        program_list: iterable of executable names to look up.

    Raises:
        SystemExit: with status 1 when at least one program is missing.
    """
    # shutil.which is the supported replacement for the deprecated
    # distutils.spawn.find_executable (PEP 632); imported locally so the
    # function does not depend on distutils.
    from shutil import which

    not_detected_programs = [p for p in program_list if which(p) is None]

    if not_detected_programs:
        print('%s not found, program exited!' % ','.join(not_detected_programs))
        # non-zero status, so a calling shell or pipeline can see the failure
        raise SystemExit(1)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def sep_path_basename_ext(file_in):
    """Split a path into (file name, folder, base name, extension).

    The folder is '.' when file_in has no folder part, and the extension
    is returned without its leading dot.
    """
    folder, file_name = os.path.split(file_in)
    stem, suffix = os.path.splitext(file_name)
    return file_name, (folder if folder != '' else '.'), stem, suffix[1:]
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def prep_mcmctree_ctl(ctl_para_dict, mcmctree_ctl_file):
    """Write an MCMCTree control file.

    Args:
        ctl_para_dict: settings for the control file. The keys 'seqfile',
            'treefile', 'mcmcfile', 'outfile', 'seqtype' and 'usedata' are
            required; every other setting falls back to the default given
            below when it is absent.
        mcmctree_ctl_file: path of the control file to write (overwritten).

    Raises:
        KeyError: if a required key is missing from ctl_para_dict.
    """
    get = ctl_para_dict.get

    # All lines are formatted before the file is opened, so a missing
    # required key raises KeyError without leaving a truncated control file.
    ctl_lines = [
        ' seed = %s\n' % get('seed', '-1'),
        ' seqfile = %s\n' % ctl_para_dict['seqfile'],
        ' treefile = %s\n' % ctl_para_dict['treefile'],
        ' mcmcfile = %s\n' % ctl_para_dict['mcmcfile'],
        ' outfile = %s\n' % ctl_para_dict['outfile'],
        ' ndata = %s\n' % get('ndata', 1),
        ' seqtype = %s * 0: nucleotides; 1:codons; 2:AAs\n' % ctl_para_dict['seqtype'],
        ' usedata = %s * 0: no data; 1:seq like; 2:normal approximation; 3:out.BV (in.BV)\n' % ctl_para_dict['usedata'],
        ' clock = %s * 1: global clock; 2: independent rates; 3: correlated rates\n' % get('clock', 2),
        ' RootAge = %s * safe constraint on root age, used if no fossil for root.\n' % get('RootAge', '<1.0'),
        ' model = %s * 0:JC69, 1:K80, 2:F81, 3:F84, 4:HKY85\n' % get('model', 0),
        ' alpha = %s * alpha for gamma rates at sites\n' % get('alpha', 0.5),
        ' ncatG = %s * No. categories in discrete gamma\n' % get('ncatG', 4),
        ' cleandata = %s * remove sites with ambiguity data (1:yes, 0:no)?\n' % get('cleandata', 0),
        ' BDparas = %s * birth, death, sampling\n' % get('BDparas', '1 1 0.1'),
        ' kappa_gamma = %s * gamma prior for kappa\n' % get('kappa_gamma', '6 2'),
        ' alpha_gamma = %s * gamma prior for alpha\n' % get('alpha_gamma', '1 1'),
        ' rgene_gamma = %s * gammaDir prior for rate for genes\n' % get('rgene_gamma', '1 50 1'),
        ' sigma2_gamma = %s * gammaDir prior for sigma^2 (for clock=2 or 3)\n' % get('sigma2_gamma', '1 10 1'),
        ' finetune = %s * auto (0 or 1): times, musigma2, rates, mixing, paras, FossilErr\n' % get('finetune', '1: .1 .1 .1 .1 .1 .1'),
        ' print = %s * 0: no mcmc sample; 1: everything except branch rates 2: everything\n' % get('print', 1),
        ' burnin = %s\n' % get('burnin', 50000),
        ' sampfreq = %s\n' % get('sampfreq', 50),
        ' nsample = %s\n' % get('nsample', 10000),
    ]

    # the context manager closes the handle even if a write fails
    with open(mcmctree_ctl_file, 'w') as ctl_file_handle:
        ctl_file_handle.writelines(ctl_lines)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def get_parameter_combinations(para_to_test_dict):
    """Build every combination of the parameter settings to test.

    Args:
        para_to_test_dict: maps a parameter name to a list of settings.

    Returns:
        A dict mapping a combination label to a {parameter: setting} dict.
        A label joins one '<parameter><setting>' part per parameter with
        '_'; parameters and settings are taken in sorted order and spaces
        inside a part are replaced by '_'.
    """
    para_names = sorted(para_to_test_dict)
    settings_per_para = [sorted(para_to_test_dict[name]) for name in para_names]

    para_dod = dict()
    # A single product over the settings is enough: labels and the
    # per-combination dicts are both derived from the same tuple.
    for settings in itertools.product(*settings_per_para):
        label = '_'.join(('%s%s' % (name, setting)).replace(' ', '_')
                         for name, setting in zip(para_names, settings))
        para_dod[label] = dict(zip(para_names, settings))

    return para_dod
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def dating(args):
    """Prepare a two-step MCMCTree dating analysis.

    Step one runs mcmctree (usedata = 3) inside <o>/get_bv_wd to produce
    out.BV. Step two creates, for every combination of the settings listed
    in the parameter file, two replicate run folders (<combination>_run1 and
    <combination>_run2) holding the tree, the alignment, a control file
    (usedata = 2) and in.BV, and writes the commands that run them to
    <o>/dating_cmds.txt. The step two commands are written, not executed.

    Args:
        args: dict with the keys 'i' (tree file), 'm' (alignment file),
            'o' (output folder), 'p' (output prefix), 'st' (sequence type),
            's' (parameter file), 'srun' (bool, wrap commands with
            "BioSAK srun") and 'f' (bool, overwrite an existing output
            folder).

    Raises:
        SystemExit: if mcmctree is missing, the output folder exists and
            'f' is not set, the parameter file has a line without a value,
            or step one did not produce out.BV.
    """
    # local imports: file system and process handling without shell strings
    import shutil
    import subprocess

    tree_file = args['i']
    msa_file = args['m']
    op_dir = args['o']
    op_prefix = args['p']
    seq_type = args['st']
    settings_to_compare = args['s']
    wrap_with_srun = args['srun']
    force_overwrite = args['f']

    check_dependencies(['mcmctree'])

    # read in settings to compare, one "<name> <v1>,<v2>,..." entry per line
    para_to_test_dict = dict()
    with open(settings_to_compare) as settings_handle:
        for each_para in settings_handle:
            # maxsplit=1 keeps values that contain spaces (e.g. "1 1 0.1") intact
            each_para_split = each_para.strip().split(None, 1)
            if not each_para_split:
                continue  # ignore blank lines
            if len(each_para_split) < 2:
                print('No value provided for %s in %s, program exited!' % (each_para_split[0], settings_to_compare))
                raise SystemExit(1)
            para_to_test_dict[each_para_split[0]] = each_para_split[1].split(',')

    ####################################################################################################################

    tree_f_name, tree_f_path, tree_f_base, tree_f_ext = sep_path_basename_ext(tree_file)
    msa_f_name, msa_f_path, msa_f_base, msa_f_ext = sep_path_basename_ext(msa_file)

    get_bv_wd = '%s/get_bv_wd' % op_dir
    mcmctree_ctl_bv = '%s/mcmctree.ctl' % get_bv_wd
    get_BV_cmd_txt = '%s/get_BV_cmd.txt' % get_bv_wd
    get_bv_log_txt = '%s/log.txt' % get_bv_wd
    bv_file = '%s/out.BV' % get_bv_wd
    dating_cmds_txt = '%s/dating_cmds.txt' % op_dir

    # create output folder
    if os.path.isdir(op_dir) is True:
        if force_overwrite is True:
            # rmtree receives the path as data, it is never parsed by a shell
            shutil.rmtree(op_dir)
        else:
            print('Output folder exist, program exited!')
            raise SystemExit(1)

    os.mkdir(op_dir)

    ############################################# write out step 1 command #############################################

    # prepare files for getting bv file
    os.mkdir(get_bv_wd)
    shutil.copy(tree_file, get_bv_wd)
    shutil.copy(msa_file, get_bv_wd)

    get_bv_para_dict = dict()
    get_bv_para_dict['seqfile'] = msa_f_name
    get_bv_para_dict['treefile'] = tree_f_name
    get_bv_para_dict['mcmcfile'] = 'mcmc.txt'
    get_bv_para_dict['outfile'] = 'out.txt'
    get_bv_para_dict['seqtype'] = seq_type
    get_bv_para_dict['usedata'] = '3'

    prep_mcmctree_ctl(get_bv_para_dict, mcmctree_ctl_bv)

    # write out get bv command
    with open(get_BV_cmd_txt, 'w') as get_BV_cmd_txt_handle:
        get_BV_cmd_txt_handle.write('mcmctree\n')

    # run command to get bv file; cwd= confines the directory change to the
    # child process, so the caller's working directory is never touched
    print('Running step one command to get the BV file.')
    with open(get_bv_log_txt, 'w') as get_bv_log_handle:
        subprocess.call(['mcmctree'], cwd=get_bv_wd, stdout=get_bv_log_handle)
    print('Step one finished.')

    # step two needs the BV file, stop here instead of preparing unusable runs
    if os.path.isfile(bv_file) is False:
        print('%s not found, please check %s, program exited!' % (bv_file, get_bv_log_txt))
        raise SystemExit(1)

    ############################################# write out step 2 command #############################################

    print('Preparing files for dating estimation')

    para_comb_dict = get_parameter_combinations(para_to_test_dict)
    print('para_comb_dict')
    print(para_comb_dict)

    with open(dating_cmds_txt, 'w') as dating_cmds_txt_handle:
        for para_comb in sorted(para_comb_dict):
            # two replicate runs per combination
            for run_id in ('run1', 'run2'):

                # create dir
                current_dating_wd = '%s/%s_%s' % (op_dir, para_comb, run_id)
                os.mkdir(current_dating_wd)

                # copy tree and msa file
                shutil.copy(tree_file, current_dating_wd)
                shutil.copy(msa_file, current_dating_wd)

                # prepare mcmctree.ctl file
                current_para_dict = para_comb_dict[para_comb].copy()
                current_para_dict['seqfile'] = msa_f_name
                current_para_dict['treefile'] = tree_f_name
                current_para_dict['mcmcfile'] = '%s_%s_%s_mcmc.txt' % (op_prefix, para_comb, run_id)
                current_para_dict['outfile'] = '%s_%s_%s_out.txt' % (op_prefix, para_comb, run_id)
                current_para_dict['seqtype'] = seq_type
                current_para_dict['usedata'] = '2'
                prep_mcmctree_ctl(current_para_dict, '%s/mcmctree.ctl' % current_dating_wd)

                # copy BV file generated in step one
                shutil.copy(bv_file, '%s/in.BV' % current_dating_wd)

                # write out command; abspath also gives the right folder when
                # the output folder was provided as an absolute path
                cmd_run = 'cd %s; mcmctree' % os.path.abspath(current_dating_wd)
                if wrap_with_srun is True:
                    cmd_run = 'BioSAK srun -c "%s"' % cmd_run
                dating_cmds_txt_handle.write(cmd_run + '\n')

    print('Job script for performing dating exported to: %s' % dating_cmds_txt)
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
if __name__ == '__main__':

    # stand-alone entry point: parse the command line and hand it to dating()
    dating_parser = argparse.ArgumentParser()

    # required options
    for each_flag, each_help in (('-i', 'input tree file'),
                                 ('-m', 'sequence alignments'),
                                 ('-o', 'output directory'),
                                 ('-p', 'output prefix'),
                                 ('-s', 'settings to compare')):
        dating_parser.add_argument(each_flag, required=True, help=each_help)

    # optional value
    dating_parser.add_argument('-st', required=False, default='2', help='sequence type, 0 for nucleotides, 1 for codons, 2 for AAs, default: 2')

    # optional switches
    for each_flag, each_help in (('-srun', 'wrap commands with BioSAK srun'),
                                 ('-f', 'force overwrite')):
        dating_parser.add_argument(each_flag, required=False, action="store_true", help=each_help)

    args = vars(dating_parser.parse_args())
    dating(args)
|
|
264
|
+
|
TreeSAK/dating_ss.py
ADDED
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import argparse
|
|
3
|
+
import itertools
|
|
4
|
+
from ete3 import Tree
|
|
5
|
+
from Bio import AlignIO
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
Dating_usage = '''
|
|
9
|
+
============================= Dating example commands =============================
|
|
10
|
+
|
|
11
|
+
# example commands
|
|
12
|
+
TreeSAK Dating_ss -deltall DeltaLL_stdout.txt -aod s11_marker_sets_by_DeltaLL -o s12_dating_wd -c 25-50-75-100 -mmn 20 -f
|
|
13
|
+
|
|
14
|
+
===================================================================================
|
|
15
|
+
'''
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def sep_path_basename_ext(file_in):
    """Split a path into (folder, base name, extension).

    The folder is '.' when file_in has no folder part; the extension keeps
    its leading dot.
    """
    folder, leaf_name = os.path.split(file_in)
    stem, suffix = os.path.splitext(leaf_name)
    return (folder or '.'), stem, suffix
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def submit_js(js):
    """Submit a job script with qsub, run from the folder holding the script.

    Args:
        js: path to the job script.
    """
    import subprocess
    from shutil import which

    js_path, js_basename, js_ext = sep_path_basename_ext(js)

    if which('qsub') is None:
        print('qsub not found, %s not submitted!' % js)
        return

    # An argument list keeps the file name out of shell parsing, and cwd=
    # confines the directory change to the child process, so the caller's
    # working directory cannot be left changed.
    subprocess.call(['qsub', '%s%s' % (js_basename, js_ext)], cwd=js_path)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def root_with_out_group(tree_file, out_group_txt, tree_file_rooted):
    """Root a tree on the common ancestor of the listed outgroup leaves.

    Args:
        tree_file: input tree in newick format (read with ete3 format=1).
        out_group_txt: text file with one outgroup leaf name per line.
        tree_file_rooted: path of the rooted tree to write.

    Raises:
        SystemExit: if out_group_txt contains no leaf name.
    """
    # blank lines are skipped, an empty name is not a leaf
    with open(out_group_txt) as out_group_handle:
        out_group_set = {each_og.strip() for each_og in out_group_handle if each_og.strip()}

    if not out_group_set:
        print('No outgroup leaf found in %s, program exited!' % out_group_txt)
        raise SystemExit(1)

    tre = Tree(tree_file, format=1)
    # NOTE(review): with a single outgroup leaf, get_common_ancestor may
    # resolve to the root rather than the leaf; confirm against the ete3
    # documentation before relying on one-leaf outgroups.
    out_group_lca = tre.get_common_ancestor(out_group_set)
    tre.set_outgroup(out_group_lca)
    tre.write(outfile=tree_file_rooted)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def replace_clades(main_tree, sub_tree, tree_out, quote_node_name):
    """Replace a clade of the main tree with the provided subtree.

    The clade to replace is the common ancestor, in the main tree, of all
    leaves of the subtree; it has to contain exactly as many leaves as the
    subtree does.

    Args:
        main_tree: newick file of the main tree.
        sub_tree: newick file of the subtree (read with ete3 format=1).
        tree_out: path of the combined tree to write (ete3 format=8).
        quote_node_name: passed as quoted_node_names when reading the
            subtree and when writing the output.

    Raises:
        SystemExit: if the clade in the main tree holds extra leaves.
    """
    tre_sub = Tree(sub_tree, format=1, quoted_node_names=quote_node_name)
    subtree_leaf_name_list = tre_sub.get_leaf_names()
    tre_main = Tree(main_tree)
    lca = tre_main.get_common_ancestor(subtree_leaf_name_list)

    if len(lca.get_leaf_names()) != len(subtree_leaf_name_list):
        print('LCA of subtree leaves in main tree contain extra leaves, program exited!')
        raise SystemExit(1)

    lca_p = lca.up
    if lca_p is None:
        # the clade is the root itself (no parent to attach to), so the
        # subtree replaces the main tree as a whole
        tre_main = tre_sub
    else:
        lca_p.remove_child(lca)
        lca_p.add_child(tre_sub)

    tre_main.write(outfile=tree_out, format=8, quoted_node_names=quote_node_name)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def prep_mcmctree_ctl(ctl_para_dict, mcmctree_ctl_file):
    """Write an MCMCTree control file.

    Args:
        ctl_para_dict: settings for the control file. The keys 'seqfile',
            'treefile', 'mcmcfile', 'outfile', 'seqtype', 'usedata' and
            'clock' are required; every other setting falls back to the
            default given below when it is absent.
        mcmctree_ctl_file: path of the control file to write (overwritten).

    Raises:
        KeyError: if a required key is missing from ctl_para_dict.
    """
    get = ctl_para_dict.get

    # All lines are formatted before the file is opened, so a missing
    # required key raises KeyError without leaving a truncated control file.
    ctl_lines = [
        # the seed value belongs under the 'seed' label; it used to be
        # written as a second 'finetune' entry, leaving the file without seed
        ' seed = %s\n' % get('seed', '-1'),
        ' seqfile = %s\n' % ctl_para_dict['seqfile'],
        ' treefile = %s\n' % ctl_para_dict['treefile'],
        ' mcmcfile = %s\n' % ctl_para_dict['mcmcfile'],
        ' outfile = %s\n' % ctl_para_dict['outfile'],
        ' ndata = %s\n' % get('ndata', 1),
        ' seqtype = %s * 0: nucleotides; 1:codons; 2:AAs\n' % ctl_para_dict['seqtype'],
        ' usedata = %s * 0: no data; 1:seq like; 2:normal approximation; 3:out.BV (in.BV)\n' % ctl_para_dict['usedata'],
        ' clock = %s * 1: global clock; 2: independent rates; 3: correlated rates\n' % ctl_para_dict['clock'],
        ' RootAge = %s * safe constraint on root age, used if no fossil for root.\n' % get('RootAge', '<1.0'),
        ' model = %s * 0:JC69, 1:K80, 2:F81, 3:F84, 4:HKY85\n' % get('model', 0),
        ' alpha = %s * alpha for gamma rates at sites\n' % get('alpha', 0.5),
        ' ncatG = %s * No. categories in discrete gamma\n' % get('ncatG', 4),
        ' cleandata = %s * remove sites with ambiguity data (1:yes, 0:no)?\n' % get('cleandata', 0),
        ' BDparas = %s * birth, death, sampling\n' % get('BDparas', '1 1 0.1'),
        ' kappa_gamma = %s * gamma prior for kappa\n' % get('kappa_gamma', '6 2'),
        ' alpha_gamma = %s * gamma prior for alpha\n' % get('alpha_gamma', '1 1'),
        ' rgene_gamma = %s * gammaDir prior for rate for genes\n' % get('rgene_gamma', '1 50 1'),
        ' sigma2_gamma = %s * gammaDir prior for sigma^2 (for clock=2 or 3)\n' % get('sigma2_gamma', '1 10 1'),
        ' finetune = %s * auto (0 or 1): times, musigma2, rates, mixing, paras, FossilErr\n' % get('finetune', '1: .1 .1 .1 .1 .1 .1'),
        ' print = %s * 0: no mcmc sample; 1: everything except branch rates 2: everything\n' % get('print', 1),
        ' burnin = %s\n' % get('burnin', 50000),
        ' sampfreq = %s\n' % get('sampfreq', 5),
        ' nsample = %s\n' % get('nsample', 150000),
    ]

    with open(mcmctree_ctl_file, 'w') as ctl_file_handle:
        ctl_file_handle.writelines(ctl_lines)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def get_parameter_combinations(para_to_test_dict):
    """Build every combination of the parameter settings to test.

    Args:
        para_to_test_dict: maps a parameter name to a list of settings.

    Returns:
        A dict mapping a combination label to a {parameter: setting} dict.
        A label joins one '<parameter><setting>' part per parameter with
        '_'; parameters and settings are taken in sorted order and spaces
        inside a part are replaced by '_'.
    """
    para_names = sorted(para_to_test_dict)
    settings_per_para = [sorted(para_to_test_dict[name]) for name in para_names]

    para_dod = dict()
    # A single product over the settings is enough: labels and the
    # per-combination dicts are both derived from the same tuple.
    for settings in itertools.product(*settings_per_para):
        label = '_'.join(('%s%s' % (name, setting)).replace(' ', '_')
                         for name, setting in zip(para_names, settings))
        para_dod[label] = dict(zip(para_names, settings))

    return para_dod
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def fa2phy(fasta_in, phy_out):
    """Convert a fasta alignment into a phylip file, one line per sequence.

    The first line holds the number of sequences and the alignment length.
    Every following line holds a sequence id, padded with spaces to the
    length of the longest id plus two, followed by the full sequence.

    Args:
        fasta_in: alignment in fasta format.
        phy_out: path of the phylip file to write.
    """
    alignment = AlignIO.read(fasta_in, 'fasta')

    # width of the id column: longest id plus two separating spaces
    id_column_width = max((len(each_seq.id) for each_seq in alignment), default=0) + 2

    with open(phy_out, 'w') as msa_out_handle:
        msa_out_handle.write('%s %s\n' % (len(alignment), alignment.get_alignment_length()))
        for each_seq in alignment:
            msa_out_handle.write('%s%s\n' % (each_seq.id.ljust(id_column_width), str(each_seq.seq)))
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def dating_ss(args):
|
|
148
|
+
|
|
149
|
+
deltall_stdout_txt = args['deltall']
|
|
150
|
+
aod = args['aod']
|
|
151
|
+
out_group_txt = args['og']
|
|
152
|
+
eu_tree = args['eu']
|
|
153
|
+
op_dir = args['o']
|
|
154
|
+
deltall_keep_pct_str = args['c']
|
|
155
|
+
min_marker_num = args['mmn']
|
|
156
|
+
force_overwrite = args['f']
|
|
157
|
+
root_age = args['ra']
|
|
158
|
+
submit_job = args['qsub']
|
|
159
|
+
para_to_test = args['to_test']
|
|
160
|
+
js_cpu_num = 1
|
|
161
|
+
quote_node_name = False
|
|
162
|
+
|
|
163
|
+
para_to_test_dict = dict()
|
|
164
|
+
for each_para in open(para_to_test):
|
|
165
|
+
each_para_split = each_para.strip().split()
|
|
166
|
+
para_list = each_para_split[1].split(',')
|
|
167
|
+
para_to_test_dict[each_para_split[0]] = para_list
|
|
168
|
+
print('Parameters to test: %s' % para_to_test_dict)
|
|
169
|
+
|
|
170
|
+
if os.path.isfile(eu_tree) is False:
|
|
171
|
+
print('%s not found, program exited!' % eu_tree)
|
|
172
|
+
exit()
|
|
173
|
+
|
|
174
|
+
deltall_keep_pct_list = [int(i) for i in deltall_keep_pct_str.split('-')]
|
|
175
|
+
deltall_stdout_path, deltall_stdout_basename, deltall_stdout_ext = sep_path_basename_ext(deltall_stdout_txt)
|
|
176
|
+
|
|
177
|
+
# create dir
|
|
178
|
+
if os.path.isdir(op_dir) is True:
|
|
179
|
+
if force_overwrite is True:
|
|
180
|
+
os.system('rm -r %s' % op_dir)
|
|
181
|
+
else:
|
|
182
|
+
print('output folder detected, program exited!')
|
|
183
|
+
exit()
|
|
184
|
+
os.system('mkdir %s' % op_dir)
|
|
185
|
+
|
|
186
|
+
# read in deltall_stdout_txt
|
|
187
|
+
deltall_op_dict = dict()
|
|
188
|
+
for each_line in open(deltall_stdout_txt):
|
|
189
|
+
if not ((each_line.startswith('WARNING:')) or (each_line.startswith('awk:'))):
|
|
190
|
+
each_line_split = each_line.strip().split('\t')
|
|
191
|
+
marker_id = each_line_split[0]
|
|
192
|
+
value = float(each_line_split[1])
|
|
193
|
+
if marker_id not in deltall_op_dict:
|
|
194
|
+
deltall_op_dict[marker_id] = [value]
|
|
195
|
+
else:
|
|
196
|
+
deltall_op_dict[marker_id].append(value)
|
|
197
|
+
|
|
198
|
+
# assigned score to marker
|
|
199
|
+
metric_1_dict = dict()
|
|
200
|
+
metric_2_dict = dict()
|
|
201
|
+
for each_marker in deltall_op_dict:
|
|
202
|
+
metric_1_value = float("{0:.2f}".format(deltall_op_dict[each_marker][0]))
|
|
203
|
+
metric_2_value = float("{0:.2f}".format(deltall_op_dict[each_marker][1]))
|
|
204
|
+
metric_1_dict[each_marker] = metric_1_value
|
|
205
|
+
metric_2_dict[each_marker] = metric_2_value
|
|
206
|
+
|
|
207
|
+
metric_1_dict_sorted = {k: v for k, v in sorted(metric_1_dict.items(), key=lambda item: item[1])[::-1]}
|
|
208
|
+
metric_2_dict_sorted = {k: v for k, v in sorted(metric_2_dict.items(), key=lambda item: item[1])}
|
|
209
|
+
|
|
210
|
+
metric_1_score_dict = dict()
|
|
211
|
+
metric_1_score = 1
|
|
212
|
+
for each_marker_1 in metric_1_dict_sorted:
|
|
213
|
+
metric_1_score_dict[each_marker_1] = metric_1_score
|
|
214
|
+
metric_1_score += 1
|
|
215
|
+
|
|
216
|
+
metric_2_score_dict = dict()
|
|
217
|
+
metric_2_score = 1
|
|
218
|
+
for each_marker_2 in metric_2_dict_sorted:
|
|
219
|
+
metric_2_score_dict[each_marker_2] = metric_2_score
|
|
220
|
+
metric_2_score += 1
|
|
221
|
+
|
|
222
|
+
overall_score_dict = dict()
|
|
223
|
+
for each_marker in deltall_op_dict:
|
|
224
|
+
metric_score_1 = metric_1_score_dict[each_marker]
|
|
225
|
+
metric_score_2 = metric_2_score_dict[each_marker]
|
|
226
|
+
metric_score_overall = metric_score_1 + metric_score_2
|
|
227
|
+
overall_score_dict[each_marker] = metric_score_overall
|
|
228
|
+
marker_list_sorted_by_deltall = [k for k, v in sorted(overall_score_dict.items(), key=lambda item: item[1])]
|
|
229
|
+
|
|
230
|
+
# get qualified marker list
|
|
231
|
+
for each_keep_pct in deltall_keep_pct_list:
|
|
232
|
+
marker_num_to_keep = round(len(marker_list_sorted_by_deltall)*each_keep_pct/100)
|
|
233
|
+
|
|
234
|
+
if marker_num_to_keep < min_marker_num:
|
|
235
|
+
print('Ignored DeltaLL cutoff at %s , the number of qualified markers (%s) less than %s' % (each_keep_pct, marker_num_to_keep, min_marker_num))
|
|
236
|
+
else:
|
|
237
|
+
prefix_base = '%s_DeltaLL_%s' % (deltall_stdout_basename.split('_DeltaLL_stdout')[0], each_keep_pct)
|
|
238
|
+
aln_concatenated = '%s_DeltaLL_%s_concatenated.phy' % (deltall_stdout_basename.split('_DeltaLL_stdout')[0], each_keep_pct)
|
|
239
|
+
aln_concatenated_in_aod_wd_fasta = '%s_DeltaLL_%s_concatenated.phy.fasta' % (deltall_stdout_basename.split('_DeltaLL_stdout')[0], each_keep_pct)
|
|
240
|
+
c60_tree_file_rooted_with_time_final = '%s_DeltaLL_%s_rooted_with_time_final.treefile' % (deltall_stdout_basename.split('_DeltaLL_stdout')[0], each_keep_pct)
|
|
241
|
+
pwd_c60_tree_file = '%s/%s_DeltaLL_%s_iqtree_C60_PMSF/concatenated.treefile' % (aod, deltall_stdout_basename.split('_DeltaLL_stdout')[0], each_keep_pct)
|
|
242
|
+
pwd_c60_tree_file_renamed = '%s/%s_DeltaLL_%s_raw.treefile' % (op_dir, deltall_stdout_basename.split('_DeltaLL_stdout')[0], each_keep_pct)
|
|
243
|
+
pwd_c60_tree_file_rooted = '%s/%s_DeltaLL_%s_rooted.treefile' % (op_dir, deltall_stdout_basename.split('_DeltaLL_stdout')[0], each_keep_pct)
|
|
244
|
+
pwd_c60_tree_file_rooted_with_time = '%s/%s_DeltaLL_%s_rooted_with_time.treefile' % (op_dir, deltall_stdout_basename.split('_DeltaLL_stdout')[0], each_keep_pct)
|
|
245
|
+
pwd_aln_concatenated_in_aod_wd_fasta = '%s/%s' % (aod, aln_concatenated_in_aod_wd_fasta)
|
|
246
|
+
pwd_aln_concatenated_in_op_wd_phylip = '%s/%s' % (op_dir, aln_concatenated)
|
|
247
|
+
pwd_c60_tree_file_rooted_with_time_final = '%s/%s' % (op_dir, c60_tree_file_rooted_with_time_final)
|
|
248
|
+
get_BV_wd = '%s/%s_get_BV_wd' % (op_dir, prefix_base)
|
|
249
|
+
pwd_aln_concatenated_in_bv_wd_phylip = '%s/%s' % (get_BV_wd, aln_concatenated)
|
|
250
|
+
|
|
251
|
+
fa2phy(pwd_aln_concatenated_in_aod_wd_fasta, pwd_aln_concatenated_in_op_wd_phylip)
|
|
252
|
+
os.system('cp %s %s' % (pwd_c60_tree_file, pwd_c60_tree_file_renamed))
|
|
253
|
+
|
|
254
|
+
# root genome tree with outgroup
|
|
255
|
+
root_with_out_group(pwd_c60_tree_file_renamed, out_group_txt, pwd_c60_tree_file_rooted)
|
|
256
|
+
|
|
257
|
+
# add time constraints
|
|
258
|
+
replace_clades(pwd_c60_tree_file_rooted, eu_tree, pwd_c60_tree_file_rooted_with_time, quote_node_name)
|
|
259
|
+
|
|
260
|
+
# remove "NoName" from the rooted tree with time constraints
|
|
261
|
+
tree_str = open(pwd_c60_tree_file_rooted_with_time).readline().strip().replace('NoName', '')
|
|
262
|
+
|
|
263
|
+
# add root age
|
|
264
|
+
tree_str = tree_str.replace(';', '<%s;' % root_age)
|
|
265
|
+
tre_object = Tree(tree_str, format=8, quoted_node_names=quote_node_name)
|
|
266
|
+
with open(pwd_c60_tree_file_rooted_with_time_final, 'w') as pwd_c60_tree_file_rooted_with_time_final_hanlde:
|
|
267
|
+
pwd_c60_tree_file_rooted_with_time_final_hanlde.write('%s\t1\n' % len(tre_object.get_leaf_names()))
|
|
268
|
+
pwd_c60_tree_file_rooted_with_time_final_hanlde.write(tree_str.replace('""', '') + '\n')
|
|
269
|
+
#pwd_c60_tree_file_rooted_with_time_final_hanlde.write(tree_str + '\n')
|
|
270
|
+
|
|
271
|
+
# rm tmp tree files
|
|
272
|
+
os.system('rm %s' % pwd_c60_tree_file_renamed)
|
|
273
|
+
os.system('rm %s' % pwd_c60_tree_file_rooted)
|
|
274
|
+
os.system('rm %s' % pwd_c60_tree_file_rooted_with_time)
|
|
275
|
+
|
|
276
|
+
# get BV file
|
|
277
|
+
os.mkdir(get_BV_wd)
|
|
278
|
+
fa2phy(pwd_aln_concatenated_in_aod_wd_fasta, pwd_aln_concatenated_in_bv_wd_phylip) # sequence in phylip format need to be in one line
|
|
279
|
+
os.system('cp %s %s/' % (pwd_c60_tree_file_rooted_with_time_final, get_BV_wd))
|
|
280
|
+
|
|
281
|
+
get_BV_js = '%s/%s_get_BV.sh' % (op_dir, prefix_base)
|
|
282
|
+
get_BV_mcmctree_ctl = '%s_get_BV_mcmctree.ctl' % (prefix_base)
|
|
283
|
+
pwd_get_BV_mcmctree_ctl = '%s/%s' % (get_BV_wd, get_BV_mcmctree_ctl)
|
|
284
|
+
|
|
285
|
+
get_BV_para_dict = dict()
|
|
286
|
+
get_BV_para_dict['seqfile'] = aln_concatenated
|
|
287
|
+
get_BV_para_dict['treefile'] = c60_tree_file_rooted_with_time_final
|
|
288
|
+
get_BV_para_dict['mcmcfile'] = '%s_mcmc.txt' % prefix_base
|
|
289
|
+
get_BV_para_dict['outfile'] = '%s_out.txt' % prefix_base
|
|
290
|
+
get_BV_para_dict['seqtype'] = '2'
|
|
291
|
+
get_BV_para_dict['usedata'] = '3'
|
|
292
|
+
get_BV_para_dict['clock'] = '3'
|
|
293
|
+
prep_mcmctree_ctl(get_BV_para_dict, pwd_get_BV_mcmctree_ctl)
|
|
294
|
+
|
|
295
|
+
with open(get_BV_js, 'w') as get_BV_js_handle:
|
|
296
|
+
get_BV_js_handle.write('#!/bin/bash\n#SBATCH --ntasks 1\n#SBATCH --cpus-per-task %s\n\n' % js_cpu_num)
|
|
297
|
+
get_BV_js_handle.write('cd %s/%s\n' % (os.getcwd(), get_BV_wd))
|
|
298
|
+
get_BV_js_handle.write('mcmctree %s\n' % get_BV_mcmctree_ctl)
|
|
299
|
+
|
|
300
|
+
# prepare files for dating
|
|
301
|
+
para_dod = get_parameter_combinations(para_to_test_dict)
|
|
302
|
+
for para_combination in para_dod:
|
|
303
|
+
mcmctree_ctl = '%s_%s_mcmctree.ctl' % (prefix_base, para_combination)
|
|
304
|
+
current_dating_wd = '%s/%s_DeltaLL_%s_%s_dating_wd' % (op_dir, deltall_stdout_basename.split('_DeltaLL_stdout')[0], each_keep_pct, para_combination)
|
|
305
|
+
pwd_mcmctree_ctl = '%s/%s_%s_mcmctree.ctl' % (current_dating_wd, prefix_base, para_combination)
|
|
306
|
+
js_mcmctree = '%s/js_%s_DeltaLL_%s_%s.sh' % (op_dir, deltall_stdout_basename.split('_DeltaLL_stdout')[0], each_keep_pct, para_combination)
|
|
307
|
+
pwd_aln_in_dating_wd = '%s/%s' % (current_dating_wd, aln_concatenated)
|
|
308
|
+
|
|
309
|
+
# create dating wd and copy tree and alignment files into it
|
|
310
|
+
os.mkdir(current_dating_wd)
|
|
311
|
+
fa2phy(pwd_aln_concatenated_in_aod_wd_fasta, pwd_aln_in_dating_wd) # sequence in phylip format need to be in one line
|
|
312
|
+
os.system('cp %s %s/' % (pwd_c60_tree_file_rooted_with_time_final, current_dating_wd))
|
|
313
|
+
|
|
314
|
+
current_para_dict = para_dod[para_combination]
|
|
315
|
+
current_para_dict['seqfile'] = aln_concatenated
|
|
316
|
+
current_para_dict['treefile'] = c60_tree_file_rooted_with_time_final
|
|
317
|
+
current_para_dict['mcmcfile'] = '%s_%s_mcmc.txt' % (prefix_base, para_combination)
|
|
318
|
+
current_para_dict['outfile'] = '%s_%s_out.txt' % (prefix_base, para_combination)
|
|
319
|
+
current_para_dict['seqtype'] = '2'
|
|
320
|
+
current_para_dict['usedata'] = '2'
|
|
321
|
+
|
|
322
|
+
prep_mcmctree_ctl(current_para_dict, pwd_mcmctree_ctl)
|
|
323
|
+
|
|
324
|
+
with open(js_mcmctree, 'w') as js_mcmctree_handle:
|
|
325
|
+
js_mcmctree_handle.write('#!/bin/bash\n\n')
|
|
326
|
+
js_mcmctree_handle.write('cd %s/%s\n' % (os.getcwd(), current_dating_wd))
|
|
327
|
+
js_mcmctree_handle.write('cp ../%s_get_BV_wd/out.BV in.BV\n' % prefix_base)
|
|
328
|
+
js_mcmctree_handle.write('mcmctree %s\n' % mcmctree_ctl)
|
|
329
|
+
print('Job script for performing dating exported to %s' % js_mcmctree)
|
|
330
|
+
|
|
331
|
+
if submit_job is True:
|
|
332
|
+
submit_js(get_BV_js)
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
if __name__ == '__main__':

    # Command-line options, listed in the order they are registered with the
    # parser: (flag, keyword settings forwarded to add_argument).
    cli_options = [
        ('-deltall', dict(required=True, help='DeltaLL stdout')),
        ('-aod', dict(required=True, help='AssessMarkerDeltaLL output dir')),
        ('-og', dict(required=True, help='outgroup leaves, one id per line')),
        ('-eu', dict(required=True, help='EU tree with time constraints')),
        ('-o', dict(required=True, help='dating wd')),
        ('-c', dict(required=False, default='25-50-75-100', help='cutoffs, default: 25-50-75-100')),
        ('-mmn', dict(required=False, default=20, type=int, help='minimal marker number, default: 20')),
        ('-ra', dict(required=False, default=45, type=int, help='root age, default: 45')),
        ('-qsub', dict(required=False, action="store_true", help='submit job scripts for getting in.BV')),
        ('-f', dict(required=False, action="store_true", help='force overwrite')),
        ('-to_test', dict(required=True, help='Settings to test')),
    ]

    parser = argparse.ArgumentParser()
    for option_flag, option_settings in cli_options:
        parser.add_argument(option_flag, **option_settings)

    # the parsed namespace is converted to a plain dict before being handed to dating_ss
    args = vars(parser.parse_args())
    dating_ss(args)
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
'''
|
|
354
|
+
|
|
355
|
+
cd /Users/songweizhi/Desktop/dating_test
|
|
356
|
+
python3 /Users/songweizhi/PycharmProjects/TreeSAK/TreeSAK/dating_ss.py -deltall Marker_set_2_Betts_2018_29_arCOG_Marker2Tree_e30/s10_assess_marker_deltaLL/PA_75_DeltaLL_stdout.txt -aod Marker_set_2_Betts_2018_29_arCOG_Marker2Tree_e30/s11_marker_sets_by_DeltaLL -og out_group.txt -eu 27.nwk -o Marker_set_2_Betts_2018_29_arCOG_Marker2Tree_e30/s12_dating_wd -c 25-50-75-100 -mmn 20 -f
|
|
357
|
+
|
|
358
|
+
cd /home-user/wzsong/DateArTree
|
|
359
|
+
python3 dating_ss.py -deltall Marker_set_2_Betts_2018_29_arCOG_Marker2Tree_e30/s10_assess_marker_deltaLL/PA_75_DeltaLL_stdout.txt -aod Marker_set_2_Betts_2018_29_arCOG_Marker2Tree_e30/s11_marker_sets_by_DeltaLL -og out_group.txt -eu 27.nwk -o Marker_set_2_Betts_2018_29_arCOG_Marker2Tree_e30/s12_dating_wd -c 25-50-75-100 -mmn 20 -f
|
|
360
|
+
|
|
361
|
+
'''
|