treesak 1.53.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- TreeSAK/ALE.py +63 -0
- TreeSAK/ALE1.py +268 -0
- TreeSAK/ALE2.py +168 -0
- TreeSAK/ALE2RTC.py +30 -0
- TreeSAK/ALE3.py +205 -0
- TreeSAK/ALE4.py +636 -0
- TreeSAK/ALE5.py +210 -0
- TreeSAK/ALE6.py +401 -0
- TreeSAK/ALE7.py +126 -0
- TreeSAK/ALE_backup.py +1081 -0
- TreeSAK/AssessCVG.py +128 -0
- TreeSAK/AssessMarker.py +306 -0
- TreeSAK/AssessMarkerDeltaLL.py +257 -0
- TreeSAK/AssessMarkerPA.py +317 -0
- TreeSAK/AssessPB.py +113 -0
- TreeSAK/BMGE.jar +0 -0
- TreeSAK/BMGE.py +49 -0
- TreeSAK/C60SR4.nex +127 -0
- TreeSAK/CompareMCMC.py +138 -0
- TreeSAK/ConcateMSA.py +111 -0
- TreeSAK/ConvertMSA.py +135 -0
- TreeSAK/Dir.rb +82 -0
- TreeSAK/ExtractMarkerSeq.py +263 -0
- TreeSAK/FastRoot.py +1175 -0
- TreeSAK/FastRoot_backup.py +1122 -0
- TreeSAK/FigTree.py +34 -0
- TreeSAK/GTDB_tree.py +76 -0
- TreeSAK/GeneTree.py +142 -0
- TreeSAK/KEGG_Luo17.py +807 -0
- TreeSAK/LcaToLeaves.py +66 -0
- TreeSAK/MarkerRef2Tree.py +616 -0
- TreeSAK/MarkerRef2Tree_backup.py +628 -0
- TreeSAK/MarkerSeq2Tree.py +299 -0
- TreeSAK/MarkerSeq2Tree_backup.py +259 -0
- TreeSAK/ModifyTopo.py +116 -0
- TreeSAK/Newick_tree_plotter.py +79 -0
- TreeSAK/OMA.py +170 -0
- TreeSAK/OMA2.py +212 -0
- TreeSAK/OneLineAln.py +50 -0
- TreeSAK/PB.py +155 -0
- TreeSAK/PMSF.py +115 -0
- TreeSAK/PhyloBiAssoc.R +84 -0
- TreeSAK/PhyloBiAssoc.py +167 -0
- TreeSAK/PlotMCMC.py +41 -0
- TreeSAK/PlotMcmcNode.py +152 -0
- TreeSAK/PlotMcmcNode_old.py +252 -0
- TreeSAK/RootTree.py +101 -0
- TreeSAK/RootTreeGTDB.py +371 -0
- TreeSAK/RootTreeGTDB214.py +288 -0
- TreeSAK/RootTreeGTDB220.py +300 -0
- TreeSAK/SequentialDating.py +16 -0
- TreeSAK/SingleAleHGT.py +157 -0
- TreeSAK/SingleLinePhy.py +50 -0
- TreeSAK/SliceMSA.py +142 -0
- TreeSAK/SplitScore.py +21 -0
- TreeSAK/SplitScore1.py +177 -0
- TreeSAK/SplitScore1OMA.py +148 -0
- TreeSAK/SplitScore2.py +608 -0
- TreeSAK/TaxaCountStats.R +256 -0
- TreeSAK/TaxonTree.py +47 -0
- TreeSAK/TreeSAK_config.py +32 -0
- TreeSAK/VERSION +164 -0
- TreeSAK/VisHPD95.R +45 -0
- TreeSAK/VisHPD95.py +200 -0
- TreeSAK/__init__.py +0 -0
- TreeSAK/ale_parser.py +74 -0
- TreeSAK/ale_splitter.py +63 -0
- TreeSAK/alignment_pruner.pl +1471 -0
- TreeSAK/assessOG.py +45 -0
- TreeSAK/batch_itol.py +171 -0
- TreeSAK/catfasta2phy.py +140 -0
- TreeSAK/cogTree.py +185 -0
- TreeSAK/compare_trees.R +30 -0
- TreeSAK/compare_trees.py +255 -0
- TreeSAK/dating.py +264 -0
- TreeSAK/dating_ss.py +361 -0
- TreeSAK/deltall.py +82 -0
- TreeSAK/do_rrtc.rb +464 -0
- TreeSAK/fa2phy.py +42 -0
- TreeSAK/filter_rename_ar53.py +118 -0
- TreeSAK/format_leaf_name.py +70 -0
- TreeSAK/gap_stats.py +38 -0
- TreeSAK/get_SCG_tree.py +742 -0
- TreeSAK/get_arCOG_seq.py +97 -0
- TreeSAK/global_functions.py +222 -0
- TreeSAK/gnm_leaves.py +43 -0
- TreeSAK/iTOL.py +791 -0
- TreeSAK/iTOL_gene_tree.py +80 -0
- TreeSAK/itol_msa_stats.py +56 -0
- TreeSAK/keep_highest_rrtc.py +37 -0
- TreeSAK/koTree.py +194 -0
- TreeSAK/label_gene_tree_by_gnm.py +34 -0
- TreeSAK/label_tree.R +75 -0
- TreeSAK/label_tree.py +121 -0
- TreeSAK/mad.py +708 -0
- TreeSAK/mcmc2tree.py +58 -0
- TreeSAK/mcmcTC copy.py +92 -0
- TreeSAK/mcmcTC.py +104 -0
- TreeSAK/mcmctree_vs_reltime.R +44 -0
- TreeSAK/mcmctree_vs_reltime.py +252 -0
- TreeSAK/merge_pdf.py +32 -0
- TreeSAK/pRTC.py +56 -0
- TreeSAK/parse_mcmctree.py +198 -0
- TreeSAK/parse_reltime.py +141 -0
- TreeSAK/phy2fa.py +37 -0
- TreeSAK/plot_distruibution_th.py +165 -0
- TreeSAK/prep_mcmctree_ctl.py +92 -0
- TreeSAK/print_leaves.py +32 -0
- TreeSAK/pruneMSA.py +63 -0
- TreeSAK/recode.py +73 -0
- TreeSAK/remove_bias.R +112 -0
- TreeSAK/rename_leaves.py +78 -0
- TreeSAK/replace_clade.py +55 -0
- TreeSAK/root_with_out_group.py +84 -0
- TreeSAK/run_TaxaCountStats_R_s1.py +455 -0
- TreeSAK/subsample_drep_gnms.py +74 -0
- TreeSAK/subset.py +69 -0
- TreeSAK/subset_tree_stupid_old_way.py +193 -0
- TreeSAK/supertree.py +330 -0
- TreeSAK/tmp_1.py +19 -0
- TreeSAK/tmp_2.py +19 -0
- TreeSAK/tmp_3.py +120 -0
- TreeSAK/tmp_4.py +43 -0
- TreeSAK/tmp_5.py +12 -0
- TreeSAK/weighted_rand.rb +23 -0
- treesak-1.53.3.data/scripts/TreeSAK +955 -0
- treesak-1.53.3.dist-info/LICENSE +674 -0
- treesak-1.53.3.dist-info/METADATA +27 -0
- treesak-1.53.3.dist-info/RECORD +131 -0
- treesak-1.53.3.dist-info/WHEEL +5 -0
- treesak-1.53.3.dist-info/top_level.txt +1 -0
TreeSAK/Dir.rb
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
require 'find'
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
################################################################################
|
|
5
|
+
class Dir
  # Create +path+ and any missing ancestor directories, one level at a time.
  #
  # Returns true when +path+ already is a directory or has just been created.
  # A false result from the recursive call on the parent directory is passed
  # straight back to the caller. Dir.mkdir raises if the final creation fails.
  def self.mkdirs(path)
    return true if File.directory?(path)

    # make sure the parent exists before creating the leaf directory
    parent_ready = mkdirs(File.dirname(path))
    return false unless parent_ready

    mkdir(path)
    true
  end
end
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
################################################################################
|
|
19
|
+
# Prepare an output directory, optionally tolerating or replacing an existing one.
#
# outdir      - String path of the directory to prepare; any other class raises.
# is_force    - when true, an existing outdir is removed and recreated empty.
# is_tolerate - when true, an existing outdir is left untouched (checked before
#               is_force, so it wins when both flags are set).
#
# Raises RuntimeError if outdir is not a String, or if it already exists and
# neither flag is set.
def mkdir_with_force(outdir, is_force=false, is_tolerate=false)
  # Required here so the method works regardless of the file's top-level requires.
  require 'fileutils'

  if outdir.class != String
    raise "outdir wrong? Exiting ......"
  end

  # Dir.exist? replaces Dir.exists?, which was removed in Ruby 3.2.
  if !Dir.exist?(outdir)
    # FileUtils keeps the path out of a shell, so spaces and metacharacters
    # in outdir are handled and failures raise instead of being ignored.
    FileUtils.mkdir_p(outdir)
  elsif is_tolerate
    # keep the existing directory and its content as they are
  elsif is_force
    FileUtils.rm_rf(outdir)
    FileUtils.mkdir_p(outdir)
  else
    raise "The outdir #{outdir} has already existed!"
  end
end
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# List the entries found in +indir+, optionally filtered by name suffix.
#
# indir            - directory to scan.
# suffix           - String or Array of Strings. Each value is interpolated
#                    into a regular expression anchored at the end of the name,
#                    so regex metacharacters in it are interpreted as such.
#                    An empty String disables filtering.
# is_all_subfolder - false: only the direct entries of indir are considered
#                    (sub-directories themselves may be returned);
#                    true: indir is walked recursively with Find and only
#                    non-directory paths are returned.
#
# Entries whose basename starts with "." are always skipped.
# In recursive mode a String suffix must follow a literal "." (/\.suffix$/),
# whereas the non-recursive mode matches /suffix$/.
#
# Returns an Array of paths, each prefixed with indir.
def read_infiles(indir, suffix='', is_all_subfolder=false)
  infiles = Array.new
  if ! is_all_subfolder
    Dir.foreach(indir) do |b|
      next if b =~ /^\./
      if suffix.is_a?(String)
        next if suffix != '' && b !~ /#{suffix}$/
      elsif suffix.is_a?(Array)
        next unless suffix.any?{|i| b =~ /#{i}$/ }
      end
      infiles << File.join(indir, b)
    end
  else
    Find.find(indir) do |path|
      next if File.directory?(path)
      next if File.basename(path) =~ /^\./
      if suffix.is_a?(String)
        # An empty suffix means "no filter", exactly like the non-recursive
        # branch; previously it built /\.$/ and rejected nearly every file.
        next if suffix != '' && path !~ /\.#{suffix}$/
      else
        next unless suffix.any?{|i| path =~ /#{i}$/ }
      end
      infiles << path
    end
  end
  return(infiles)
end
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# Return the direct entries of +indir+ (as listed by read_infiles with its
# default arguments) whose File.extname value is contained in +suffices+.
#
# indir    - directory to scan.
# suffices - collection tested with #include?; File.extname yields values
#            with a leading dot (e.g. ".fa"), so entries are compared in
#            that form.
#
# Returns a new Array of matching paths.
def getFilesBySuffices(indir, suffices)
  read_infiles(indir).select do |candidate|
    suffices.include?(File.extname(candidate))
  end
end
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# Resolve one level of symbolic link.
#
# Returns the target stored in the link (as reported by File.readlink) when
# +file+ is a symlink; otherwise returns +file+ unchanged.
def get_file_path(file)
  return File.readlink(file) if File.symlink?(file)

  file
end
|
|
80
|
+
|
|
81
|
+
################################################################################
|
|
82
|
+
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import glob
|
|
3
|
+
import argparse
|
|
4
|
+
from Bio import SeqIO
|
|
5
|
+
import multiprocessing as mp
|
|
6
|
+
from distutils.spawn import find_executable
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
ExtractMarkerSeq_usage = '''
|
|
10
|
+
============================ ExtractMarkerSeq example commands ============================
|
|
11
|
+
|
|
12
|
+
Dependencies: blastp
|
|
13
|
+
|
|
14
|
+
BioSAK ExtractMarkerSeq -m marker_ref_seq -mx fa -aa faa_files -aax faa -o op_dir -e "1e-30" -t 6
|
|
15
|
+
|
|
16
|
+
===========================================================================================
|
|
17
|
+
'''
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def check_dependencies(program_list):
    """Terminate the program when a required executable cannot be found.

    Args:
        program_list: iterable of executable names to look up on PATH.

    Returns:
        None when every program is found.

    Raises:
        SystemExit: with status 1, after printing the comma-separated names
            of the missing programs.
    """
    # Imported locally: shutil.which is the supported replacement for
    # distutils.spawn.find_executable (distutils was removed in Python 3.12).
    import shutil
    import sys

    not_detected_programs = [
        needed_program
        for needed_program in program_list
        if shutil.which(needed_program) is None
    ]

    if not_detected_programs:
        print('%s not found, program exited!' % ','.join(not_detected_programs))
        # non-zero status so shells and pipelines can detect the failure
        sys.exit(1)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def exe_cmds(cmd_list, num_threads):
    """Run shell commands in parallel worker processes.

    Each command string is passed to os.system in a multiprocessing pool;
    the exit statuses returned by os.system are discarded.

    Args:
        cmd_list: list of shell command strings.
        num_threads: number of worker processes in the pool.
    """
    print('Running %s commands with %s cores' % (len(cmd_list), num_threads))
    # The context manager tears the pool down even when map() raises,
    # so worker processes are not left behind.
    with mp.Pool(processes=num_threads) as pool:
        pool.map(os.system, cmd_list)
        pool.close()
        pool.join()
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def sep_path_basename_ext(file_in):
    """Split a file path into (directory, base name, extension).

    The directory is reported as '.' when file_in has no directory part.
    The extension is the one returned by os.path.splitext, i.e. it keeps its
    leading dot and only covers the last suffix.
    """
    directory, leaf = os.path.split(file_in)
    stem, suffix = os.path.splitext(leaf)
    return ('.' if directory == '' else directory), stem, suffix
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def select_seq(seq_file, id_file, select_option, output_file, one_line, in_fastq):
    """Write the sequences selected (or rejected) by an id list to a new file.

    Args:
        seq_file: input sequence file, parsed with SeqIO.
        id_file: text file with one sequence id per line (surrounding
            whitespace is stripped).
        select_option: 1 keeps the records whose id is listed, 0 keeps the
            records whose id is not listed; any other value writes nothing.
        output_file: path of the file to write (always created/truncated).
        one_line: for fasta output, False writes 'fasta', anything else
            writes 'fasta-2line'.
        in_fastq: True parses the input as fastq; False writes fasta output,
            any other value writes fastq output.
    """
    # get provided id list; the context manager closes the id file again
    with open(id_file) as id_file_handle:
        seq_id_list = {seq_id.strip() for seq_id in id_file_handle}

    seq_in_format = 'fastq' if in_fastq is True else 'fasta'

    # the output format does not depend on the record, so decide it once
    if in_fastq is False:
        seq_out_format = 'fasta' if one_line is False else 'fasta-2line'
    else:
        seq_out_format = 'fastq'

    # extract sequences; the output handle is closed even if parsing fails
    with open(output_file, 'w') as output_file_handle:
        for seq_record in SeqIO.parse(seq_file, seq_in_format):
            is_listed = seq_record.id in seq_id_list
            if (select_option == 1 and is_listed) or (select_option == 0 and not is_listed):
                SeqIO.write(seq_record, output_file_handle, seq_out_format)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def ExtractMarkerSeq(args):
    """Find the best blastp hit of every marker in every genome and extract it.

    Every protein file in args['aa'] is used as blastp query against every
    marker file in args['m'] (as subject). For each genome/marker pair the
    query protein with the highest bit score is kept. Hits are then written
    per marker as an id list (s02), as sequences taken from the combined
    protein file (s03) and as sequences renamed to their genome id (s04).

    Args:
        args: dict with the keys
            'm'   - directory holding the marker sequence files,
            'mx'  - extension of the marker files (without leading dot),
            'aa'  - directory holding the protein (faa) files,
            'aax' - extension of the protein files (without leading dot),
            'o'   - output directory,
            'e'   - e-value cutoff passed to blastp,
            't'   - number of worker processes,
            'f'   - True to delete an existing output directory and rerun
                    everything; otherwise existing blastp outputs are reused.
    """
    # imported locally; only needed to remove an existing output directory
    import shutil

    marker_seq_dir  = args['m']
    marker_seq_ext  = args['mx']
    faa_file_dir    = args['aa']
    faa_file_ext    = args['aax']
    op_dir          = args['o']
    e_value         = args['e']
    num_of_threads  = args['t']
    force_overwrite = args['f']

    # check dependencies
    check_dependencies(['blastp'])

    # get marker id set
    marker_seq_re = '%s/*.%s' % (marker_seq_dir, marker_seq_ext)
    marker_id_set = set()
    for each_marker_seq_file in glob.glob(marker_seq_re):
        # do not overwrite marker_seq_ext here: it is needed again below to
        # build the blastp commands
        _, marker_seq_basename, _ = sep_path_basename_ext(each_marker_seq_file)
        marker_id_set.add(marker_seq_basename)

    # get gnm id list
    faa_file_re = '%s/*.%s' % (faa_file_dir, faa_file_ext)
    gnm_set = set()
    for each_faa_file in glob.glob(faa_file_re):
        _, faa_basename, _ = sep_path_basename_ext(each_faa_file)
        gnm_set.add(faa_basename)
    gnm_id_list_sorted = sorted(gnm_set)

    # define output dir
    blastp_cmd_txt                      = '%s/blastp_cmds_%s.txt'                   % (op_dir, (len(gnm_id_list_sorted)*len(marker_id_set)))
    pwd_combined_protein                = '%s/combined.faa'                         % op_dir
    blast_op_dir                        = '%s/s01_blast_op'                         % op_dir
    best_hit_id_by_marker_dir           = '%s/s02_identified_marker_id'             % op_dir
    best_hit_seq_by_marker_dir          = '%s/s03_identified_marker_seq'            % op_dir
    best_hit_seq_by_marker_dir_renamed  = '%s/s04_identified_marker_seq_renamed'    % op_dir

    # create folder (no shell involved, so failures raise instead of being ignored)
    if force_overwrite is True:
        if os.path.isdir(op_dir) is True:
            shutil.rmtree(op_dir)
    # blast_op_dir lives inside op_dir, so this creates both when missing
    os.makedirs(blast_op_dir, exist_ok=True)

    os.system('cat %s/*.%s > %s' % (faa_file_dir, faa_file_ext, pwd_combined_protein))

    # get blastp command
    blast_cmd_list = []
    blast_op_to_cmd_dict = dict()
    with open(blastp_cmd_txt, 'w') as blastp_cmd_txt_handle:
        for gnm_id in gnm_id_list_sorted:
            for each_cog in marker_id_set:
                pwd_blast_op = '%s/%s_vs_%s_blastp.txt' % (blast_op_dir, gnm_id, each_cog)
                # use the extensions supplied by the caller; they were
                # previously hard-coded to "fa" and "faa"
                blastp_cmd = 'blastp -subject %s/%s.%s -evalue %s -outfmt 6 -query %s/%s.%s -out %s' % (marker_seq_dir, each_cog, marker_seq_ext, e_value, faa_file_dir, gnm_id, faa_file_ext, pwd_blast_op)
                blast_op_to_cmd_dict[pwd_blast_op] = blastp_cmd
                blastp_cmd_txt_handle.write(blastp_cmd + '\n')
                blast_cmd_list.append(blastp_cmd)

    # run blastp
    if force_overwrite is True:
        exe_cmds(blast_cmd_list, num_of_threads)
    else:
        # only rerun the commands whose output file does not exist yet
        cmds_to_rerun = [cmd for blast_op, cmd in blast_op_to_cmd_dict.items() if os.path.isfile(blast_op) is False]
        num_of_good_ones = len(blast_op_to_cmd_dict) - len(cmds_to_rerun)

        print('Detected blastp outputs: %s' % num_of_good_ones)
        exe_cmds(cmds_to_rerun, num_of_threads)

    # get best_hit_dict_by_marker
    best_hit_to_gnm_dict = dict()
    best_hit_dict_by_marker = dict()
    for gnm_id in gnm_id_list_sorted:
        for each_cog in marker_id_set:
            current_blastp_op = '%s/%s_vs_%s_blastp.txt' % (blast_op_dir, gnm_id, each_cog)
            if os.path.isfile(current_blastp_op) is False:
                continue

            # get best hit: the query with the highest bit score (column 12)
            best_hit_gene = ''
            best_hit_score = 0
            with open(current_blastp_op) as current_blastp_op_handle:
                for each_line in current_blastp_op_handle:
                    each_line_split = each_line.strip().split('\t')
                    # skip blank or truncated lines instead of failing on them
                    if len(each_line_split) < 12:
                        continue
                    query_id = each_line_split[0]
                    bit_score = float(each_line_split[11])
                    if bit_score > best_hit_score:
                        best_hit_score = bit_score
                        best_hit_gene = query_id

            if best_hit_gene != '':
                best_hit_to_gnm_dict[best_hit_gene] = gnm_id
                best_hit_dict_by_marker.setdefault(each_cog, []).append(best_hit_gene)

    # create output dir
    for each_dir in (best_hit_id_by_marker_dir, best_hit_seq_by_marker_dir, best_hit_seq_by_marker_dir_renamed):
        os.makedirs(each_dir, exist_ok=True)

    # write out best hits and extract sequences
    for processing_index, each_marker in enumerate(best_hit_dict_by_marker, start=1):
        print('Extracting marker sequence %s/%s: %s' % (processing_index, len(best_hit_dict_by_marker), each_marker))

        current_m_hit_list = best_hit_dict_by_marker[each_marker]
        marker_hits_txt         = ('%s/%s.txt' % (best_hit_id_by_marker_dir, each_marker)).replace(':', '')
        marker_hits_seq         = ('%s/%s.fa'  % (best_hit_seq_by_marker_dir, each_marker)).replace(':', '')
        marker_hits_seq_renamed = ('%s/%s.fa'  % (best_hit_seq_by_marker_dir_renamed, each_marker)).replace(':', '')

        with open(marker_hits_txt, 'w') as marker_hits_txt_handle:
            marker_hits_txt_handle.write('\n'.join(current_m_hit_list))

        # extract sequences
        select_seq(pwd_combined_protein, marker_hits_txt, 1, marker_hits_seq, True, False)

        # rename sequences: each record is labelled with the genome it came from
        with open(marker_hits_seq_renamed, 'w') as marker_hits_seq_renamed_handle:
            for each_seq in SeqIO.parse(marker_hits_seq, 'fasta'):
                seq_gnm = best_hit_to_gnm_dict[each_seq.id]
                marker_hits_seq_renamed_handle.write('>%s\n' % seq_gnm)
                marker_hits_seq_renamed_handle.write('%s\n' % str(each_seq.seq))

    print('Done!')
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
if __name__ == '__main__':

    # Command-line entry point: parse the options into a dict and hand it to
    # ExtractMarkerSeq, which reads them by their short option names.
    parser = argparse.ArgumentParser()
    parser.add_argument('-m',   required=True,                          help='marker seq dir')
    parser.add_argument('-mx',  required=True,                          help='marker seq ext')
    parser.add_argument('-aa',  required=True,                          help='faa file dir')
    parser.add_argument('-aax', required=True,                          help='faa file ext')
    parser.add_argument('-o',   required=True,                          help='output dir')
    # optional, so that the documented default can actually take effect
    parser.add_argument('-e',   required=False, default=1e-30,          help='e-value cutoff, default: 1e-30')
    parser.add_argument('-t',   required=True,  type=int,               help='num of threads')
    parser.add_argument('-f',   required=False, action="store_true",    help='force overwrite')
    args = vars(parser.parse_args())
    ExtractMarkerSeq(args)
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
'''
|
|
251
|
+
|
|
252
|
+
conda activate mypy3env
|
|
253
|
+
cd /home-user/wzsong/DateArTree
|
|
254
|
+
python3 MarkerRef2Tree.py -m Marker_set_2_Betts_2018_29_arCOG -mx fa -aa /home-user/wzsong/DateArTree/01_genome_selection_Prokka/d__Archaea_o_rs_133_gnms_plus_27_mito_faa_files -aax faa -o Marker_set_2_Betts_2018_29_arCOG_Marker2Tree_e30_demo -e 30 -t 24 -pl /home-user/wzsong/Scripts/catfasta2phyml.pl
|
|
255
|
+
submitHPC.sh --cmd "python3 MarkerRef2Tree.py -m Marker_set_2_Betts_2018_29_arCOG -mx fa -aa /home-user/wzsong/DateArTree/01_genome_selection_Prokka/d__Archaea_o_rs_133_gnms_plus_27_mito_faa_files -aax faa -g /home-user/wzsong/DateArTree/gnm_group.txt -o Marker_set_2_Betts_2018_29_arCOG_Marker2Tree_e30_demo -e 30 -t 24 -pl /home-user/wzsong/Scripts/catfasta2phyml.pl" -n 24 -c Marker_set_2_Betts_2018_29_arCOG_Marker2Tree_e30_demo
|
|
256
|
+
|
|
257
|
+
cd /home-user/wzsong/DateArTree
|
|
258
|
+
python3 MarkerRef2Tree.py -m Marker_set_2_Betts_2018_29_arCOG -mx fa -aa /home-user/wzsong/DateArTree/01_genome_selection_Prokka/d__Archaea_o_rs_133_gnms_plus_27_mito_faa_files -aax faa -g /home-user/wzsong/DateArTree/gnm_group.txt -o Marker_set_2_Betts_2018_29_arCOG_Marker2Tree_e30_demo -e 30 -t 12 -pl /home-user/wzsong/Scripts/catfasta2phyml.pl -g gnm_group.txt -skip_align_trim -jst 6 -qsub
|
|
259
|
+
|
|
260
|
+
cd /Users/songweizhi/Desktop/demo
|
|
261
|
+
python3 /Users/songweizhi/PycharmProjects/TreeSAK/TreeSAK/MarkerRef2Tree.py -m Marker_set_2_Betts_2018_29_arCOG -mx fa -aa d__Archaea_o_rs_133_gnms_plus_27_mito_faa_files -aax faa -g gnm_group.txt -o Marker_set_2_Betts_2018_29_arCOG_Marker2Tree_e30_demo -e 30 -t 10 -pl /Users/songweizhi/Scripts/catfasta2phyml.pl -g gnm_group.txt -skip_align_trim -jst 6 -qsub
|
|
262
|
+
|
|
263
|
+
'''
|