treesak 1.51.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of treesak might be problematic. Click here for more details.
- TreeSAK/ALE.py +63 -0
- TreeSAK/ALE1.py +268 -0
- TreeSAK/ALE2.py +168 -0
- TreeSAK/ALE2RTC.py +30 -0
- TreeSAK/ALE3.py +205 -0
- TreeSAK/ALE4.py +636 -0
- TreeSAK/ALE5.py +210 -0
- TreeSAK/ALE6.py +401 -0
- TreeSAK/ALE7.py +126 -0
- TreeSAK/ALE_backup.py +1081 -0
- TreeSAK/AssessCVG.py +128 -0
- TreeSAK/AssessMarker.py +306 -0
- TreeSAK/AssessMarkerDeltaLL.py +257 -0
- TreeSAK/AssessMarkerPA.py +317 -0
- TreeSAK/AssessPB.py +130 -0
- TreeSAK/BMGE.jar +0 -0
- TreeSAK/BMGE.py +49 -0
- TreeSAK/CompareMCMC.py +138 -0
- TreeSAK/ConcateMSA.py +111 -0
- TreeSAK/ConvertMSA.py +135 -0
- TreeSAK/Dir.rb +82 -0
- TreeSAK/ExtractMarkerSeq.py +263 -0
- TreeSAK/FastRoot.py +1175 -0
- TreeSAK/FastRoot_backup.py +1122 -0
- TreeSAK/FigTree.py +34 -0
- TreeSAK/GTDB_tree.py +76 -0
- TreeSAK/GeneTree.py +142 -0
- TreeSAK/KEGG_Luo17.py +807 -0
- TreeSAK/LcaToLeaves.py +66 -0
- TreeSAK/MarkerRef2Tree.py +616 -0
- TreeSAK/MarkerRef2Tree_backup.py +628 -0
- TreeSAK/MarkerSeq2Tree.py +290 -0
- TreeSAK/MarkerSeq2Tree_backup.py +259 -0
- TreeSAK/ModifyTopo.py +116 -0
- TreeSAK/Newick_tree_plotter.py +79 -0
- TreeSAK/OMA.py +170 -0
- TreeSAK/OMA2.py +212 -0
- TreeSAK/OneLineAln.py +50 -0
- TreeSAK/PB.py +155 -0
- TreeSAK/PMSF.py +106 -0
- TreeSAK/PhyloBiAssoc.R +84 -0
- TreeSAK/PhyloBiAssoc.py +167 -0
- TreeSAK/PlotMCMC.py +41 -0
- TreeSAK/PlotMcmcNode.py +152 -0
- TreeSAK/PlotMcmcNode_old.py +252 -0
- TreeSAK/RootTree.py +101 -0
- TreeSAK/RootTreeGTDB214.py +288 -0
- TreeSAK/RootTreeGTDB220.py +300 -0
- TreeSAK/RootTreeGTDB226.py +300 -0
- TreeSAK/SequentialDating.py +16 -0
- TreeSAK/SingleAleHGT.py +157 -0
- TreeSAK/SingleLinePhy.py +50 -0
- TreeSAK/SliceMSA.py +142 -0
- TreeSAK/SplitScore.py +19 -0
- TreeSAK/SplitScore1.py +178 -0
- TreeSAK/SplitScore1OMA.py +148 -0
- TreeSAK/SplitScore2.py +597 -0
- TreeSAK/TaxaCountStats.R +256 -0
- TreeSAK/TaxonTree.py +47 -0
- TreeSAK/TreeSAK_config.py +32 -0
- TreeSAK/VERSION +158 -0
- TreeSAK/VisHPD95.R +45 -0
- TreeSAK/VisHPD95.py +200 -0
- TreeSAK/__init__.py +0 -0
- TreeSAK/ale_parser.py +74 -0
- TreeSAK/ale_splitter.py +63 -0
- TreeSAK/alignment_pruner.pl +1471 -0
- TreeSAK/assessOG.py +45 -0
- TreeSAK/catfasta2phy.py +140 -0
- TreeSAK/cogTree.py +185 -0
- TreeSAK/compare_trees.R +30 -0
- TreeSAK/compare_trees.py +255 -0
- TreeSAK/dating.py +264 -0
- TreeSAK/dating_ss.py +361 -0
- TreeSAK/deltall.py +82 -0
- TreeSAK/do_rrtc.rb +464 -0
- TreeSAK/fa2phy.py +42 -0
- TreeSAK/format_leaf_name.py +70 -0
- TreeSAK/gap_stats.py +38 -0
- TreeSAK/get_SCG_tree.py +742 -0
- TreeSAK/get_arCOG_seq.py +97 -0
- TreeSAK/global_functions.py +222 -0
- TreeSAK/gnm_leaves.py +43 -0
- TreeSAK/iTOL.py +791 -0
- TreeSAK/iTOL_gene_tree.py +80 -0
- TreeSAK/itol_msa_stats.py +56 -0
- TreeSAK/keep_highest_rrtc.py +37 -0
- TreeSAK/koTree.py +194 -0
- TreeSAK/label_tree.R +75 -0
- TreeSAK/label_tree.py +121 -0
- TreeSAK/mad.py +708 -0
- TreeSAK/mcmc2tree.py +58 -0
- TreeSAK/mcmcTC copy.py +92 -0
- TreeSAK/mcmcTC.py +104 -0
- TreeSAK/mcmctree_vs_reltime.R +44 -0
- TreeSAK/mcmctree_vs_reltime.py +252 -0
- TreeSAK/merge_pdf.py +32 -0
- TreeSAK/pRTC.py +56 -0
- TreeSAK/parse_mcmctree.py +198 -0
- TreeSAK/parse_reltime.py +141 -0
- TreeSAK/phy2fa.py +37 -0
- TreeSAK/plot_distruibution_th.py +165 -0
- TreeSAK/prep_mcmctree_ctl.py +92 -0
- TreeSAK/print_leaves.py +32 -0
- TreeSAK/pruneMSA.py +63 -0
- TreeSAK/recode.py +73 -0
- TreeSAK/remove_bias.R +112 -0
- TreeSAK/rename_leaves.py +77 -0
- TreeSAK/replace_clade.py +55 -0
- TreeSAK/root_with_out_group.py +84 -0
- TreeSAK/run_TaxaCountStats_R_s1.py +455 -0
- TreeSAK/subsample_drep_gnms.py +74 -0
- TreeSAK/subset.py +69 -0
- TreeSAK/subset_tree_stupid_old_way.py +193 -0
- TreeSAK/supertree.py +330 -0
- TreeSAK/tmp_1.py +19 -0
- TreeSAK/tmp_2.py +19 -0
- TreeSAK/tmp_3.py +120 -0
- TreeSAK/weighted_rand.rb +23 -0
- treesak-1.51.2.data/scripts/TreeSAK +950 -0
- treesak-1.51.2.dist-info/LICENSE +674 -0
- treesak-1.51.2.dist-info/METADATA +27 -0
- treesak-1.51.2.dist-info/RECORD +125 -0
- treesak-1.51.2.dist-info/WHEEL +5 -0
- treesak-1.51.2.dist-info/top_level.txt +1 -0
TreeSAK/FigTree.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import argparse
|
|
3
|
+
from ete3 import Tree
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# Usage text for the FigTree module; it is passed to argparse.ArgumentParser(usage=...) when this file is run as a script.
FigTree_usage = '''
|
|
7
|
+
====================== FigTree example commands ======================
|
|
8
|
+
|
|
9
|
+
TreeSAK FigTree -h
|
|
10
|
+
|
|
11
|
+
======================================================================
|
|
12
|
+
'''
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def FigTree(args):
    """Check the inputs of the FigTree module.

    Only input validation is implemented in this function: it reads the
    options and stops the program when the input file is missing.

    Args:
        args: dict of parsed command-line options. The keys 'i' (input
            metadata file) and 'o' (output file) are read.

    Raises:
        KeyError: if 'i' or 'o' is missing from args.
        SystemExit: with status 1 when the input file does not exist.
    """
    # sys.exit is used instead of the interactive exit() helper so that the
    # failure is reported to the calling shell with a non-zero status.
    import sys

    input_txt_file = args['i']
    op_txt = args['o']  # read so a missing key fails early; not used further here

    if not os.path.isfile(input_txt_file):
        print('Metadata file not found, program exited!')
        sys.exit(1)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
if __name__ == '__main__':

    # Script entry point: declare the options in one table, parse them and
    # hand the resulting dict to FigTree().
    FigTree_parser = argparse.ArgumentParser(usage=FigTree_usage)
    for option_flag, option_settings in (
            ('-i', dict(required=True, help='input metadata')),
            ('-tree', dict(required=False, default=None, help='gene id, in tree file')),
            ('-txt', dict(required=False, default=None, help='gene id, in txt file')),
            ('-o', dict(required=True, help='output metadata')),
            ('-na', dict(required=False, action='store_true', help='include leaves with na values')),
    ):
        FigTree_parser.add_argument(option_flag, **option_settings)
    args = vars(FigTree_parser.parse_args())
    FigTree(args)
|
TreeSAK/GTDB_tree.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import argparse
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
# Usage text passed to argparse. The listed commands are the ones GTDB_tree()
# builds for `-p Demo -i gnm_folder -x fa -t 12` (all output goes to Demo_GTDB_tree).
GTDB_tree_usage = '''
======================== GTDB_tree example command ========================

export GTDBTK_DATA_PATH=/scratch/PI/boqianpy/Database/gtdb_r220/release220
TreeSAK GTDB_tree -p Demo -i gnm_folder -x fa -t 12

# This is a wrapper for the following commands
gtdbtk identify --genome_dir gnm_folder -x fa --out_dir Demo_GTDB_tree --cpus 12
gtdbtk align --identify_dir Demo_GTDB_tree --out_dir Demo_GTDB_tree --cpus 12
gunzip Demo_GTDB_tree/align/gtdbtk.bac120.user_msa.fasta.gz
gunzip Demo_GTDB_tree/align/gtdbtk.ar53.user_msa.fasta.gz
gtdbtk infer --msa_file Demo_GTDB_tree/align/gtdbtk.bac120.user_msa.fasta --out_dir Demo_GTDB_tree --cpus 12 --prefix Demo_bac120
gtdbtk infer --msa_file Demo_GTDB_tree/align/gtdbtk.ar53.user_msa.fasta --out_dir Demo_GTDB_tree --cpus 12 --prefix Demo_ar53

===========================================================================
'''
|
|
19
|
+
|
|
20
|
+
def GTDB_tree(args):
    """Run gtdbtk identify, align and infer on a folder of genomes.

    All output is written to '<prefix>_GTDB_tree'. After the align step,
    each marker-set alignment (bac120, ar53) that exists as a .gz file is
    gunzipped and passed to 'gtdbtk infer'.

    Args:
        args: dict of parsed command-line options with the keys
            'i' (genome folder), 'p' (output prefix),
            'x' (genome file extension) and 't' (number of threads).

    Raises:
        SystemExit: with status 1 when the identify or align command
            returns a non-zero status (the later steps need their output).
    """
    import shlex
    import sys

    input_gnm_dir = args['i']
    output_prefix = args['p']
    file_extension = args['x']
    num_threads = args['t']

    output_dir = '%s_GTDB_tree' % output_prefix

    # Every user-supplied value is shell-quoted so that paths containing
    # spaces or shell metacharacters reach the tools as single arguments.
    quoted_output_dir = shlex.quote(output_dir)
    quoted_threads = shlex.quote(str(num_threads))

    def run_cmd(cmd):
        """Echo cmd, run it through the shell and return True on exit status 0."""
        print(cmd)
        return os.system(cmd) == 0

    cmd_identify = 'gtdbtk identify --genome_dir %s -x %s --out_dir %s --cpus %s' % (
        shlex.quote(str(input_gnm_dir)), shlex.quote(str(file_extension)), quoted_output_dir, quoted_threads)
    cmd_align = 'gtdbtk align --identify_dir %s --out_dir %s --cpus %s' % (
        quoted_output_dir, quoted_output_dir, quoted_threads)

    for required_cmd in (cmd_identify, cmd_align):
        if not run_cmd(required_cmd):
            print('Command failed, program exited!')
            sys.exit(1)

    # The two marker sets are handled independently: a failure for one of
    # them is reported but does not prevent the other from being processed.
    for marker_set in ('bac120', 'ar53'):
        msa = '%s/align/gtdbtk.%s.user_msa.fasta' % (output_dir, marker_set)
        msa_gz = '%s.gz' % msa
        if not os.path.isfile(msa_gz):
            continue

        cmd_gunzip = 'gunzip %s' % shlex.quote(msa_gz)
        cmd_infer = 'gtdbtk infer --msa_file %s --out_dir %s --cpus %s --prefix %s' % (
            shlex.quote(msa), quoted_output_dir, quoted_threads,
            shlex.quote('%s_%s' % (output_prefix, marker_set)))

        # infer is only attempted when gunzip succeeded
        if not (run_cmd(cmd_gunzip) and run_cmd(cmd_infer)):
            print('Tree inference failed for %s' % marker_set)

    inferred_bac120_tree = '%s/%s_bac120.unrooted.tree' % (output_dir, output_prefix)
    inferred_ar53_tree = '%s/%s_ar53.unrooted.tree' % (output_dir, output_prefix)

    if os.path.isfile(inferred_bac120_tree):
        print('Inferred bacterial tree:\t%s' % inferred_bac120_tree)
    if os.path.isfile(inferred_ar53_tree):
        print('Inferred archaeal tree:\t%s' % inferred_ar53_tree)

    print('Done!')
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
if __name__ == '__main__':

    # Script entry point: declare the options in one table, parse them and
    # hand the resulting dict to GTDB_tree().
    GTDB_tree_parser = argparse.ArgumentParser(usage=GTDB_tree_usage)
    for option_flag, option_settings in (
            ('-p', dict(required=True, help='output prefix')),
            ('-i', dict(required=True, help='genome folder')),
            ('-x', dict(required=True, help='genome file extension')),
            ('-t', dict(required=False, type=int, default=1, help='number of threads')),
    ):
        GTDB_tree_parser.add_argument(option_flag, **option_settings)
    args = vars(GTDB_tree_parser.parse_args())
    GTDB_tree(args)
|
TreeSAK/GeneTree.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import argparse
|
|
3
|
+
from Bio import SeqIO
|
|
4
|
+
from distutils.spawn import find_executable
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# Example command line for the GeneTree module, kept as a module-level help string.
GeneTree_usage = '''
|
|
8
|
+
============= GeneTree example commands =============
|
|
9
|
+
|
|
10
|
+
TreeSAK GeneTree -i amoA.faa -o amoA_tree -t 36 -f
|
|
11
|
+
|
|
12
|
+
=====================================================
|
|
13
|
+
'''
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def check_dependencies(program_list):
    """Stop the program unless every required executable can be found on PATH.

    Args:
        program_list: iterable of executable names to look up.

    Raises:
        SystemExit: with status 1 when one or more programs are missing;
            the missing names are printed first.
    """
    # shutil.which is the supported stdlib lookup; distutils (the previous
    # source of find_executable) is deprecated and absent from Python 3.12+.
    import shutil
    import sys

    not_detected_programs = [needed_program for needed_program in program_list
                             if shutil.which(needed_program) is None]

    if not_detected_programs:
        print('%s not found, program exited!' % ','.join(not_detected_programs))
        sys.exit(1)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def sep_path_basename_ext(file_in):
    """Split a file path into its folder, base name and extension.

    Returns:
        A (folder, base, extension) tuple. The folder is '.' when file_in
        has no directory part; the extension keeps its leading dot and is
        '' when the file name has none.
    """
    folder, name = os.path.split(file_in)
    stem, suffix = os.path.splitext(name)
    return ('.' if folder == '' else folder), stem, suffix
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def filter_by_gap(file_in, max_gap_pct, file_out):
    """Copy the sequences whose gap percentage does not exceed a cutoff.

    Args:
        file_in: FASTA file to read.
        max_gap_pct: highest allowed percentage of '-' characters in a
            sequence; any value accepted by float().
        file_out: FASTA file to write. Each kept record is written as its
            id followed by the whole sequence on a single line.
    """
    # convert once instead of once per record
    gap_pct_cutoff = float(max_gap_pct)

    # the context manager closes the output even if parsing raises
    with open(file_out, 'w') as file_out_handle:
        for each_seq in SeqIO.parse(file_in, 'fasta'):
            seq_str = str(each_seq.seq)

            # a zero-length record has no defined gap percentage (and would
            # divide by zero below), so it is left out of the output
            if not seq_str:
                continue

            gap_pct = seq_str.count('-') * 100 / len(seq_str)
            if gap_pct <= gap_pct_cutoff:
                file_out_handle.write('>%s\n%s\n' % (each_seq.id, seq_str))
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def GeneTree(args):
    """Infer a gene tree from a sequence file.

    Steps, in order: align with mafft-einsi, trim the alignment with BMGE
    (default) or trimal, drop sequences whose gap percentage exceeds the
    cutoff, then run iqtree2/iqtree with '-m LG+G+I -bb 1000 --wbtl'. The
    three external commands are also written to '<op_dir>/cmds.txt'.

    Args:
        args: dict of parsed command-line options with the keys
            'i' (sequence file), 'o' (output folder), 't' (threads),
            'f' (overwrite an existing output folder),
            'max_gap' (maximum gap percentage) and
            'trimal' (trim with trimal instead of BMGE).

    Raises:
        SystemExit: with status 1 when the input file or a dependency is
            missing, the output folder exists without 'f', or one of the
            external commands returns a non-zero status.
    """
    import shlex
    import shutil
    import sys

    seq_file = args['i']
    num_threads = args['t']
    op_dir = args['o']
    force_create_op_dir = args['f']
    gap_cutoff = args['max_gap']
    trim_with_trimal = args['trimal']
    bmge_trim_model = 'BLOSUM30'
    bmge_entropy_score_cutoff = '0.55'

    # the sequence file may be None when the option was not supplied
    if (seq_file is None) or (os.path.isfile(seq_file) is False):
        print('Sequence file not found, program exited!')
        sys.exit(1)

    # check dependencies
    if trim_with_trimal is False:
        check_dependencies(['mafft-einsi', 'java'])
    else:
        check_dependencies(['mafft-einsi', 'trimal'])

    # BMGE.jar is expected next to this module
    pwd_bmge_jar = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'BMGE.jar')

    # determine the version of iqtree available on the system
    if shutil.which('iqtree2'):
        iqtree_exe = 'iqtree2'
    elif shutil.which('iqtree'):
        iqtree_exe = 'iqtree'
    else:
        print('iqtree not detected, program exited!')
        sys.exit(1)

    # create op_dir (done in Python so unusual folder names are handled safely)
    if os.path.isdir(op_dir) is True:
        if force_create_op_dir is True:
            shutil.rmtree(op_dir)
        else:
            print('Output folder detected, program exited!')
            sys.exit(1)
    os.makedirs(op_dir)

    # define output file names
    _, sep_file_base, _ = sep_path_basename_ext(seq_file)
    trim_label = 'trimal' if trim_with_trimal is True else 'bmge'
    get_gene_tree_cmds_txt = '%s/cmds.txt' % op_dir
    msa_file = '%s/%s.aln' % (op_dir, sep_file_base)
    msa_file_trimmed = '%s/%s.%s.aln' % (op_dir, sep_file_base, trim_label)
    msa_file_trimmed_low_gap = '%s/%s.%s.maxgap%s.aln' % (op_dir, sep_file_base, trim_label, gap_cutoff)

    # prepare commands; every path is shell-quoted so that spaces or shell
    # metacharacters in file names cannot break or alter the command
    quote = shlex.quote
    mafft_cmd = 'mafft-einsi --thread %s --quiet %s > %s' % (num_threads, quote(seq_file), quote(msa_file))
    if trim_with_trimal is True:
        trim_cmd = 'trimal -in %s -out %s -automated1' % (quote(msa_file), quote(msa_file_trimmed))
    else:
        trim_cmd = 'java -jar %s -i %s -m %s -t AA -h %s -of %s' % (
            quote(pwd_bmge_jar), quote(msa_file), bmge_trim_model, bmge_entropy_score_cutoff, quote(msa_file_trimmed))
    iqtree_cmd = '%s -m LG+G+I -bb 1000 --wbtl -nt %s -s %s -pre %s' % (
        iqtree_exe, num_threads, quote(msa_file_trimmed_low_gap), quote('%s/%s' % (op_dir, sep_file_base)))

    # write out commands
    with open(get_gene_tree_cmds_txt, 'w') as f:
        f.write('%s\n%s\n%s\n' % (mafft_cmd, trim_cmd, iqtree_cmd))

    def run_or_exit(cmd):
        """Echo and run cmd; stop the program when it reports a failure."""
        print(cmd)
        if os.system(cmd) != 0:
            print('Command failed, program exited!')
            sys.exit(1)

    # run mafft, then the selected trimmer (BMGE or trimal); each step
    # consumes the file produced by the previous one
    run_or_exit(mafft_cmd)
    run_or_exit(trim_cmd)

    # remove high gap sequences
    filter_by_gap(msa_file_trimmed, gap_cutoff, msa_file_trimmed_low_gap)

    # run iqtree
    run_or_exit(iqtree_cmd)

    print('Done!')
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
if __name__ == '__main__':

    # Script entry point: parse the options and hand them to GeneTree() as a dict.
    # The usage text is passed to the parser, as the other TreeSAK modules do.
    GeneTree_parser = argparse.ArgumentParser(usage=GeneTree_usage)
    # -i is mandatory: GeneTree() cannot run without a sequence file
    GeneTree_parser.add_argument('-i', required=True, help='sequence file')
    GeneTree_parser.add_argument('-o', required=True, help='output dir')
    GeneTree_parser.add_argument('-t', required=False, type=int, default=1, help='number of threads, default is 1')
    GeneTree_parser.add_argument('-trimal', required=False, action="store_true", help='trim with trimal, default is BMGE')
    GeneTree_parser.add_argument('-f', required=False, action="store_true", help='force overwrite')
    # kept as a string: the value is also embedded in output file names
    GeneTree_parser.add_argument('-max_gap', required=False, default='40', help='maximum percentage of gap, default is 40')
    args = vars(GeneTree_parser.parse_args())
    GeneTree(args)
|