treesak 1.53.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. TreeSAK/ALE.py +63 -0
  2. TreeSAK/ALE1.py +268 -0
  3. TreeSAK/ALE2.py +168 -0
  4. TreeSAK/ALE2RTC.py +30 -0
  5. TreeSAK/ALE3.py +205 -0
  6. TreeSAK/ALE4.py +636 -0
  7. TreeSAK/ALE5.py +210 -0
  8. TreeSAK/ALE6.py +401 -0
  9. TreeSAK/ALE7.py +126 -0
  10. TreeSAK/ALE_backup.py +1081 -0
  11. TreeSAK/AssessCVG.py +128 -0
  12. TreeSAK/AssessMarker.py +306 -0
  13. TreeSAK/AssessMarkerDeltaLL.py +257 -0
  14. TreeSAK/AssessMarkerPA.py +317 -0
  15. TreeSAK/AssessPB.py +113 -0
  16. TreeSAK/BMGE.jar +0 -0
  17. TreeSAK/BMGE.py +49 -0
  18. TreeSAK/C60SR4.nex +127 -0
  19. TreeSAK/CompareMCMC.py +138 -0
  20. TreeSAK/ConcateMSA.py +111 -0
  21. TreeSAK/ConvertMSA.py +135 -0
  22. TreeSAK/Dir.rb +82 -0
  23. TreeSAK/ExtractMarkerSeq.py +263 -0
  24. TreeSAK/FastRoot.py +1175 -0
  25. TreeSAK/FastRoot_backup.py +1122 -0
  26. TreeSAK/FigTree.py +34 -0
  27. TreeSAK/GTDB_tree.py +76 -0
  28. TreeSAK/GeneTree.py +142 -0
  29. TreeSAK/KEGG_Luo17.py +807 -0
  30. TreeSAK/LcaToLeaves.py +66 -0
  31. TreeSAK/MarkerRef2Tree.py +616 -0
  32. TreeSAK/MarkerRef2Tree_backup.py +628 -0
  33. TreeSAK/MarkerSeq2Tree.py +299 -0
  34. TreeSAK/MarkerSeq2Tree_backup.py +259 -0
  35. TreeSAK/ModifyTopo.py +116 -0
  36. TreeSAK/Newick_tree_plotter.py +79 -0
  37. TreeSAK/OMA.py +170 -0
  38. TreeSAK/OMA2.py +212 -0
  39. TreeSAK/OneLineAln.py +50 -0
  40. TreeSAK/PB.py +155 -0
  41. TreeSAK/PMSF.py +115 -0
  42. TreeSAK/PhyloBiAssoc.R +84 -0
  43. TreeSAK/PhyloBiAssoc.py +167 -0
  44. TreeSAK/PlotMCMC.py +41 -0
  45. TreeSAK/PlotMcmcNode.py +152 -0
  46. TreeSAK/PlotMcmcNode_old.py +252 -0
  47. TreeSAK/RootTree.py +101 -0
  48. TreeSAK/RootTreeGTDB.py +371 -0
  49. TreeSAK/RootTreeGTDB214.py +288 -0
  50. TreeSAK/RootTreeGTDB220.py +300 -0
  51. TreeSAK/SequentialDating.py +16 -0
  52. TreeSAK/SingleAleHGT.py +157 -0
  53. TreeSAK/SingleLinePhy.py +50 -0
  54. TreeSAK/SliceMSA.py +142 -0
  55. TreeSAK/SplitScore.py +21 -0
  56. TreeSAK/SplitScore1.py +177 -0
  57. TreeSAK/SplitScore1OMA.py +148 -0
  58. TreeSAK/SplitScore2.py +608 -0
  59. TreeSAK/TaxaCountStats.R +256 -0
  60. TreeSAK/TaxonTree.py +47 -0
  61. TreeSAK/TreeSAK_config.py +32 -0
  62. TreeSAK/VERSION +164 -0
  63. TreeSAK/VisHPD95.R +45 -0
  64. TreeSAK/VisHPD95.py +200 -0
  65. TreeSAK/__init__.py +0 -0
  66. TreeSAK/ale_parser.py +74 -0
  67. TreeSAK/ale_splitter.py +63 -0
  68. TreeSAK/alignment_pruner.pl +1471 -0
  69. TreeSAK/assessOG.py +45 -0
  70. TreeSAK/batch_itol.py +171 -0
  71. TreeSAK/catfasta2phy.py +140 -0
  72. TreeSAK/cogTree.py +185 -0
  73. TreeSAK/compare_trees.R +30 -0
  74. TreeSAK/compare_trees.py +255 -0
  75. TreeSAK/dating.py +264 -0
  76. TreeSAK/dating_ss.py +361 -0
  77. TreeSAK/deltall.py +82 -0
  78. TreeSAK/do_rrtc.rb +464 -0
  79. TreeSAK/fa2phy.py +42 -0
  80. TreeSAK/filter_rename_ar53.py +118 -0
  81. TreeSAK/format_leaf_name.py +70 -0
  82. TreeSAK/gap_stats.py +38 -0
  83. TreeSAK/get_SCG_tree.py +742 -0
  84. TreeSAK/get_arCOG_seq.py +97 -0
  85. TreeSAK/global_functions.py +222 -0
  86. TreeSAK/gnm_leaves.py +43 -0
  87. TreeSAK/iTOL.py +791 -0
  88. TreeSAK/iTOL_gene_tree.py +80 -0
  89. TreeSAK/itol_msa_stats.py +56 -0
  90. TreeSAK/keep_highest_rrtc.py +37 -0
  91. TreeSAK/koTree.py +194 -0
  92. TreeSAK/label_gene_tree_by_gnm.py +34 -0
  93. TreeSAK/label_tree.R +75 -0
  94. TreeSAK/label_tree.py +121 -0
  95. TreeSAK/mad.py +708 -0
  96. TreeSAK/mcmc2tree.py +58 -0
  97. TreeSAK/mcmcTC copy.py +92 -0
  98. TreeSAK/mcmcTC.py +104 -0
  99. TreeSAK/mcmctree_vs_reltime.R +44 -0
  100. TreeSAK/mcmctree_vs_reltime.py +252 -0
  101. TreeSAK/merge_pdf.py +32 -0
  102. TreeSAK/pRTC.py +56 -0
  103. TreeSAK/parse_mcmctree.py +198 -0
  104. TreeSAK/parse_reltime.py +141 -0
  105. TreeSAK/phy2fa.py +37 -0
  106. TreeSAK/plot_distruibution_th.py +165 -0
  107. TreeSAK/prep_mcmctree_ctl.py +92 -0
  108. TreeSAK/print_leaves.py +32 -0
  109. TreeSAK/pruneMSA.py +63 -0
  110. TreeSAK/recode.py +73 -0
  111. TreeSAK/remove_bias.R +112 -0
  112. TreeSAK/rename_leaves.py +78 -0
  113. TreeSAK/replace_clade.py +55 -0
  114. TreeSAK/root_with_out_group.py +84 -0
  115. TreeSAK/run_TaxaCountStats_R_s1.py +455 -0
  116. TreeSAK/subsample_drep_gnms.py +74 -0
  117. TreeSAK/subset.py +69 -0
  118. TreeSAK/subset_tree_stupid_old_way.py +193 -0
  119. TreeSAK/supertree.py +330 -0
  120. TreeSAK/tmp_1.py +19 -0
  121. TreeSAK/tmp_2.py +19 -0
  122. TreeSAK/tmp_3.py +120 -0
  123. TreeSAK/tmp_4.py +43 -0
  124. TreeSAK/tmp_5.py +12 -0
  125. TreeSAK/weighted_rand.rb +23 -0
  126. treesak-1.53.3.data/scripts/TreeSAK +955 -0
  127. treesak-1.53.3.dist-info/LICENSE +674 -0
  128. treesak-1.53.3.dist-info/METADATA +27 -0
  129. treesak-1.53.3.dist-info/RECORD +131 -0
  130. treesak-1.53.3.dist-info/WHEEL +5 -0
  131. treesak-1.53.3.dist-info/top_level.txt +1 -0
TreeSAK/FigTree.py ADDED
@@ -0,0 +1,34 @@
1
+ import os
2
+ import argparse
3
+ from ete3 import Tree
4
+
5
+
6
+ FigTree_usage = '''
7
+ ====================== FigTree example commands ======================
8
+
9
+ TreeSAK FigTree -h
10
+
11
+ ======================================================================
12
+ '''
13
+
14
+
15
def FigTree(args):
    """Validate the inputs of the FigTree sub-command.

    Only input validation is performed here: the function checks that the
    input metadata file exists and stops the program otherwise.

    Args:
        args: dict of parsed options. 'i' is the input metadata file,
            'o' the output file (looked up so a missing key fails early,
            but currently unused).

    Raises:
        SystemExit: with exit status 1 when the input metadata file does
            not exist.
    """
    # Local import: sys.exit() is always available, unlike the site-provided
    # exit() helper, and lets us report a failing exit status.
    import sys

    input_txt_file = args['i']
    op_txt = args['o']  # currently unused, kept so a missing 'o' key still raises KeyError

    if not os.path.isfile(input_txt_file):
        print('Metadata file not found, program exited!')
        sys.exit(1)
23
+
24
+
25
if __name__ == '__main__':

    # Command-line entry point. Each (flag, settings) pair below is registered
    # as one option, in the listed order; the parsed options are passed to
    # FigTree() as a plain dict.
    FigTree_option_specs = [
        ('-i',    dict(required=True, help='input metadata')),
        ('-tree', dict(required=False, default=None, help='gene id, in tree file')),
        ('-txt',  dict(required=False, default=None, help='gene id, in txt file')),
        ('-o',    dict(required=True, help='output metadata')),
        ('-na',   dict(required=False, action='store_true', help='include leaves with na values')),
    ]

    FigTree_parser = argparse.ArgumentParser(usage=FigTree_usage)
    for option_flag, option_settings in FigTree_option_specs:
        FigTree_parser.add_argument(option_flag, **option_settings)

    args = vars(FigTree_parser.parse_args())
    FigTree(args)
TreeSAK/GTDB_tree.py ADDED
@@ -0,0 +1,76 @@
1
+ import os
2
+ import argparse
3
+
4
+
5
+ GTDB_tree_usage = '''
6
+ ======================== GTDB_tree example command ========================
7
+
8
+ export GTDBTK_DATA_PATH=/scratch/PI/boqianpy/Database/gtdb_r220/release220
9
+ TreeSAK GTDB_tree -p Demo -i gnm_folder -x fa -t 12
10
+
11
+ # This is a wrapper for the following commands
12
+ gtdbtk identify --genome_dir gnm_folder -x fa --out_dir op_dir --cpus 12
13
+ gtdbtk align --identify_dir Demo_op_dir --out_dir op_dir --cpus 12
14
+ gtdbtk infer --msa_file Demo_op_dir/align/gtdbtk.bac120.user_msa.fasta.gz --out_dir op_dir --cpus 12 --prefix Demo_bac120
15
+ gtdbtk infer --msa_file Demo_op_dir/align/gtdbtk.ar53.user_msa.fasta.gz --out_dir op_dir --cpus 12 --prefix Demo_ar53
16
+
17
+ ===========================================================================
18
+ '''
19
+
20
def GTDB_tree(args):
    """Run the GTDB-Tk identify, align and infer steps on a folder of genomes.

    Every command is printed before it is executed through the shell. All
    results go to '<prefix>_GTDB_tree'. For each marker set (bac120, then
    ar53) whose gzipped user MSA exists after the align step, the MSA is
    decompressed with gunzip and a tree is inferred with the prefix
    '<prefix>_<marker set>'. Exit statuses of the external tools are not
    checked; a step whose input is missing is simply skipped.

    Args:
        args: dict of parsed options:
            'i'  folder holding the genome files
            'p'  output prefix
            'x'  genome file extension
            't'  number of threads passed to gtdbtk via --cpus
    """
    # Local import: needed to quote user-supplied values for the shell.
    import shlex

    input_gnm_dir = args['i']
    output_prefix = args['p']
    file_extension = args['x']
    num_threads = args['t']

    def quote(value):
        # Paths or prefixes with spaces/metacharacters must reach the shell as
        # a single word; values that are already safe are returned unchanged.
        return shlex.quote(str(value))

    output_dir = '%s_GTDB_tree' % output_prefix

    cmd_identify = 'gtdbtk identify --genome_dir %s -x %s --out_dir %s --cpus %s --write_single_copy_genes' % (
        quote(input_gnm_dir), quote(file_extension), quote(output_dir), quote(num_threads))
    cmd_align = 'gtdbtk align --identify_dir %s --out_dir %s --cpus %s' % (
        quote(output_dir), quote(output_dir), quote(num_threads))

    for cmd in (cmd_identify, cmd_align):
        print(cmd)
        os.system(cmd)

    # (marker set, label used in the final report), in the original order
    marker_sets = (('bac120', 'bacterial'), ('ar53', 'archaeal'))

    for marker_set, _ in marker_sets:
        msa = '%s/align/gtdbtk.%s.user_msa.fasta' % (output_dir, marker_set)
        msa_gz = '%s.gz' % msa

        # the align step only writes an MSA for domains present in the input
        if not os.path.isfile(msa_gz):
            continue

        cmd_gunzip = 'gunzip %s' % quote(msa_gz)
        cmd_infer = 'gtdbtk infer --msa_file %s --out_dir %s --cpus %s --prefix %s' % (
            quote(msa), quote(output_dir), quote(num_threads),
            quote('%s_%s' % (output_prefix, marker_set)))

        for cmd in (cmd_gunzip, cmd_infer):
            print(cmd)
            os.system(cmd)

    # report the trees that were actually produced
    for marker_set, domain_label in marker_sets:
        inferred_tree = '%s/%s_%s.unrooted.tree' % (output_dir, output_prefix, marker_set)
        if os.path.isfile(inferred_tree):
            print('Inferred %s tree:\t%s' % (domain_label, inferred_tree))

    print('Done!')
66
+
67
+
68
if __name__ == '__main__':

    # Command-line entry point. Each (flag, settings) pair below is registered
    # as one option, in the listed order; the parsed options are passed to
    # GTDB_tree() as a plain dict.
    GTDB_tree_option_specs = [
        ('-p', dict(required=True, help='output prefix')),
        ('-i', dict(required=True, help='genome folder')),
        ('-x', dict(required=True, help='genome file extension')),
        ('-t', dict(required=False, type=int, default=1, help='number of threads')),
    ]

    GTDB_tree_parser = argparse.ArgumentParser(usage=GTDB_tree_usage)
    for option_flag, option_settings in GTDB_tree_option_specs:
        GTDB_tree_parser.add_argument(option_flag, **option_settings)

    args = vars(GTDB_tree_parser.parse_args())
    GTDB_tree(args)
TreeSAK/GeneTree.py ADDED
@@ -0,0 +1,142 @@
1
import argparse
import os

try:
    from distutils.spawn import find_executable
except ImportError:
    # distutils was removed in Python 3.12 (PEP 632); shutil.which is the
    # standard-library replacement and takes the same single argument here.
    from shutil import which as find_executable

from Bio import SeqIO
5
+
6
+
7
+ GeneTree_usage = '''
8
+ ============= GeneTree example commands =============
9
+
10
+ TreeSAK GeneTree -i amoA.faa -o amoA_tree -t 36 -f
11
+
12
+ =====================================================
13
+ '''
14
+
15
+
16
def check_dependencies(program_list):
    """Stop the program if any required executable cannot be found.

    Args:
        program_list: iterable of executable names (or paths) to look up.

    Raises:
        SystemExit: with exit status 1 when at least one program is
            missing; the missing programs are printed first as a
            comma-separated list.
    """
    # Local imports: shutil.which replaces distutils' find_executable
    # (distutils is gone from Python 3.12), sys.exit replaces the
    # site-provided exit() helper.
    import shutil
    import sys

    not_detected_programs = [program for program in program_list
                             if shutil.which(program) is None]

    if not_detected_programs:
        print('%s not found, program exited!' % ','.join(not_detected_programs))
        sys.exit(1)
26
+
27
+
28
def sep_path_basename_ext(file_in):
    """Split a file path into (folder, base name, extension).

    The folder is reported as '.' when the path has no directory part; the
    extension keeps its leading dot and is '' when the file name has none.
    """
    folder = os.path.dirname(file_in)
    folder = '.' if folder == '' else folder

    stem, suffix = os.path.splitext(os.path.basename(file_in))

    return folder, stem, suffix
37
+
38
+
39
def filter_by_gap(file_in, max_gap_pct, file_out):
    """Copy the sequences whose gap percentage does not exceed a threshold.

    Args:
        file_in: FASTA file to read (parsed with Bio.SeqIO).
        max_gap_pct: highest accepted percentage of '-' characters in a
            sequence; any value float() accepts.
        file_out: FASTA file to write. Each kept record is written as
            '>id' followed by its sequence on a single line.

    Raises:
        ValueError: if max_gap_pct cannot be converted to a float.
    """
    # convert once instead of once per record
    gap_pct_cutoff = float(max_gap_pct)

    # the context manager closes the output even if parsing fails midway
    with open(file_out, 'w') as file_out_handle:
        for each_seq in SeqIO.parse(file_in, 'fasta'):
            seq_str = str(each_seq.seq)

            # a zero-length sequence has no gap percentage (it would divide
            # by zero) and carries no data, so it is dropped
            if not seq_str:
                continue

            gap_pct = seq_str.count('-') * 100 / len(seq_str)
            if gap_pct <= gap_pct_cutoff:
                file_out_handle.write('>%s\n%s\n' % (each_seq.id, seq_str))
48
+
49
+
50
def _run_cmd(cmd, expected_output=None):
    """Print and run a shell command; exit if its expected output file is missing or empty."""
    import sys

    print(cmd)
    os.system(cmd)

    if expected_output is None:
        return
    if not os.path.isfile(expected_output) or os.path.getsize(expected_output) == 0:
        print('%s was not produced, program exited!' % expected_output)
        sys.exit(1)


def GeneTree(args):
    """Infer a gene tree from a sequence file.

    Steps, in order: align with mafft-einsi, trim the alignment (BMGE by
    default, trimal on request), drop sequences with too many gaps, then run
    iqtree2 (or iqtree if iqtree2 is not installed). The three external
    commands are also written to '<output dir>/cmds.txt' before they run.

    Args:
        args: dict of parsed options:
            'i'        input sequence file
            'o'        output directory
            't'        number of threads for mafft-einsi and iqtree
            'f'        overwrite the output directory if it already exists
            'max_gap'  maximum gap percentage allowed per trimmed sequence
            'trimal'   trim with trimal instead of BMGE

    Raises:
        SystemExit: with exit status 1 when the input file or a required
            program is missing, when the output directory exists and
            overwriting was not requested, or when the alignment or
            trimming step produces no output.
    """
    # Local imports keep this block self-contained.
    import shlex
    import shutil
    import sys

    seq_file = args['i']
    num_threads = args['t']
    op_dir = args['o']
    force_create_op_dir = args['f']
    gap_cutoff = args['max_gap']
    trim_with_trimal = args['trimal']
    bmge_trim_model = 'BLOSUM30'
    bmge_entropy_score_cutoff = '0.55'

    def quote(value):
        # user-supplied paths must reach the shell as one word; values that
        # are already safe are returned unchanged
        return shlex.quote(str(value))

    # validate the input first, so a bad call never deletes an existing output folder
    if seq_file is None or not os.path.isfile(seq_file):
        print('Sequence file not found, program exited!')
        sys.exit(1)

    # check dependencies
    trimmer_program = 'trimal' if trim_with_trimal else 'java'
    check_dependencies(['mafft-einsi', trimmer_program])

    # BMGE.jar is expected next to this module
    pwd_bmge_jar = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'BMGE.jar')

    # determine the version of iqtree available on the system, preferring iqtree2
    iqtree_exe = next((exe for exe in ('iqtree2', 'iqtree') if shutil.which(exe)), None)
    if iqtree_exe is None:
        print('iqtree not detected, program exited!')
        sys.exit(1)

    # create op_dir
    if os.path.isdir(op_dir):
        if not force_create_op_dir:
            print('Output folder detected, program exited!')
            sys.exit(1)
        if os.path.islink(op_dir):
            # like `rm -r`, remove the link itself, not the folder it points to
            os.unlink(op_dir)
        else:
            shutil.rmtree(op_dir)
    os.makedirs(op_dir)

    ######################################## define output file name ########################################

    sep_file_path, sep_file_base, sep_file_ext = sep_path_basename_ext(seq_file)
    trimmer_tag = 'trimal' if trim_with_trimal else 'bmge'

    get_gene_tree_cmds_txt = '%s/cmds.txt' % op_dir
    msa_file = '%s/%s.aln' % (op_dir, sep_file_base)
    msa_file_trimmed = '%s/%s.%s.aln' % (op_dir, sep_file_base, trimmer_tag)
    msa_file_trimmed_low_gap = '%s/%s.%s.maxgap%s.aln' % (op_dir, sep_file_base, trimmer_tag, gap_cutoff)

    #########################################################################################################

    # prepare commands
    mafft_cmd = 'mafft-einsi --thread %s --quiet %s > %s' % (
        quote(num_threads), quote(seq_file), quote(msa_file))

    if trim_with_trimal:
        trim_cmd = 'trimal -in %s -out %s -automated1' % (quote(msa_file), quote(msa_file_trimmed))
    else:
        trim_cmd = 'java -jar %s -i %s -m %s -t AA -h %s -of %s' % (
            quote(pwd_bmge_jar), quote(msa_file), bmge_trim_model,
            bmge_entropy_score_cutoff, quote(msa_file_trimmed))

    iqtree_cmd = '%s -m LG+G+I -bb 1000 --wbtl -nt %s -s %s -pre %s' % (
        iqtree_exe, quote(num_threads), quote(msa_file_trimmed_low_gap),
        quote('%s/%s' % (op_dir, sep_file_base)))

    # write out commands
    with open(get_gene_tree_cmds_txt, 'w') as f:
        f.write('%s\n%s\n%s\n' % (mafft_cmd, trim_cmd, iqtree_cmd))

    # run mafft; stop here if no alignment was written
    _run_cmd(mafft_cmd, expected_output=msa_file)

    # trim the alignment (BMGE or trimal); stop here if nothing was written
    _run_cmd(trim_cmd, expected_output=msa_file_trimmed)

    # remove high gap sequences
    filter_by_gap(msa_file_trimmed, gap_cutoff, msa_file_trimmed_low_gap)

    # run iqtree
    _run_cmd(iqtree_cmd)

    print('Done!')
130
+
131
+
132
if __name__ == '__main__':

    # Command-line entry point; the parsed options are passed to GeneTree() as a plain dict.
    # The example commands in GeneTree_usage are shown as the usage text.
    GeneTree_parser = argparse.ArgumentParser(usage=GeneTree_usage)
    # '-i' is mandatory: the pipeline cannot run without a sequence file
    GeneTree_parser.add_argument('-i', required=True, help='sequence file')
    GeneTree_parser.add_argument('-o', required=True, help='output dir')
    GeneTree_parser.add_argument('-t', required=False, type=int, default=1, help='number of threads, default is 1')
    GeneTree_parser.add_argument('-trimal', required=False, action="store_true", help='trim with trimal, default is BMGE')
    GeneTree_parser.add_argument('-f', required=False, action="store_true", help='force overwrite')
    GeneTree_parser.add_argument('-max_gap', required=False, default='40', help='maximum percentage of gap, default is 40')
    args = vars(GeneTree_parser.parse_args())
    GeneTree(args)