treesak 1.51.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of treesak might be problematic. Click here for more details.

Files changed (125) hide show
  1. TreeSAK/ALE.py +63 -0
  2. TreeSAK/ALE1.py +268 -0
  3. TreeSAK/ALE2.py +168 -0
  4. TreeSAK/ALE2RTC.py +30 -0
  5. TreeSAK/ALE3.py +205 -0
  6. TreeSAK/ALE4.py +636 -0
  7. TreeSAK/ALE5.py +210 -0
  8. TreeSAK/ALE6.py +401 -0
  9. TreeSAK/ALE7.py +126 -0
  10. TreeSAK/ALE_backup.py +1081 -0
  11. TreeSAK/AssessCVG.py +128 -0
  12. TreeSAK/AssessMarker.py +306 -0
  13. TreeSAK/AssessMarkerDeltaLL.py +257 -0
  14. TreeSAK/AssessMarkerPA.py +317 -0
  15. TreeSAK/AssessPB.py +130 -0
  16. TreeSAK/BMGE.jar +0 -0
  17. TreeSAK/BMGE.py +49 -0
  18. TreeSAK/CompareMCMC.py +138 -0
  19. TreeSAK/ConcateMSA.py +111 -0
  20. TreeSAK/ConvertMSA.py +135 -0
  21. TreeSAK/Dir.rb +82 -0
  22. TreeSAK/ExtractMarkerSeq.py +263 -0
  23. TreeSAK/FastRoot.py +1175 -0
  24. TreeSAK/FastRoot_backup.py +1122 -0
  25. TreeSAK/FigTree.py +34 -0
  26. TreeSAK/GTDB_tree.py +76 -0
  27. TreeSAK/GeneTree.py +142 -0
  28. TreeSAK/KEGG_Luo17.py +807 -0
  29. TreeSAK/LcaToLeaves.py +66 -0
  30. TreeSAK/MarkerRef2Tree.py +616 -0
  31. TreeSAK/MarkerRef2Tree_backup.py +628 -0
  32. TreeSAK/MarkerSeq2Tree.py +290 -0
  33. TreeSAK/MarkerSeq2Tree_backup.py +259 -0
  34. TreeSAK/ModifyTopo.py +116 -0
  35. TreeSAK/Newick_tree_plotter.py +79 -0
  36. TreeSAK/OMA.py +170 -0
  37. TreeSAK/OMA2.py +212 -0
  38. TreeSAK/OneLineAln.py +50 -0
  39. TreeSAK/PB.py +155 -0
  40. TreeSAK/PMSF.py +106 -0
  41. TreeSAK/PhyloBiAssoc.R +84 -0
  42. TreeSAK/PhyloBiAssoc.py +167 -0
  43. TreeSAK/PlotMCMC.py +41 -0
  44. TreeSAK/PlotMcmcNode.py +152 -0
  45. TreeSAK/PlotMcmcNode_old.py +252 -0
  46. TreeSAK/RootTree.py +101 -0
  47. TreeSAK/RootTreeGTDB214.py +288 -0
  48. TreeSAK/RootTreeGTDB220.py +300 -0
  49. TreeSAK/RootTreeGTDB226.py +300 -0
  50. TreeSAK/SequentialDating.py +16 -0
  51. TreeSAK/SingleAleHGT.py +157 -0
  52. TreeSAK/SingleLinePhy.py +50 -0
  53. TreeSAK/SliceMSA.py +142 -0
  54. TreeSAK/SplitScore.py +19 -0
  55. TreeSAK/SplitScore1.py +178 -0
  56. TreeSAK/SplitScore1OMA.py +148 -0
  57. TreeSAK/SplitScore2.py +597 -0
  58. TreeSAK/TaxaCountStats.R +256 -0
  59. TreeSAK/TaxonTree.py +47 -0
  60. TreeSAK/TreeSAK_config.py +32 -0
  61. TreeSAK/VERSION +158 -0
  62. TreeSAK/VisHPD95.R +45 -0
  63. TreeSAK/VisHPD95.py +200 -0
  64. TreeSAK/__init__.py +0 -0
  65. TreeSAK/ale_parser.py +74 -0
  66. TreeSAK/ale_splitter.py +63 -0
  67. TreeSAK/alignment_pruner.pl +1471 -0
  68. TreeSAK/assessOG.py +45 -0
  69. TreeSAK/catfasta2phy.py +140 -0
  70. TreeSAK/cogTree.py +185 -0
  71. TreeSAK/compare_trees.R +30 -0
  72. TreeSAK/compare_trees.py +255 -0
  73. TreeSAK/dating.py +264 -0
  74. TreeSAK/dating_ss.py +361 -0
  75. TreeSAK/deltall.py +82 -0
  76. TreeSAK/do_rrtc.rb +464 -0
  77. TreeSAK/fa2phy.py +42 -0
  78. TreeSAK/format_leaf_name.py +70 -0
  79. TreeSAK/gap_stats.py +38 -0
  80. TreeSAK/get_SCG_tree.py +742 -0
  81. TreeSAK/get_arCOG_seq.py +97 -0
  82. TreeSAK/global_functions.py +222 -0
  83. TreeSAK/gnm_leaves.py +43 -0
  84. TreeSAK/iTOL.py +791 -0
  85. TreeSAK/iTOL_gene_tree.py +80 -0
  86. TreeSAK/itol_msa_stats.py +56 -0
  87. TreeSAK/keep_highest_rrtc.py +37 -0
  88. TreeSAK/koTree.py +194 -0
  89. TreeSAK/label_tree.R +75 -0
  90. TreeSAK/label_tree.py +121 -0
  91. TreeSAK/mad.py +708 -0
  92. TreeSAK/mcmc2tree.py +58 -0
  93. TreeSAK/mcmcTC copy.py +92 -0
  94. TreeSAK/mcmcTC.py +104 -0
  95. TreeSAK/mcmctree_vs_reltime.R +44 -0
  96. TreeSAK/mcmctree_vs_reltime.py +252 -0
  97. TreeSAK/merge_pdf.py +32 -0
  98. TreeSAK/pRTC.py +56 -0
  99. TreeSAK/parse_mcmctree.py +198 -0
  100. TreeSAK/parse_reltime.py +141 -0
  101. TreeSAK/phy2fa.py +37 -0
  102. TreeSAK/plot_distruibution_th.py +165 -0
  103. TreeSAK/prep_mcmctree_ctl.py +92 -0
  104. TreeSAK/print_leaves.py +32 -0
  105. TreeSAK/pruneMSA.py +63 -0
  106. TreeSAK/recode.py +73 -0
  107. TreeSAK/remove_bias.R +112 -0
  108. TreeSAK/rename_leaves.py +77 -0
  109. TreeSAK/replace_clade.py +55 -0
  110. TreeSAK/root_with_out_group.py +84 -0
  111. TreeSAK/run_TaxaCountStats_R_s1.py +455 -0
  112. TreeSAK/subsample_drep_gnms.py +74 -0
  113. TreeSAK/subset.py +69 -0
  114. TreeSAK/subset_tree_stupid_old_way.py +193 -0
  115. TreeSAK/supertree.py +330 -0
  116. TreeSAK/tmp_1.py +19 -0
  117. TreeSAK/tmp_2.py +19 -0
  118. TreeSAK/tmp_3.py +120 -0
  119. TreeSAK/weighted_rand.rb +23 -0
  120. treesak-1.51.2.data/scripts/TreeSAK +950 -0
  121. treesak-1.51.2.dist-info/LICENSE +674 -0
  122. treesak-1.51.2.dist-info/METADATA +27 -0
  123. treesak-1.51.2.dist-info/RECORD +125 -0
  124. treesak-1.51.2.dist-info/WHEEL +5 -0
  125. treesak-1.51.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,148 @@
1
+ from __future__ import print_function
2
+ import os
3
+ import argparse
4
+ from Bio import SeqIO
5
+
6
+
7
# Example command lines for the SplitScore1OMA step.
# The thread option is spelled -jst here because that is the flag the argument
# parser in this file defines (and the key SplitScore1OMA reads from args).
SplitScore1OMA_usage = '''
======================== SplitScore1OMA example commands ========================

# SplitScore1
TreeSAK SplitScore1OMA -i OrthologousGroups.txt -s OrthologousGroupsFasta -o step1_op_dir -jst 6 -f
TreeSAK SplitScore1OMA -i OrthologousGroups.txt -s OrthologousGroupsFasta -o step1_op_dir -jst 6 -f -u interested_gnm.txt
# Please ensure that all the commands in iqtree_cmds.txt have been executed before proceeding to step 2.

=================================================================================
'''
17
+
18
+
19
def select_seq(seq_file, seq_id_list, output_file):
    """Copy the selected records of a FASTA file into a new FASTA file.

    Args:
        seq_file: path of the FASTA file to read (parsed with SeqIO).
        seq_id_list: ids of the records to keep.
        output_file: path of the FASTA file to write; it is overwritten.

    Kept records are written in input order. Only the record id is written
    to the header line, and each sequence is written on a single line.
    """
    # Build the lookup once so each record costs one hash probe instead of a
    # scan over the whole id list.
    wanted_ids = set(seq_id_list)

    # The context manager closes the output even if parsing fails part-way.
    with open(output_file, 'w') as output_file_handle:
        for seq_record in SeqIO.parse(seq_file, 'fasta'):
            if seq_record.id in wanted_ids:
                output_file_handle.write('>%s\n' % seq_record.id)
                output_file_handle.write('%s\n' % str(seq_record.seq))
27
+
28
+
29
def _gene_to_genome_id(gene_id):
    """Return the genome id of a gene id: everything before its last underscore."""
    # rpartition gives '' when there is no underscore, which is what joining
    # the split pieces minus the last one would give as well.
    return gene_id.rpartition('_')[0]


def _read_oma_groups(oma_op_txt):
    """Parse the OMA groups file into a list of (group_id, [gene_id, ...]) tuples."""
    groups = []
    with open(oma_op_txt) as oma_handle:
        for each_group in oma_handle:
            # skip comment lines and blank lines
            if each_group.startswith('#') or not each_group.strip():
                continue
            fields = each_group.strip().split('\t')
            # each member looks like "<prefix>:<gene_id> <rest>"; keep the gene id only
            gene_ids = [member.split(':')[1].split(' ')[0] for member in fields[1:]]
            groups.append((fields[0], gene_ids))
    return groups


def get_gene_tree(oma_op_txt, oma_op_fasta, interested_gnm_txt, cov_cutoff, oma_op_fasta_qualified, iqtree_model, num_of_js_threads, force_overwrite, get_gene_tree_cmd_txt):
    """Select well-covered orthologous groups and write their gene-tree commands.

    Args:
        oma_op_txt: tab-separated groups file; lines starting with '#' are ignored.
        oma_op_fasta: folder holding one '<OG id>.fa' file per group.
        interested_gnm_txt: file with one genome id per line, or None to use
            every genome found in oma_op_txt.
        cov_cutoff: minimum percentage of interested genomes a group must cover.
        oma_op_fasta_qualified: output folder for the sequences of qualified groups.
        iqtree_model: value passed to iqtree2 -m.
        num_of_js_threads: thread count passed to mafft-einsi and iqtree2.
        force_overwrite: if True, an existing output folder is deleted first.
        get_gene_tree_cmd_txt: file receiving one 'mafft; trimal; iqtree2'
            command line per qualified group.

    Raises:
        SystemExit: if the output folder exists and force_overwrite is not
            True, or if interested_gnm_txt contains no genome id.

    The output-folder prefix is stripped from the written commands, so they
    refer to files by bare name (presumably to be run from inside that folder).
    """
    # Local import keeps this block self-contained.
    import shutil

    # Read the groups file once; it feeds both the genome census and the filter.
    groups = _read_oma_groups(oma_op_txt)

    # get the total number of genome
    genome_id_set = set()
    for _, gene_ids in groups:
        genome_id_set.update(_gene_to_genome_id(gene) for gene in gene_ids)

    if interested_gnm_txt is not None:
        with open(interested_gnm_txt) as gnm_handle:
            # Blank lines would add an empty id and inflate the coverage denominator.
            interested_gnm_set = set(line.strip() for line in gnm_handle if line.strip())
        if not interested_gnm_set:
            # Without this guard the coverage calculation divides by zero.
            print('No genome id found in %s, program exited!' % interested_gnm_txt)
            raise SystemExit()
    else:
        interested_gnm_set = genome_id_set

    # create output folder (filesystem calls instead of shell commands, so
    # paths containing spaces or shell metacharacters are handled correctly)
    if os.path.isdir(oma_op_fasta_qualified):
        if force_overwrite is True:
            shutil.rmtree(oma_op_fasta_qualified)
        else:
            print('%s already exist, program exited!' % oma_op_fasta_qualified)
            raise SystemExit()
    os.makedirs(oma_op_fasta_qualified)

    # filter OMA output
    qualified_grp_to_gene_dict = dict()
    for group_id, gene_ids in groups:
        interested_genes = [gene for gene in gene_ids if _gene_to_genome_id(gene) in interested_gnm_set]
        current_cov = len(interested_genes) * 100.0 / len(interested_gnm_set)
        if current_cov >= cov_cutoff:
            qualified_grp_to_gene_dict[group_id] = interested_genes

    print('The number of orthologous groups with coverage >= %s is %s.' % (cov_cutoff, len(qualified_grp_to_gene_dict)))

    # The whole FASTA file can be copied only when every genome in the data
    # is of interest. Comparing set sizes alone would also copy unfiltered
    # files for a same-sized but different genome list.
    copy_whole_file = genome_id_set.issubset(interested_gnm_set)

    # prepare commands for getting gene tree
    with open(get_gene_tree_cmd_txt, 'w') as get_gene_tree_cmd_txt_handle:
        for qualified_grp in sorted(qualified_grp_to_gene_dict):
            # 'OMA00012' -> '12'; lstrip cannot fail on an all-zero id
            group_id_only_num = qualified_grp.replace('OMA', '').lstrip('0')

            # define file name
            og_id = 'OG%s' % group_id_only_num
            pwd_seq_file_in = '%s/%s.fa' % (oma_op_fasta, og_id)
            pwd_og_seq = '%s/%s.fa' % (oma_op_fasta_qualified, og_id)
            pwd_og_aln = '%s/%s.aln' % (oma_op_fasta_qualified, og_id)
            pwd_og_aln_trimmed = '%s/%s_trimmed.aln' % (oma_op_fasta_qualified, og_id)

            # get sequence
            if copy_whole_file:
                shutil.copy(pwd_seq_file_in, pwd_og_seq)
            else:
                select_seq(pwd_seq_file_in, qualified_grp_to_gene_dict[qualified_grp], pwd_og_seq)

            # align, trim and iqtree
            mafft_cmd = 'mafft-einsi --thread %s --quiet %s > %s' % (num_of_js_threads, pwd_og_seq, pwd_og_aln)
            trimal_cmd = 'trimal -in %s -out %s -automated1' % (pwd_og_aln, pwd_og_aln_trimmed)
            iqtree_cmd = 'iqtree2 -s %s --seqtype AA -m %s -T %s -B 1000 --quiet --wbtl --prefix %s/%s' % (pwd_og_aln_trimmed, iqtree_model, num_of_js_threads, oma_op_fasta_qualified, og_id)
            cmds_one_line = '%s; %s; %s' % (mafft_cmd, trimal_cmd, iqtree_cmd)
            get_gene_tree_cmd_txt_handle.write(cmds_one_line.replace((oma_op_fasta_qualified + '/'), '') + '\n')
105
+
106
+
107
def SplitScore1OMA(args):
    """Run step 1: prepare the output folder and delegate to get_gene_tree.

    Args:
        args: dict of command-line values with the keys
            'i' (OMA groups file), 's' (OMA group FASTA folder),
            'u' (interested-genome file or None), 'm' (iqtree model),
            'c' (coverage cutoff), 'f' (force overwrite),
            'jst' (threads per gene tree) and 'o' (output folder).

    Raises:
        SystemExit: if the output folder already exists and 'f' is not True.
    """
    # Local import keeps this block self-contained.
    import shutil

    oma_op_txt = args['i']
    oma_op_fasta = args['s']
    interested_gnm_txt = args['u']
    iqtree_model = args['m']
    cov_cutoff = args['c']
    force_overwrite = args['f']
    num_of_js_threads = args['jst']
    step_1_op_dir = args['o']

    # define file name
    qualified_og_dir = '%s/qualified_OGs' % step_1_op_dir
    iqtree_cmds_txt = '%s/iqtree_cmds.txt' % step_1_op_dir

    # create output folder (rmtree instead of a shell 'rm -r', so paths with
    # spaces or shell metacharacters are handled correctly)
    if os.path.isdir(step_1_op_dir) is True:
        if force_overwrite is True:
            shutil.rmtree(step_1_op_dir)
        else:
            print('%s exist, program exited!' % step_1_op_dir)
            raise SystemExit()
    os.mkdir(step_1_op_dir)

    # qualified_og_dir is deliberately NOT created here: get_gene_tree creates
    # it itself and exits if it already exists without force overwrite, so
    # pre-creating it made every run without -f abort.
    get_gene_tree(oma_op_txt, oma_op_fasta, interested_gnm_txt, cov_cutoff, qualified_og_dir, iqtree_model, num_of_js_threads, force_overwrite, iqtree_cmds_txt)
134
+
135
+
136
if __name__ == '__main__':

    # Stand-alone entry point: declare the options as a table, register them
    # in order, then hand the parsed values to SplitScore1OMA as a dict.
    option_table = [
        ('-i', dict(required=True, help='OrthologousGroups.txt, produced by OMA')),
        ('-s', dict(required=True, help='OrthologousGroupsFasta, produced by OMA')),
        ('-u', dict(required=False, default=None, help='ID of interested genomes, no file extension')),
        ('-o', dict(required=True, help='output directory')),
        ('-m', dict(required=False, default='LG+G+I', help='iqtree_model, default: LG+G+I')),
        ('-c', dict(required=False, type=int, default=80, help='coverage cutoff, default: 80')),
        ('-f', dict(required=False, action="store_true", help='force overwrite')),
        ('-jst', dict(required=False, type=int, default=1, help='num of threads for inferring gene tree, default: 1')),
    ]

    cli_parser = argparse.ArgumentParser()
    for option_flag, option_settings in option_table:
        cli_parser.add_argument(option_flag, **option_settings)

    SplitScore1OMA(vars(cli_parser.parse_args()))