treesak 1.51.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of treesak might be problematic. Click here for more details.

Files changed (125) hide show
  1. TreeSAK/ALE.py +63 -0
  2. TreeSAK/ALE1.py +268 -0
  3. TreeSAK/ALE2.py +168 -0
  4. TreeSAK/ALE2RTC.py +30 -0
  5. TreeSAK/ALE3.py +205 -0
  6. TreeSAK/ALE4.py +636 -0
  7. TreeSAK/ALE5.py +210 -0
  8. TreeSAK/ALE6.py +401 -0
  9. TreeSAK/ALE7.py +126 -0
  10. TreeSAK/ALE_backup.py +1081 -0
  11. TreeSAK/AssessCVG.py +128 -0
  12. TreeSAK/AssessMarker.py +306 -0
  13. TreeSAK/AssessMarkerDeltaLL.py +257 -0
  14. TreeSAK/AssessMarkerPA.py +317 -0
  15. TreeSAK/AssessPB.py +130 -0
  16. TreeSAK/BMGE.jar +0 -0
  17. TreeSAK/BMGE.py +49 -0
  18. TreeSAK/CompareMCMC.py +138 -0
  19. TreeSAK/ConcateMSA.py +111 -0
  20. TreeSAK/ConvertMSA.py +135 -0
  21. TreeSAK/Dir.rb +82 -0
  22. TreeSAK/ExtractMarkerSeq.py +263 -0
  23. TreeSAK/FastRoot.py +1175 -0
  24. TreeSAK/FastRoot_backup.py +1122 -0
  25. TreeSAK/FigTree.py +34 -0
  26. TreeSAK/GTDB_tree.py +76 -0
  27. TreeSAK/GeneTree.py +142 -0
  28. TreeSAK/KEGG_Luo17.py +807 -0
  29. TreeSAK/LcaToLeaves.py +66 -0
  30. TreeSAK/MarkerRef2Tree.py +616 -0
  31. TreeSAK/MarkerRef2Tree_backup.py +628 -0
  32. TreeSAK/MarkerSeq2Tree.py +290 -0
  33. TreeSAK/MarkerSeq2Tree_backup.py +259 -0
  34. TreeSAK/ModifyTopo.py +116 -0
  35. TreeSAK/Newick_tree_plotter.py +79 -0
  36. TreeSAK/OMA.py +170 -0
  37. TreeSAK/OMA2.py +212 -0
  38. TreeSAK/OneLineAln.py +50 -0
  39. TreeSAK/PB.py +155 -0
  40. TreeSAK/PMSF.py +106 -0
  41. TreeSAK/PhyloBiAssoc.R +84 -0
  42. TreeSAK/PhyloBiAssoc.py +167 -0
  43. TreeSAK/PlotMCMC.py +41 -0
  44. TreeSAK/PlotMcmcNode.py +152 -0
  45. TreeSAK/PlotMcmcNode_old.py +252 -0
  46. TreeSAK/RootTree.py +101 -0
  47. TreeSAK/RootTreeGTDB214.py +288 -0
  48. TreeSAK/RootTreeGTDB220.py +300 -0
  49. TreeSAK/RootTreeGTDB226.py +300 -0
  50. TreeSAK/SequentialDating.py +16 -0
  51. TreeSAK/SingleAleHGT.py +157 -0
  52. TreeSAK/SingleLinePhy.py +50 -0
  53. TreeSAK/SliceMSA.py +142 -0
  54. TreeSAK/SplitScore.py +19 -0
  55. TreeSAK/SplitScore1.py +178 -0
  56. TreeSAK/SplitScore1OMA.py +148 -0
  57. TreeSAK/SplitScore2.py +597 -0
  58. TreeSAK/TaxaCountStats.R +256 -0
  59. TreeSAK/TaxonTree.py +47 -0
  60. TreeSAK/TreeSAK_config.py +32 -0
  61. TreeSAK/VERSION +158 -0
  62. TreeSAK/VisHPD95.R +45 -0
  63. TreeSAK/VisHPD95.py +200 -0
  64. TreeSAK/__init__.py +0 -0
  65. TreeSAK/ale_parser.py +74 -0
  66. TreeSAK/ale_splitter.py +63 -0
  67. TreeSAK/alignment_pruner.pl +1471 -0
  68. TreeSAK/assessOG.py +45 -0
  69. TreeSAK/catfasta2phy.py +140 -0
  70. TreeSAK/cogTree.py +185 -0
  71. TreeSAK/compare_trees.R +30 -0
  72. TreeSAK/compare_trees.py +255 -0
  73. TreeSAK/dating.py +264 -0
  74. TreeSAK/dating_ss.py +361 -0
  75. TreeSAK/deltall.py +82 -0
  76. TreeSAK/do_rrtc.rb +464 -0
  77. TreeSAK/fa2phy.py +42 -0
  78. TreeSAK/format_leaf_name.py +70 -0
  79. TreeSAK/gap_stats.py +38 -0
  80. TreeSAK/get_SCG_tree.py +742 -0
  81. TreeSAK/get_arCOG_seq.py +97 -0
  82. TreeSAK/global_functions.py +222 -0
  83. TreeSAK/gnm_leaves.py +43 -0
  84. TreeSAK/iTOL.py +791 -0
  85. TreeSAK/iTOL_gene_tree.py +80 -0
  86. TreeSAK/itol_msa_stats.py +56 -0
  87. TreeSAK/keep_highest_rrtc.py +37 -0
  88. TreeSAK/koTree.py +194 -0
  89. TreeSAK/label_tree.R +75 -0
  90. TreeSAK/label_tree.py +121 -0
  91. TreeSAK/mad.py +708 -0
  92. TreeSAK/mcmc2tree.py +58 -0
  93. TreeSAK/mcmcTC copy.py +92 -0
  94. TreeSAK/mcmcTC.py +104 -0
  95. TreeSAK/mcmctree_vs_reltime.R +44 -0
  96. TreeSAK/mcmctree_vs_reltime.py +252 -0
  97. TreeSAK/merge_pdf.py +32 -0
  98. TreeSAK/pRTC.py +56 -0
  99. TreeSAK/parse_mcmctree.py +198 -0
  100. TreeSAK/parse_reltime.py +141 -0
  101. TreeSAK/phy2fa.py +37 -0
  102. TreeSAK/plot_distruibution_th.py +165 -0
  103. TreeSAK/prep_mcmctree_ctl.py +92 -0
  104. TreeSAK/print_leaves.py +32 -0
  105. TreeSAK/pruneMSA.py +63 -0
  106. TreeSAK/recode.py +73 -0
  107. TreeSAK/remove_bias.R +112 -0
  108. TreeSAK/rename_leaves.py +77 -0
  109. TreeSAK/replace_clade.py +55 -0
  110. TreeSAK/root_with_out_group.py +84 -0
  111. TreeSAK/run_TaxaCountStats_R_s1.py +455 -0
  112. TreeSAK/subsample_drep_gnms.py +74 -0
  113. TreeSAK/subset.py +69 -0
  114. TreeSAK/subset_tree_stupid_old_way.py +193 -0
  115. TreeSAK/supertree.py +330 -0
  116. TreeSAK/tmp_1.py +19 -0
  117. TreeSAK/tmp_2.py +19 -0
  118. TreeSAK/tmp_3.py +120 -0
  119. TreeSAK/weighted_rand.rb +23 -0
  120. treesak-1.51.2.data/scripts/TreeSAK +950 -0
  121. treesak-1.51.2.dist-info/LICENSE +674 -0
  122. treesak-1.51.2.dist-info/METADATA +27 -0
  123. treesak-1.51.2.dist-info/RECORD +125 -0
  124. treesak-1.51.2.dist-info/WHEEL +5 -0
  125. treesak-1.51.2.dist-info/top_level.txt +1 -0
TreeSAK/ALE3.py ADDED
@@ -0,0 +1,205 @@
1
+ import os
2
+ import argparse
3
+ import pandas as pd
4
+
5
+
6
# Example command lines for the ALE3 step, stored as a plain-text block.
# NOTE(review): nothing in this file prints this string; presumably it is
# shown as help text by the TreeSAK launcher script - confirm.
ALE3_usage = '''
================= ALE3 example commands =================

TreeSAK ALE3 -2 ALE2_op_dir -o ALE3_op_dir_30 -f -c 30
TreeSAK ALE3 -2 ALE2_op_dir -o ALE3_op_dir_75 -f -c 75

# Needs the uml_rec files

=========================================================
'''
16
+
17
+
18
def ale_parser(rec_folder, SpeciesTreeRef_newick, TableInfo_tsv, TableEvents_tsv, GeneTrees_nwk):
    """Summarise every ``*uml_rec`` file found in ``rec_folder``.

    Each reconciliation file is read by fixed line position:

    * line 3            -> species tree (last tab-separated field)
    * line 7            -> last whitespace-separated token, reported as ``LL``
    * line 9            -> three tab-separated values reported as ``Dp/Tp/Lp``
    * line 10           -> number of reconciled gene trees (first token)
    * line 12 onwards   -> the reconciled gene trees, followed by a totals
                           line (``De/Te/Le/Se``) and a per-branch event table

    Args:
        rec_folder: directory containing the ``uml_rec`` files.
        SpeciesTreeRef_newick: output path for the species tree.
        TableInfo_tsv: output path for the one-row-per-family summary table.
        TableEvents_tsv: output path for the concatenated per-branch tables,
            each row prefixed with the family name.
        GeneTrees_nwk: output path for the reconciled gene trees.

    Raises:
        FileNotFoundError: if ``rec_folder`` contains no ``uml_rec`` file.
    """

    # Sorted so that row order (and the "last file" used below) is
    # reproducible; os.listdir() returns entries in arbitrary order.
    rec_files = sorted(x for x in os.listdir(rec_folder) if x.endswith("uml_rec"))

    # Without this guard the code below would fail later with an opaque
    # UnboundLocalError on stree/table/reconciled_trees.
    if not rec_files:
        raise FileNotFoundError('No uml_rec file found in %s' % rec_folder)

    table_info = []
    table_events = []
    for rec_file in rec_files:
        fam = rec_file.replace(".ale.uml_rec", "")
        with open(os.path.join(rec_folder, rec_file)) as f:
            lines = f.readlines()

        stree = lines[2].strip()
        ll = lines[6].strip().split()[-1]
        dp, tp, lp = lines[8].strip().split("\t")[1:]
        n_reconciled_trees = int(lines[9].strip().split()[0])
        reconciled_trees = lines[11:n_reconciled_trees + 11]
        # Drop the line terminator here so the row terminator can be written
        # explicitly instead of relying on a newline stuck to the last field.
        totals_line = lines[11 + n_reconciled_trees + 1].rstrip("\n")
        de, te, le, se = totals_line.split("\t")[1:]
        table = lines[11 + n_reconciled_trees + 3:]

        table_info.append((fam, ll, dp, tp, lp, de, te, le, se))
        table_events.append((fam, table))

    # write out SpeciesTreeRef.newick
    # NOTE(review): only the species tree of the last parsed file is kept;
    # presumably all families share the same species tree - confirm.
    with open(SpeciesTreeRef_newick, "w") as f:
        f.write(stree.split("\t")[-1])

    # write out TableInfo.tsv
    with open(TableInfo_tsv, "w") as f:
        f.write("\t".join(["Family", "LL", "Dp", "Tp", "Lp", "De", "Te", "Le", "Se"]) + "\n")
        for info in table_info:
            f.write("\t".join(info) + "\n")

    # write out TableEvents.tsv
    with open(TableEvents_tsv, "w") as f:
        # The header of the per-branch table starts with "# of"; rename that
        # first column to "Branch" and prepend the two columns added here.
        f.write("Family\tBranchType\t" + table[0].replace("# of", "Branch"))
        for fam, events in table_events:
            for b in events[1:]:
                f.write(fam + "\t" + b)

    # write out GeneTrees.nwk
    # NOTE(review): as in the original code, only the reconciled trees of the
    # last parsed family are written - confirm this is intended.
    with open(GeneTrees_nwk, "w") as f:
        f.writelines(reconciled_trees)
63
+
64
def get_verticality_and_transfer_propensity(TableEvents_tsv, verticality_txt, transfer_propensity_txt, fun_des_dict):
    """Derive per-branch verticality and per-family transfer propensity.

    Reads the tab-separated events table and computes, from column sums:

    * Verticality (per ``Branch``):
      ``singletons / (singletons + Originations + Transfers)``
    * Transfer propensity (per ``Family``):
      ``Transfers / (singletons + Transfers)``, rounded to three decimals

    Args:
        TableEvents_tsv: input table with at least the columns ``Family``,
            ``Branch``, ``singletons``, ``Originations`` and ``Transfers``.
        verticality_txt: output path, two columns ``Branch``/``Verticality``.
        transfer_propensity_txt: output path, ``OG``/``Transfer_propensity``
            plus a ``Description`` column when ``fun_des_dict`` is non-empty.
        fun_des_dict: mapping from family id to description; families
            missing from it are reported as ``na``.
    """

    # Keep the grouping keys as strings: numeric-looking family ids would
    # otherwise be parsed as integers and break the .replace() calls below.
    df = pd.read_csv(TableEvents_tsv, sep="\t", dtype={"Family": str, "Branch": str})

    # Sum only the numeric columns actually used, and keep the group key as
    # the index so that the resulting dicts are keyed by branch/family name
    # (grouping with as_index=False keyed the output by row position instead).
    per_branch = df.groupby("Branch")[["singletons", "Originations", "Transfers"]].sum()
    per_family = df.groupby("Family")[["singletons", "Transfers"]].sum()

    verticality = per_branch["singletons"] / (
        per_branch["singletons"] + per_branch["Originations"] + per_branch["Transfers"])
    transfer_propensity_all = per_family["Transfers"] / (
        per_family["singletons"] + per_family["Transfers"])

    verticality_dict = verticality.to_dict()
    transfer_propensity_dict = transfer_propensity_all.to_dict()

    with open(verticality_txt, 'w') as verticality_txt_handle:
        verticality_txt_handle.write('Branch\tVerticality\n')
        for each_key in sorted(verticality_dict):
            verticality_txt_handle.write('%s\t%s\n' % (each_key, verticality_dict[each_key]))

    with_description = len(fun_des_dict) > 0
    with open(transfer_propensity_txt, 'w') as transfer_propensity_txt_handle:

        # write out header
        if with_description:
            transfer_propensity_txt_handle.write('OG\tTransfer_propensity\tDescription\n')
        else:
            transfer_propensity_txt_handle.write('OG\tTransfer_propensity\n')

        for each_key in sorted(transfer_propensity_dict):
            transfer_propensity = float("{0:.3f}".format(transfer_propensity_dict[each_key]))
            # strip the file-name decorations around the family id
            each_key = each_key.replace('genome_tree.newick_', '').replace('.ufboot', '')
            if with_description:
                transfer_propensity_txt_handle.write(
                    '%s\t%s\t%s\n' % (each_key, transfer_propensity, fun_des_dict.get(each_key, 'na')))
            else:
                transfer_propensity_txt_handle.write('%s\t%s\n' % (each_key, transfer_propensity))
98
+
99
+
100
def ALE3(args):
    """Run the ALE3 step: parse reconciliation files and export gene content.

    Args:
        args: dict with the keys
            ``'2'`` folder with the uml_rec files,
            ``'c'`` gene family presence cutoff in percent,
            ``'a'`` optional two-column (id, description) tab-separated file,
            ``'o'`` output folder,
            ``'f'`` overwrite the output folder if it already exists.

    Writes into the output folder: SpeciesTreeRef.newick, TableInfo.tsv,
    TableEvents.tsv, GeneTrees.nwk, Verticality.txt, Transfer_propensity.txt,
    GeneContent.txt (branch x family presence/absence matrix) and one
    ``GeneContent/<branch>.txt`` list per branch.

    Exits the program if ``-a`` is given but missing, or if the output folder
    exists and ``-f`` was not specified.
    """
    # Local import: the module header is not modified by this block.
    import shutil

    uml_rec_dir = args['2']
    gene_presence_cutoff = args['c']
    fun_des_txt = args['a']
    op_dir = args['o']
    force_create_op_dir = args['f']

    # read in fun_des_txt
    fun_des_dict = dict()
    if fun_des_txt is not None:
        if os.path.isfile(fun_des_txt) is False:
            print('Specified "-a" not found, program exited!')
            exit()
        with open(fun_des_txt) as fun_des_txt_handle:
            for each_line in fun_des_txt_handle:
                each_line_split = each_line.strip().split('\t')
                # skip blank lines and ids without a description instead of
                # failing with an IndexError
                if len(each_line_split) < 2:
                    continue
                fun_des_dict[each_line_split[0]] = each_line_split[1]

    SpeciesTreeRef_newick = '%s/SpeciesTreeRef.newick' % op_dir
    TableInfo_tsv = '%s/TableInfo.tsv' % op_dir
    TableEvents_tsv = '%s/TableEvents.tsv' % op_dir
    GeneTrees_nwk = '%s/GeneTrees.nwk' % op_dir
    gene_content_dir = '%s/GeneContent' % op_dir
    gene_content_txt = '%s/GeneContent.txt' % op_dir
    verticality_txt = '%s/Verticality.txt' % op_dir
    transfer_propensity_txt = '%s/Transfer_propensity.txt' % op_dir

    # (Re)create the output folder without going through a shell, so paths
    # containing spaces or shell metacharacters are handled safely.
    if os.path.isdir(op_dir) is True:
        if force_create_op_dir is True:
            shutil.rmtree(op_dir)
        else:
            print('Output folder detected, program exited!')
            exit()
    os.makedirs(gene_content_dir)  # also creates op_dir

    # parsing ALE2 outputs
    print('Parsing ALE2 outputs')
    ale_parser(uml_rec_dir, SpeciesTreeRef_newick, TableInfo_tsv, TableEvents_tsv, GeneTrees_nwk)

    # get_verticality_and_transfer_propensity
    print('Getting verticality and transfer propensity')
    get_verticality_and_transfer_propensity(TableEvents_tsv, verticality_txt, transfer_propensity_txt, fun_des_dict)

    # get genome content
    og_set = set()
    branch_to_og_dict = dict()
    col_index = {}
    presence_cutoff_fraction = gene_presence_cutoff / 100
    with open(TableEvents_tsv) as table_events_handle:
        for line_index, each_line in enumerate(table_events_handle):
            each_line_split = each_line.strip().split('\t')
            # The header is identified by position rather than by its
            # "Family" prefix, so a family whose name happens to start with
            # "Family" is not mistaken for a header row.
            if line_index == 0:
                col_index = {key: i for i, key in enumerate(each_line_split)}
                continue

            gene_family = each_line_split[col_index['Family']]
            gene_family = gene_family.replace('genome_tree.newick_', '').replace('.ufboot', '')
            gene_branch = each_line_split[col_index['Branch']]
            gene_presence = float(each_line_split[col_index['presence']])
            if gene_presence >= presence_cutoff_fraction:
                og_set.add(gene_family)
                branch_to_og_dict.setdefault(gene_branch, set()).add(gene_family)

    # write out gene content for each branch
    for each_branch, branch_gene_content in branch_to_og_dict.items():
        current_gene_content_txt = '%s/%s.txt' % (gene_content_dir, each_branch)
        with open(current_gene_content_txt, 'w') as current_gene_content_txt_handle:
            for each_gene in sorted(branch_gene_content):
                if len(fun_des_dict) == 0:
                    current_gene_content_txt_handle.write('%s\n' % each_gene)
                else:
                    current_gene_content_txt_handle.write('%s\t%s\n' % (each_gene, fun_des_dict.get(each_gene, 'na')))

    og_list_sorted = sorted(og_set)

    # presence/absence matrix: one row per branch, one column per family
    with open(gene_content_txt, 'w') as gene_content_txt_handle:
        gene_content_txt_handle.write('Branch\t' + '\t'.join(og_list_sorted) + '\n')
        for each_gnm in sorted(branch_to_og_dict):
            present_ogs = branch_to_og_dict[each_gnm]
            og_pa_list = [each_gnm] + ['1' if each_og in present_ogs else '0' for each_og in og_list_sorted]
            gene_content_txt_handle.write('\t'.join(og_pa_list) + '\n')

    print('Protein families in GeneContent.txt: %s' % len(og_list_sorted))
    print('Genomes/branches in GeneContent.txt: %s' % len(branch_to_og_dict))
194
+
195
+
196
if __name__ == '__main__':

    # Stand-alone entry point: declare the command-line options in the same
    # order as before, then hand the parsed values to ALE3() as a plain dict.
    ALE3_parser = argparse.ArgumentParser()
    option_specs = (
        ('-2', {'required': True, 'help': 'Folder with the uml_rec files'}),
        ('-c', {'required': False, 'type': float, 'default': 75,
                'help': 'gene family presence cutoff in percentage, default: 75'}),
        ('-a', {'required': False, 'default': None, 'help': 'OG functional description'}),
        ('-o', {'required': True, 'help': 'output dir'}),
        ('-f', {'required': False, 'action': "store_true", 'help': 'force overwrite'}),
    )
    for option_flag, option_kwargs in option_specs:
        ALE3_parser.add_argument(option_flag, **option_kwargs)

    args = vars(ALE3_parser.parse_args())
    ALE3(args)