treesak 1.53.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- TreeSAK/ALE.py +63 -0
- TreeSAK/ALE1.py +268 -0
- TreeSAK/ALE2.py +168 -0
- TreeSAK/ALE2RTC.py +30 -0
- TreeSAK/ALE3.py +205 -0
- TreeSAK/ALE4.py +636 -0
- TreeSAK/ALE5.py +210 -0
- TreeSAK/ALE6.py +401 -0
- TreeSAK/ALE7.py +126 -0
- TreeSAK/ALE_backup.py +1081 -0
- TreeSAK/AssessCVG.py +128 -0
- TreeSAK/AssessMarker.py +306 -0
- TreeSAK/AssessMarkerDeltaLL.py +257 -0
- TreeSAK/AssessMarkerPA.py +317 -0
- TreeSAK/AssessPB.py +113 -0
- TreeSAK/BMGE.jar +0 -0
- TreeSAK/BMGE.py +49 -0
- TreeSAK/C60SR4.nex +127 -0
- TreeSAK/CompareMCMC.py +138 -0
- TreeSAK/ConcateMSA.py +111 -0
- TreeSAK/ConvertMSA.py +135 -0
- TreeSAK/Dir.rb +82 -0
- TreeSAK/ExtractMarkerSeq.py +263 -0
- TreeSAK/FastRoot.py +1175 -0
- TreeSAK/FastRoot_backup.py +1122 -0
- TreeSAK/FigTree.py +34 -0
- TreeSAK/GTDB_tree.py +76 -0
- TreeSAK/GeneTree.py +142 -0
- TreeSAK/KEGG_Luo17.py +807 -0
- TreeSAK/LcaToLeaves.py +66 -0
- TreeSAK/MarkerRef2Tree.py +616 -0
- TreeSAK/MarkerRef2Tree_backup.py +628 -0
- TreeSAK/MarkerSeq2Tree.py +299 -0
- TreeSAK/MarkerSeq2Tree_backup.py +259 -0
- TreeSAK/ModifyTopo.py +116 -0
- TreeSAK/Newick_tree_plotter.py +79 -0
- TreeSAK/OMA.py +170 -0
- TreeSAK/OMA2.py +212 -0
- TreeSAK/OneLineAln.py +50 -0
- TreeSAK/PB.py +155 -0
- TreeSAK/PMSF.py +115 -0
- TreeSAK/PhyloBiAssoc.R +84 -0
- TreeSAK/PhyloBiAssoc.py +167 -0
- TreeSAK/PlotMCMC.py +41 -0
- TreeSAK/PlotMcmcNode.py +152 -0
- TreeSAK/PlotMcmcNode_old.py +252 -0
- TreeSAK/RootTree.py +101 -0
- TreeSAK/RootTreeGTDB.py +371 -0
- TreeSAK/RootTreeGTDB214.py +288 -0
- TreeSAK/RootTreeGTDB220.py +300 -0
- TreeSAK/SequentialDating.py +16 -0
- TreeSAK/SingleAleHGT.py +157 -0
- TreeSAK/SingleLinePhy.py +50 -0
- TreeSAK/SliceMSA.py +142 -0
- TreeSAK/SplitScore.py +21 -0
- TreeSAK/SplitScore1.py +177 -0
- TreeSAK/SplitScore1OMA.py +148 -0
- TreeSAK/SplitScore2.py +608 -0
- TreeSAK/TaxaCountStats.R +256 -0
- TreeSAK/TaxonTree.py +47 -0
- TreeSAK/TreeSAK_config.py +32 -0
- TreeSAK/VERSION +164 -0
- TreeSAK/VisHPD95.R +45 -0
- TreeSAK/VisHPD95.py +200 -0
- TreeSAK/__init__.py +0 -0
- TreeSAK/ale_parser.py +74 -0
- TreeSAK/ale_splitter.py +63 -0
- TreeSAK/alignment_pruner.pl +1471 -0
- TreeSAK/assessOG.py +45 -0
- TreeSAK/batch_itol.py +171 -0
- TreeSAK/catfasta2phy.py +140 -0
- TreeSAK/cogTree.py +185 -0
- TreeSAK/compare_trees.R +30 -0
- TreeSAK/compare_trees.py +255 -0
- TreeSAK/dating.py +264 -0
- TreeSAK/dating_ss.py +361 -0
- TreeSAK/deltall.py +82 -0
- TreeSAK/do_rrtc.rb +464 -0
- TreeSAK/fa2phy.py +42 -0
- TreeSAK/filter_rename_ar53.py +118 -0
- TreeSAK/format_leaf_name.py +70 -0
- TreeSAK/gap_stats.py +38 -0
- TreeSAK/get_SCG_tree.py +742 -0
- TreeSAK/get_arCOG_seq.py +97 -0
- TreeSAK/global_functions.py +222 -0
- TreeSAK/gnm_leaves.py +43 -0
- TreeSAK/iTOL.py +791 -0
- TreeSAK/iTOL_gene_tree.py +80 -0
- TreeSAK/itol_msa_stats.py +56 -0
- TreeSAK/keep_highest_rrtc.py +37 -0
- TreeSAK/koTree.py +194 -0
- TreeSAK/label_gene_tree_by_gnm.py +34 -0
- TreeSAK/label_tree.R +75 -0
- TreeSAK/label_tree.py +121 -0
- TreeSAK/mad.py +708 -0
- TreeSAK/mcmc2tree.py +58 -0
- TreeSAK/mcmcTC copy.py +92 -0
- TreeSAK/mcmcTC.py +104 -0
- TreeSAK/mcmctree_vs_reltime.R +44 -0
- TreeSAK/mcmctree_vs_reltime.py +252 -0
- TreeSAK/merge_pdf.py +32 -0
- TreeSAK/pRTC.py +56 -0
- TreeSAK/parse_mcmctree.py +198 -0
- TreeSAK/parse_reltime.py +141 -0
- TreeSAK/phy2fa.py +37 -0
- TreeSAK/plot_distruibution_th.py +165 -0
- TreeSAK/prep_mcmctree_ctl.py +92 -0
- TreeSAK/print_leaves.py +32 -0
- TreeSAK/pruneMSA.py +63 -0
- TreeSAK/recode.py +73 -0
- TreeSAK/remove_bias.R +112 -0
- TreeSAK/rename_leaves.py +78 -0
- TreeSAK/replace_clade.py +55 -0
- TreeSAK/root_with_out_group.py +84 -0
- TreeSAK/run_TaxaCountStats_R_s1.py +455 -0
- TreeSAK/subsample_drep_gnms.py +74 -0
- TreeSAK/subset.py +69 -0
- TreeSAK/subset_tree_stupid_old_way.py +193 -0
- TreeSAK/supertree.py +330 -0
- TreeSAK/tmp_1.py +19 -0
- TreeSAK/tmp_2.py +19 -0
- TreeSAK/tmp_3.py +120 -0
- TreeSAK/tmp_4.py +43 -0
- TreeSAK/tmp_5.py +12 -0
- TreeSAK/weighted_rand.rb +23 -0
- treesak-1.53.3.data/scripts/TreeSAK +955 -0
- treesak-1.53.3.dist-info/LICENSE +674 -0
- treesak-1.53.3.dist-info/METADATA +27 -0
- treesak-1.53.3.dist-info/RECORD +131 -0
- treesak-1.53.3.dist-info/WHEEL +5 -0
- treesak-1.53.3.dist-info/top_level.txt +1 -0
TreeSAK/ALE3.py
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import argparse
|
|
3
|
+
import pandas as pd
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
ALE3_usage = '''
|
|
7
|
+
================= ALE3 example commands =================
|
|
8
|
+
|
|
9
|
+
TreeSAK ALE3 -2 ALE2_op_dir -o ALE3_op_dir_30 -f -c 30
|
|
10
|
+
TreeSAK ALE3 -2 ALE2_op_dir -o ALE3_op_dir_75 -f -c 75
|
|
11
|
+
|
|
12
|
+
# Needs the uml_rec files
|
|
13
|
+
|
|
14
|
+
=========================================================
|
|
15
|
+
'''
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def ale_parser(rec_folder, SpeciesTreeRef_newick, TableInfo_tsv, TableEvents_tsv, GeneTrees_nwk):
    """Summarise every ``uml_rec`` file of a folder into four output files.

    Each reconciliation file is read by fixed line position (0-based):
    species tree on line 2, log-likelihood on line 6, rates on line 8,
    number of reconciled trees on line 9, the trees themselves from line 11,
    followed by the event totals and the per-branch event table.

    Args:
        rec_folder: folder containing the files whose name ends in ``uml_rec``.
        SpeciesTreeRef_newick: output path for the species tree string.
        TableInfo_tsv: output path; one row per family with the
            log-likelihood, the three rates and the four event totals.
        TableEvents_tsv: output path; the per-branch event rows of every
            family, each prefixed with the family name.
        GeneTrees_nwk: output path for the reconciled gene trees.

    Raises:
        FileNotFoundError: if ``rec_folder`` holds no ``uml_rec`` file.
    """
    # Sorted so that the row order, and which file counts as "last" below,
    # no longer depends on the arbitrary order returned by os.listdir().
    rec_files = sorted(x for x in os.listdir(rec_folder) if x.endswith("uml_rec"))
    if not rec_files:
        raise FileNotFoundError('No uml_rec file found in %s' % rec_folder)

    first_tree_line = 11
    table_info = []
    table_events = []
    stree = ''
    table = []
    reconciled_trees = []
    for rec_file in rec_files:
        fam = rec_file.replace(".ale.uml_rec", "")
        with open(os.path.join(rec_folder, rec_file)) as f:
            lines = f.readlines()

        stree = lines[2].strip()
        ll = lines[6].strip().split()[-1]
        dp, tp, lp = lines[8].strip().split("\t")[1:]
        n_reconciled_trees = int(lines[9].strip().split()[0])
        after_trees = first_tree_line + n_reconciled_trees
        reconciled_trees = lines[first_tree_line:after_trees]
        # strip() so the last total does not carry the line's newline
        de, te, le, se = lines[after_trees + 1].strip().split("\t")[1:]
        table = lines[after_trees + 3:]

        table_info.append((fam, ll, dp, tp, lp, de, te, le, se))
        table_events.append((fam, table))

    # write out SpeciesTreeRef.newick (species tree of the last file read)
    with open(SpeciesTreeRef_newick, "w") as f:
        f.write(stree.split("\t")[-1])

    # write out TableInfo.tsv
    with open(TableInfo_tsv, "w") as f:
        f.write("\t".join(["Family", "LL", "Dp", "Tp", "Lp", "De", "Te", "Le", "Se"]) + "\n")
        for info in table_info:
            f.write("\t".join(info) + "\n")

    # write out TableEvents.tsv
    with open(TableEvents_tsv, "w") as f:
        # the column header is taken from the event table of the last file read
        header = "Family\tBranchType\t" + table[0].replace("# of", "Branch")
        f.write(header.rstrip("\n") + "\n")
        for fam, events in table_events:
            for b in events[1:]:
                if not b.strip():
                    # blank lines carry no event data
                    continue
                # force exactly one newline so that a file lacking a final
                # newline cannot glue its last row to the next family's row
                f.write(fam + "\t" + b.rstrip("\n") + "\n")

    # write out GeneTrees.nwk
    # NOTE(review): reconciled_trees is overwritten on every iteration above,
    # so only the trees of the last file are written; kept as in the original.
    with open(GeneTrees_nwk, "w") as f:
        f.writelines(reconciled_trees)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def get_verticality_and_transfer_propensity(TableEvents_tsv, verticality_txt, transfer_propensity_txt, fun_des_dict):
    """Write per-branch verticality and per-family transfer propensity.

    Event counts are summed per ``Branch`` and per ``Family`` and combined as:

        verticality         = singletons / (singletons + Originations + Transfers)
        transfer propensity = Transfers / (singletons + Transfers)

    Args:
        TableEvents_tsv: tab-separated event table with at least the columns
            ``Family``, ``Branch``, ``singletons``, ``Originations`` and
            ``Transfers``.
        verticality_txt: output path; one ``Branch<TAB>Verticality`` row per
            branch.
        transfer_propensity_txt: output path; one row per family, with the
            propensity rounded to three decimals.
        fun_des_dict: mapping from family id to a description; when it is
            non-empty a ``Description`` column is added ('na' if missing).
    """
    # Read the identifier columns as text so that purely numeric branch or
    # family ids can still be sorted together and passed to str.replace().
    df = pd.read_csv(TableEvents_tsv, sep="\t", dtype={"Family": str, "Branch": str})

    # Sum only the columns that enter the ratios; summing the whole frame
    # would also try to add up the text columns.
    event_cols = ["singletons", "Originations", "Transfers"]
    dfb = df.groupby("Branch")[event_cols].sum()
    dff = df.groupby("Family")[event_cols].sum()

    verticality = dfb["singletons"] / (dfb["singletons"] + dfb["Originations"] + dfb["Transfers"])
    transfer_propensity_series = dff["Transfers"] / (dff["singletons"] + dff["Transfers"])

    # Both results are indexed by the grouping key, so the dictionaries are
    # keyed by branch name / family name rather than by row position.
    verticality_dict = verticality.to_dict()
    transfer_propensity_dict = transfer_propensity_series.to_dict()

    with open(verticality_txt, 'w') as verticality_txt_handle:
        verticality_txt_handle.write('Branch\tVerticality\n')
        for each_key in sorted(verticality_dict):
            verticality_txt_handle.write('%s\t%s\n' % (each_key, verticality_dict[each_key]))

    with_description = bool(fun_des_dict)
    with open(transfer_propensity_txt, 'w') as transfer_propensity_txt_handle:

        # write out header
        if with_description:
            transfer_propensity_txt_handle.write('OG\tTransfer_propensity\tDescription\n')
        else:
            transfer_propensity_txt_handle.write('OG\tTransfer_propensity\n')

        for each_key in sorted(transfer_propensity_dict):
            transfer_propensity = float("{0:.3f}".format(transfer_propensity_dict[each_key]))
            # reduce the family name to the bare OG id
            og_id = each_key.replace('genome_tree.newick_', '').replace('.ufboot', '')
            if with_description:
                transfer_propensity_txt_handle.write('%s\t%s\t%s\n' % (og_id, transfer_propensity, fun_des_dict.get(og_id, 'na')))
            else:
                transfer_propensity_txt_handle.write('%s\t%s\n' % (og_id, transfer_propensity))
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def ALE3(args):
    """Parse ALE reconciliation outputs and derive per-branch gene content.

    Steps: optionally load family descriptions, (re)create the output folder,
    run ``ale_parser`` on the ``uml_rec`` folder, compute verticality and
    transfer propensity, then write, for every branch, the families whose
    ``presence`` value reaches the cutoff, plus a presence/absence matrix.

    Args:
        args: dict with the keys
            '2': folder with the uml_rec files,
            'c': gene family presence cutoff in percent,
            'a': path of a tab-separated "family<TAB>description" file, or None,
            'o': output folder,
            'f': True to overwrite an existing output folder.
    """
    # Local import: shutil is only needed here and is not among the
    # module-level imports of this file.
    import shutil

    uml_rec_dir = args['2']
    gene_presence_cutoff = args['c']
    fun_des_txt = args['a']
    op_dir = args['o']
    force_create_op_dir = args['f']

    # read in fun_des_txt
    fun_des_dict = dict()
    if fun_des_txt is not None:
        if not os.path.isfile(fun_des_txt):
            print('Specified "-a" not found, program exited!')
            exit()
        with open(fun_des_txt) as fun_des_handle:
            for each_line in fun_des_handle:
                each_line_split = each_line.strip().split('\t')
                # skip blank lines and lines without a description column
                if len(each_line_split) >= 2:
                    fun_des_dict[each_line_split[0]] = each_line_split[1]

    SpeciesTreeRef_newick = '%s/SpeciesTreeRef.newick' % op_dir
    TableInfo_tsv = '%s/TableInfo.tsv' % op_dir
    TableEvents_tsv = '%s/TableEvents.tsv' % op_dir
    GeneTrees_nwk = '%s/GeneTrees.nwk' % op_dir
    gene_content_dir = '%s/GeneContent' % op_dir
    gene_content_txt = '%s/GeneContent.txt' % op_dir
    verticality_txt = '%s/Verticality.txt' % op_dir
    transfer_propensity_txt = '%s/Transfer_propensity.txt' % op_dir

    # (re)create the output folder without going through a shell, so that
    # paths containing spaces or shell metacharacters are handled safely
    if os.path.isdir(op_dir):
        if force_create_op_dir is True:
            shutil.rmtree(op_dir)
        else:
            print('Output folder detected, program exited!')
            exit()
    os.makedirs(gene_content_dir)  # creates op_dir as well

    # parsing ALE2 outputs
    print('Parsing ALE2 outputs')
    ale_parser(uml_rec_dir, SpeciesTreeRef_newick, TableInfo_tsv, TableEvents_tsv, GeneTrees_nwk)

    # get_verticality_and_transfer_propensity
    print('Getting verticality and transfer propensity')
    get_verticality_and_transfer_propensity(TableEvents_tsv, verticality_txt, transfer_propensity_txt, fun_des_dict)

    # get genome content
    og_set = set()
    branch_to_og_dict = dict()
    col_index = {}
    presence_threshold = gene_presence_cutoff / 100
    with open(TableEvents_tsv) as table_events_handle:
        for line_index, each_line in enumerate(table_events_handle):
            each_line_split = each_line.strip().split('\t')
            if line_index == 0:
                # Only the first line is the header; testing the line prefix
                # would misread a family whose id starts with "Family".
                col_index = {key: i for i, key in enumerate(each_line_split)}
                continue
            gene_family = each_line_split[col_index['Family']]
            gene_family = gene_family.replace('genome_tree.newick_', '').replace('.ufboot', '')
            gene_branch = each_line_split[col_index['Branch']]
            gene_presence = float(each_line_split[col_index['presence']])
            if gene_presence >= presence_threshold:
                og_set.add(gene_family)
                branch_to_og_dict.setdefault(gene_branch, set()).add(gene_family)

    # write out gene content for each branch
    for each_branch, branch_gene_content in branch_to_og_dict.items():
        current_gene_content_txt = '%s/%s.txt' % (gene_content_dir, each_branch)
        with open(current_gene_content_txt, 'w') as current_gene_content_txt_handle:
            for each_gene in sorted(branch_gene_content):
                if len(fun_des_dict) == 0:
                    current_gene_content_txt_handle.write('%s\n' % each_gene)
                else:
                    current_gene_content_txt_handle.write('%s\t%s\n' % (each_gene, fun_des_dict.get(each_gene, 'na')))

    og_list_sorted = sorted(og_set)

    # presence/absence matrix: one row per branch, one column per family
    with open(gene_content_txt, 'w') as gene_content_txt_handle:
        gene_content_txt_handle.write('Branch\t' + '\t'.join(og_list_sorted) + '\n')
        for each_gnm in sorted(branch_to_og_dict):
            branch_ogs = branch_to_og_dict[each_gnm]
            og_pa_list = [each_gnm] + ['1' if each_og in branch_ogs else '0' for each_og in og_list_sorted]
            gene_content_txt_handle.write('\t'.join(og_pa_list) + '\n')

    print('Protein families in GeneContent.txt: %s' % len(og_list_sorted))
    print('Genomes/branches in GeneContent.txt: %s' % len(branch_to_og_dict))
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
if __name__ == '__main__':

    # Command-line entry point: declare the options in one table, register
    # them in order, then hand the parsed values to ALE3() as a plain dict.
    ALE3_parser = argparse.ArgumentParser()
    for option_flag, option_settings in (
        ('-2', dict(required=True, help='Folder with the uml_rec files')),
        ('-c', dict(required=False, type=float, default=75, help='gene family presence cutoff in percentage, default: 75')),
        ('-a', dict(required=False, default=None, help='OG functional description')),
        ('-o', dict(required=True, help='output dir')),
        ('-f', dict(required=False, action="store_true", help='force overwrite')),
    ):
        ALE3_parser.add_argument(option_flag, **option_settings)

    args = vars(ALE3_parser.parse_args())
    ALE3(args)
|