treesak 1.53.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. TreeSAK/ALE.py +63 -0
  2. TreeSAK/ALE1.py +268 -0
  3. TreeSAK/ALE2.py +168 -0
  4. TreeSAK/ALE2RTC.py +30 -0
  5. TreeSAK/ALE3.py +205 -0
  6. TreeSAK/ALE4.py +636 -0
  7. TreeSAK/ALE5.py +210 -0
  8. TreeSAK/ALE6.py +401 -0
  9. TreeSAK/ALE7.py +126 -0
  10. TreeSAK/ALE_backup.py +1081 -0
  11. TreeSAK/AssessCVG.py +128 -0
  12. TreeSAK/AssessMarker.py +306 -0
  13. TreeSAK/AssessMarkerDeltaLL.py +257 -0
  14. TreeSAK/AssessMarkerPA.py +317 -0
  15. TreeSAK/AssessPB.py +113 -0
  16. TreeSAK/BMGE.jar +0 -0
  17. TreeSAK/BMGE.py +49 -0
  18. TreeSAK/C60SR4.nex +127 -0
  19. TreeSAK/CompareMCMC.py +138 -0
  20. TreeSAK/ConcateMSA.py +111 -0
  21. TreeSAK/ConvertMSA.py +135 -0
  22. TreeSAK/Dir.rb +82 -0
  23. TreeSAK/ExtractMarkerSeq.py +263 -0
  24. TreeSAK/FastRoot.py +1175 -0
  25. TreeSAK/FastRoot_backup.py +1122 -0
  26. TreeSAK/FigTree.py +34 -0
  27. TreeSAK/GTDB_tree.py +76 -0
  28. TreeSAK/GeneTree.py +142 -0
  29. TreeSAK/KEGG_Luo17.py +807 -0
  30. TreeSAK/LcaToLeaves.py +66 -0
  31. TreeSAK/MarkerRef2Tree.py +616 -0
  32. TreeSAK/MarkerRef2Tree_backup.py +628 -0
  33. TreeSAK/MarkerSeq2Tree.py +299 -0
  34. TreeSAK/MarkerSeq2Tree_backup.py +259 -0
  35. TreeSAK/ModifyTopo.py +116 -0
  36. TreeSAK/Newick_tree_plotter.py +79 -0
  37. TreeSAK/OMA.py +170 -0
  38. TreeSAK/OMA2.py +212 -0
  39. TreeSAK/OneLineAln.py +50 -0
  40. TreeSAK/PB.py +155 -0
  41. TreeSAK/PMSF.py +115 -0
  42. TreeSAK/PhyloBiAssoc.R +84 -0
  43. TreeSAK/PhyloBiAssoc.py +167 -0
  44. TreeSAK/PlotMCMC.py +41 -0
  45. TreeSAK/PlotMcmcNode.py +152 -0
  46. TreeSAK/PlotMcmcNode_old.py +252 -0
  47. TreeSAK/RootTree.py +101 -0
  48. TreeSAK/RootTreeGTDB.py +371 -0
  49. TreeSAK/RootTreeGTDB214.py +288 -0
  50. TreeSAK/RootTreeGTDB220.py +300 -0
  51. TreeSAK/SequentialDating.py +16 -0
  52. TreeSAK/SingleAleHGT.py +157 -0
  53. TreeSAK/SingleLinePhy.py +50 -0
  54. TreeSAK/SliceMSA.py +142 -0
  55. TreeSAK/SplitScore.py +21 -0
  56. TreeSAK/SplitScore1.py +177 -0
  57. TreeSAK/SplitScore1OMA.py +148 -0
  58. TreeSAK/SplitScore2.py +608 -0
  59. TreeSAK/TaxaCountStats.R +256 -0
  60. TreeSAK/TaxonTree.py +47 -0
  61. TreeSAK/TreeSAK_config.py +32 -0
  62. TreeSAK/VERSION +164 -0
  63. TreeSAK/VisHPD95.R +45 -0
  64. TreeSAK/VisHPD95.py +200 -0
  65. TreeSAK/__init__.py +0 -0
  66. TreeSAK/ale_parser.py +74 -0
  67. TreeSAK/ale_splitter.py +63 -0
  68. TreeSAK/alignment_pruner.pl +1471 -0
  69. TreeSAK/assessOG.py +45 -0
  70. TreeSAK/batch_itol.py +171 -0
  71. TreeSAK/catfasta2phy.py +140 -0
  72. TreeSAK/cogTree.py +185 -0
  73. TreeSAK/compare_trees.R +30 -0
  74. TreeSAK/compare_trees.py +255 -0
  75. TreeSAK/dating.py +264 -0
  76. TreeSAK/dating_ss.py +361 -0
  77. TreeSAK/deltall.py +82 -0
  78. TreeSAK/do_rrtc.rb +464 -0
  79. TreeSAK/fa2phy.py +42 -0
  80. TreeSAK/filter_rename_ar53.py +118 -0
  81. TreeSAK/format_leaf_name.py +70 -0
  82. TreeSAK/gap_stats.py +38 -0
  83. TreeSAK/get_SCG_tree.py +742 -0
  84. TreeSAK/get_arCOG_seq.py +97 -0
  85. TreeSAK/global_functions.py +222 -0
  86. TreeSAK/gnm_leaves.py +43 -0
  87. TreeSAK/iTOL.py +791 -0
  88. TreeSAK/iTOL_gene_tree.py +80 -0
  89. TreeSAK/itol_msa_stats.py +56 -0
  90. TreeSAK/keep_highest_rrtc.py +37 -0
  91. TreeSAK/koTree.py +194 -0
  92. TreeSAK/label_gene_tree_by_gnm.py +34 -0
  93. TreeSAK/label_tree.R +75 -0
  94. TreeSAK/label_tree.py +121 -0
  95. TreeSAK/mad.py +708 -0
  96. TreeSAK/mcmc2tree.py +58 -0
  97. TreeSAK/mcmcTC copy.py +92 -0
  98. TreeSAK/mcmcTC.py +104 -0
  99. TreeSAK/mcmctree_vs_reltime.R +44 -0
  100. TreeSAK/mcmctree_vs_reltime.py +252 -0
  101. TreeSAK/merge_pdf.py +32 -0
  102. TreeSAK/pRTC.py +56 -0
  103. TreeSAK/parse_mcmctree.py +198 -0
  104. TreeSAK/parse_reltime.py +141 -0
  105. TreeSAK/phy2fa.py +37 -0
  106. TreeSAK/plot_distruibution_th.py +165 -0
  107. TreeSAK/prep_mcmctree_ctl.py +92 -0
  108. TreeSAK/print_leaves.py +32 -0
  109. TreeSAK/pruneMSA.py +63 -0
  110. TreeSAK/recode.py +73 -0
  111. TreeSAK/remove_bias.R +112 -0
  112. TreeSAK/rename_leaves.py +78 -0
  113. TreeSAK/replace_clade.py +55 -0
  114. TreeSAK/root_with_out_group.py +84 -0
  115. TreeSAK/run_TaxaCountStats_R_s1.py +455 -0
  116. TreeSAK/subsample_drep_gnms.py +74 -0
  117. TreeSAK/subset.py +69 -0
  118. TreeSAK/subset_tree_stupid_old_way.py +193 -0
  119. TreeSAK/supertree.py +330 -0
  120. TreeSAK/tmp_1.py +19 -0
  121. TreeSAK/tmp_2.py +19 -0
  122. TreeSAK/tmp_3.py +120 -0
  123. TreeSAK/tmp_4.py +43 -0
  124. TreeSAK/tmp_5.py +12 -0
  125. TreeSAK/weighted_rand.rb +23 -0
  126. treesak-1.53.3.data/scripts/TreeSAK +955 -0
  127. treesak-1.53.3.dist-info/LICENSE +674 -0
  128. treesak-1.53.3.dist-info/METADATA +27 -0
  129. treesak-1.53.3.dist-info/RECORD +131 -0
  130. treesak-1.53.3.dist-info/WHEEL +5 -0
  131. treesak-1.53.3.dist-info/top_level.txt +1 -0
@@ -0,0 +1,256 @@
1
+
2
+ ################################################################################
3
+
4
+ # R script for ranking marker proteins for generating concatenated species trees (127 taxa set)
5
+ # Dombrowski et al., 2020
6
+ # Finalized: February 2020
7
+
8
+ # modified by Weizhi
9
+ # Rscript /Users/songweizhi/PycharmProjects/Sponge_Hologenome/Scripts/TaxaCountStats.R -t treefile_v2.tre -l List_of_trees_2.txt -g mapping_3.txt -x MarkerList.txt -s TaxaCounts_op.txt -r Genes_to_remove.txt -o a.txt
10
+
11
+ ################################################################################
12
+
13
+ suppressMessages(library(optparse))
14
+ suppressMessages(library(plyr))
15
+ suppressMessages(library(dbplyr))
16
+ suppressMessages(library(dplyr))
17
+ suppressMessages(library(tidyr))
18
+ suppressMessages(library(ggplot2))
19
+ suppressMessages(library(data.table))
20
+ suppressMessages(library(RColorBrewer))
21
+ suppressMessages(library(gplots))
22
+ suppressMessages(library(ape))
23
+
24
+ ####################################### argument parser ######################################
25
+
26
# command-line interface; every option is read as a character string
option_list <- list(
  make_option(c("-t", "--tree"),       type = "character", help = "combined_contree_file"),
  make_option(c("-l", "--treelist"),   type = "character", help = "list_of_trees_txt"),
  make_option(c("-g", "--mapping"),    type = "character", help = "mapping_txt"),
  make_option(c("-x", "--markerlist"), type = "character", help = "marker_list_txt"),
  make_option(c("-s", "--cstop"),      type = "character", help = "combined_count_sister_taxa_output"),
  make_option(c("-r", "--removegene"), type = "character", help = "genes_to_remove_txt"),
  make_option(c("-o", "--output"),     type = "character", help = "output table")
)

opt_parser <- OptionParser(option_list = option_list)
opt <- parse_args(opt_parser)

# copy the parsed options into the variable names used by the rest of the script
combined_contree_file         <- opt$tree
list_of_trees_txt             <- opt$treelist
mapping_txt                   <- opt$mapping
marker_list_txt               <- opt$markerlist
combined_count_sister_taxa_op <- opt$cstop
genes_to_remove_txt           <- opt$removegene
output_table                  <- opt$output
45
+
46
+ ################################################################################
47
+
48
+ #rm(list=ls())
49
+ sessionInfo()
50
+
51
+ ################################################################################
52
+ #0.1 setting working directory (!adjust wdir accordingly!)
53
+ ################################################################################
54
+
55
+ # setting working directory (!adjust wdir accordingly!)
56
+ #wdir <- "/Users/songweizhi/Desktop/Anja_paper/Nina/4_151Marker_analyses/127_taxa"
57
+ #wdir <- "/Users/songweizhi/Desktop/Input_folder_to_R"
58
+ #setwd(wdir)
59
+
60
+ # Weizhi
61
+ # List_of_trees_2.txt id of marker genes (HOGs) (152, mind order)
62
+ # treefile_v2.tre tree corresponding to each marker gene (HOG)
63
+ # mapping_3.txt taxonomy/cluster/group/color for each genome (Domain is higher than cluster)
64
+ # Genes_to_remove.txt basically a list of marker gene ids
65
+ # MarkerList.txt basically a list of marker gene ids (152, mind order)
66
+ # TaxaCounts_151MarkerGenes_ArcRefv5UAP2_129taxa_v5.txt concatenated output from count_sister_taxa.py
67
+
68
+ ################################################################################
69
+ #1. read in the treefiles (all concatenated in one large document)
70
+ ################################################################################
71
#read in the list of tree ids and the concatenated tree file
tree_order <- read.table(list_of_trees_txt, sep="\t", header=FALSE, fill=TRUE, quote = "")

trees <- read.tree(combined_contree_file)

#read.tree returns a bare "phylo" object (not a list of trees) when the file
#holds a single tree; wrap it so that it is counted as one tree below
if (inherits(trees, "phylo")) {
  trees <- list(trees)
}

#count how many taxa (tip labels) are in each tree
#seq_along() yields no iterations for an empty tree list, whereas
#1:length(trees) would iterate over c(1, 0)
nr_species <- vapply(seq_along(trees),
                     function(i) length(trees[[i]]$tip.label),
                     integer(1))

#one row per tree, named after the marker ids listed in list_of_trees_txt
#(the ids must be in the same order as the trees in the tree file)
Species_in_tree <- data.frame(NrSpecies = nr_species)
rownames(Species_in_tree) <- as.character(tree_order$V1)
#head(Species_in_tree)
88
+
89
+ ################################################################################
90
+ #2. read in taxa mapping files and stats from tom's script
91
+ ################################################################################
92
+
93
+ #taxa - taxonomy mapping file
94
+ mapping <- read.table(mapping_txt, sep="\t", header=T, fill=TRUE, quote = "", comment.char = "", check.names = FALSE)
95
+ #head(mapping)
96
+
97
+ #cleanup and shorten mapping file
98
+ mapping_clean <-unique(mapping[,c("Cluster", "Domain")])
99
+ #head(mapping_clean)
100
+ #dim(mapping_clean)
101
+
102
+ #list of genes to remove because in these trees archaea were not monophyletic
103
+ genes_to_remove <- read.table(genes_to_remove_txt, sep="\t", header=T, fill=TRUE, quote = "")
104
+
105
+ #list of total markers used in these analyses
106
+ MarkerList <- read.table(marker_list_txt, sep="\t", header=T, fill=TRUE, quote = "")
107
+
108
+ #read in statistics file from count_sister_taxa.py
109
+ SisterCounts <- read.table(combined_count_sister_taxa_op, sep="\t", header=T, fill=TRUE, quote = "")
110
+ #colnames(SisterCounts)
111
+
112
+ ################################################################################
113
+ #3. transform data
114
+ ################################################################################
115
+ #reduce the table by removing hits with low support (cutoff is 0.1 in this case, but can be changed)
116
+ SisterCounts_temp0 <- subset(SisterCounts, Normalized2_sum_of_occurances >= 0.1)
117
+
118
+ #control, whether something is missing in the mapping file, the first setdiff is the relevant one!
119
+ List_taxa <- as.character(unique(SisterCounts_temp0$Group_of_interest))
120
+ #setdiff(List_taxa, mapping$Cluster)
121
+
122
+ #add in Group Info (i.e. DPANN, Eury, TACK) for Group of interest (needed to define HGT events)
123
+ SisterCounts_temp1 <- merge(SisterCounts_temp0, mapping_clean, by.x = "Group_of_interest", by.y = "Cluster", all.x = T)
124
+ colnames(SisterCounts_temp1) <- c( "Group_of_interest", "MarkerID", "Sister_taxa", "Normalized_sum_of_occurances","splits","Normalized2_sum_of_occurances","Clusters", "Group_of_interest_Group")
125
+ #head(SisterCounts_temp1)
126
+
127
+ #add in Group Info (i.e. DPANN, Eury, TACK) for Sister_taxa (needed to define HGT events)
128
+ SisterCounts_temp2 <- merge(SisterCounts_temp1, mapping_clean, by.x = "Sister_taxa", by.y = "Cluster", all.x = T)
129
+ colnames(SisterCounts_temp2) <- c( "Sister_taxa", "Group_of_interest", "MarkerID","Normalized_sum_of_occurances", "splits","Normalized2_sum_of_occurances","Clusters","Group_of_interest_Group", "Sister_taxon_Group")
130
+ #head(SisterCounts_temp2)
131
+
132
+ #resort dataframe for aesthetics
133
+ SisterCounts_temp3 <- SisterCounts_temp2[,c("MarkerID","Group_of_interest", "Group_of_interest_Group", "Sister_taxa", "Sister_taxon_Group", "Normalized_sum_of_occurances","splits", "Normalized2_sum_of_occurances","Clusters")]
134
+ #head(SisterCounts_temp3)
135
+
136
+ #count nr of total splits
137
+ SisterCounts_temp4 <- cbind(SisterCounts_temp3, count.fields(textConnection(as.character(SisterCounts_temp3$Clusters)), sep = ","))
138
+ #head(SisterCounts_temp4)
139
+
140
+ #make new column and make a remark whether clusters of interest are split or not
141
+ #Notice: The column "Clusters" lists if there is a split (i.e. if UAP2 12 then all 12 MAGs are together, if UAP2 has 8,4 then UAP2 is split once with one cluster with 8 and the other with 4 taxa)
142
+ SisterCounts_temp4$SplitGroups<- ifelse(grepl(",",SisterCounts_temp3$Clusters), "split", "no")
143
+ #head(SisterCounts_temp4)
144
+
145
+ #rename a column for better readability
146
+ names(SisterCounts_temp4)[names(SisterCounts_temp4) == "count.fields(textConnection(as.character(SisterCounts_temp3$Clusters)), "] <- 'NrSplits'
147
+ #head(SisterCounts_temp4)
148
+
149
+ #remove duplicates and keep the Group_of_interest with the best Normalized2_sum_of_occurances value
150
+ #this is done to keep only one hit per arCOG and group of interest, so that the data can be normalized by the total nr of arCOGs and stay consistently linked to the total number of phylogenetic clusters
151
+ SisterCounts_best <-
152
+ SisterCounts_temp4 %>%
153
+ group_by(MarkerID,Group_of_interest) %>%
154
+ filter(Normalized2_sum_of_occurances == max(Normalized2_sum_of_occurances))
155
+
156
+ #count nr of taxonomic groups (i.e. clusters) in each tree
157
+ Nr_clusters <- Number_of_taxa <- ddply(SisterCounts_best, .(MarkerID), summarize, NrClusters = length(Group_of_interest))
158
+ #head(Nr_clusters)
159
+
160
+ #merge nr of clusters and nr of species with datatable
161
+ SisterCounts_best_temp1 <- merge(SisterCounts_best, Nr_clusters, by = "MarkerID")
162
+ SisterCounts_best_temp2 <- merge(SisterCounts_best_temp1,Species_in_tree, by.x = "MarkerID", by.y = "row.names" )
163
+ #head(SisterCounts_best_temp2)
164
+
165
+ #print table
166
+ # write.table(SisterCounts_best_temp1, "2_Output/Taxa_Summary_1.txt", sep = "\t", row.names = F, quote =F)
167
+
168
+ ################################################################################
169
+ #4. summarize split events to be able to rank marker genes
170
+ ################################################################################
171
+ #summarize splits/cluster
172
+ Split_counts <- ddply(SisterCounts_best_temp1, .(MarkerID,SplitGroups, NrClusters), summarise, quantity = length(NrSplits))
173
+ Split_counts_wide <- spread(Split_counts, SplitGroups, quantity)
174
+
175
+ #make new column to calculate the percentage of split clusters
176
+ Split_counts_wide$SplitsPerCluster <- round((Split_counts_wide$split/Split_counts_wide$NrClusters)*100, digits = 1)
177
+ #head(Split_counts_wide)
178
+
179
+ #summarize and count the total number of splits
180
+ Split_Total <- ddply(SisterCounts_best_temp1, .(MarkerID, NrClusters), summarise, TotalSplits = sum(NrSplits))
181
+ #head(Split_Total)
182
+
183
+ #combine the two dataframes generated above
184
+ #Summary_temp1 <- merge(Split_counts_wide[,c("MarkerID","NrClusters","SplitsPerCluster")],HGT_Counts, by = "MarkerID" )
185
+ Summary_temp2 <- merge(Split_counts_wide[,c("MarkerID","NrClusters","SplitsPerCluster")], Split_Total, by = "MarkerID")
186
+ Summary_temp3 <- merge(Summary_temp2, Species_in_tree, by.x = "MarkerID", by.y = "row.names")
187
+ Summary_temp3$TotalSplits_to_Species <- round((Summary_temp3$TotalSplits/Summary_temp3$NrSpecies)*100, digits = 1)
188
+ #head(Summary_temp3)
189
+
190
+ #subset to only print relevant info
191
+ Summary_temp4 <- Summary_temp3[,c("MarkerID","NrSpecies", "NrClusters.x", "SplitsPerCluster","TotalSplits", "TotalSplits_to_Species" )]
192
+ #head(Summary_temp4)
193
+
194
+ #if genes were lost already during the tree building step, add that info in
195
+ Summary_temp5 <- merge(MarkerList, Summary_temp4, by = "MarkerID", all.x = T)
196
+ #head(Summary_temp5)
197
+
198
+ ################################################################################
199
+ #5. find highest/lowest 25/50% ranking markers
200
+ ################################################################################
201
+ #make vector of genes that are not good marker genes based on literature and that are not monophyletic
202
+ genes_to_remove_vector <- as.character(genes_to_remove$MarkerID)
203
+ #genes_to_remove_vector
204
+
205
+ #remove genes from dataframe
206
+ Stats_temp1A <- Summary_temp4[ ! Summary_temp4$MarkerID %in% genes_to_remove_vector, ]
207
+ #dim(Summary_temp4)
208
+ #dim(Stats_temp1A)
209
+
210
+ #define a cutoff to remove gene trees that have less than 50% of the species as we do not want to use these genes for concatenations
211
+ cutoff <- mean(Stats_temp1A$NrSpecies)/2
212
+ #cutoff
213
+
214
+ #remove genes that have few species
215
+ Stats_temp1B <- subset(Stats_temp1A, Stats_temp1A[ , "NrSpecies"] > cutoff)
216
+ #dim(Stats_temp1A)
217
+ #dim(Stats_temp1B)
218
+
219
+ #rank according to the split clusters in percentage from 1 to xx (lowest/best value = lowest nr) = RankA
220
+ Stats_temp2 <- Stats_temp1B %>% mutate(RankA = rank(SplitsPerCluster, ties.method = 'first'))
221
+
222
+ #rank according to the total splits normalized by the total number of species from 1 to xx (lowest value = lowest nr)
223
+ Stats_temp3 <- Stats_temp2 %>% mutate(RankB = rank(TotalSplits_to_Species, ties.method = 'first'))
224
+
225
+ #combine RankA and RankB to get the best for each method
226
+ Stats_temp3$RankA_B <- Stats_temp3$RankA+Stats_temp3$RankB
227
+ #dim(Stats_temp3)
228
+
229
+ #define the concatenated marker sets and create vectors
230
+ nr_genes <- length(Stats_temp3$MarkerID)
231
+ cutoff_25perc <- round(nr_genes/4, digits = 0)
232
+ cutoff_50perc<- round(nr_genes/2, digits = 0)
233
+ #cutoff_25perc
234
+ #cutoff_50perc
235
+
236
+ #subset the tables for the different cutoffs
237
+ best_50perc <- as.data.frame(Stats_temp3 %>% top_n(-cutoff_50perc, RankA_B))
238
+ Stats_temp3$best_50perc <- best_50perc$MarkerID[match(Stats_temp3$MarkerID, best_50perc$MarkerID)]
239
+
240
+ best_25perc <- Stats_temp3 %>% top_n(-cutoff_25perc, RankA_B)
241
+ Stats_temp3$best_25perc <- best_25perc$MarkerID[match(Stats_temp3$MarkerID, best_25perc$MarkerID)]
242
+
243
+ worst_50perc <- Stats_temp3 %>% top_n(cutoff_50perc, RankA_B)
244
+ Stats_temp3$worst_50perc <- worst_50perc$MarkerID[match(Stats_temp3$MarkerID, worst_50perc$MarkerID)]
245
+
246
+ worst_25perc <- Stats_temp3 %>% top_n(cutoff_25perc, RankA_B)
247
+ Stats_temp3$worst_25perc <- worst_25perc$MarkerID[match(Stats_temp3$MarkerID, worst_25perc$MarkerID)]
248
+
249
+ Stats_temp3$FullSet <- Stats_temp3$MarkerID
250
+
251
+ #merge with original table (to keep the statistics)
252
+ Stats_temp4 <- merge(Summary_temp5, Stats_temp3[,c("MarkerID", "RankA", "RankB", "RankA_B", "FullSet", "best_50perc", "best_25perc", "worst_50perc","worst_25perc")], by = "MarkerID", all.x = T)
253
+
254
+ #print
255
+ write.table(Stats_temp4, output_table, sep = "\t", row.names = F, quote =F, na = "")
256
+
TreeSAK/TaxonTree.py ADDED
@@ -0,0 +1,47 @@
1
+ import argparse
2
+ from ete3 import Tree
3
+
4
+
5
+ TaxonTree_usage = '''
6
+ ================================ TaxonTree example commands ================================
7
+
8
+ TreeSAK TaxonTree -i ar53_r220.tree -tax o__Nitrososphaerales -o o__Nitrososphaerales.tree
9
+
10
+ ============================================================================================
11
+ '''
12
+
13
+
14
def TaxonTree(args):
    """Write the subtree rooted at the one node whose name matches a taxon.

    Args:
        args: dict with the keys
            'i'   -- path of the input tree file,
            'tax' -- taxon string to look for in the node names,
            'o'   -- path of the output tree file.

    Raises:
        SystemExit: if no node, or more than one node, has a name containing
            the taxon string. Nothing is written in that case.
    """
    tree_file_in = args['i']
    interested_taxon = args['tax']
    tree_file_out = args['o']

    input_tree = Tree(tree_file_in, quoted_node_names=True, format=1)

    # a substring test also covers an exact name match; keep the node objects
    # so the tree does not need to be traversed a second time
    matched_nodes = [node for node in input_tree.traverse()
                     if interested_taxon in node.name]

    if len(matched_nodes) != 1:
        if not matched_nodes:
            # previously reported as "multiple matched nodes", which was misleading
            print('No node matched %s, program exited!' % interested_taxon)
        else:
            print('There are multiple matched nodes. program exited!')
            print('Matched nodes: %s' % ','.join(node.name for node in matched_nodes))
        # NOTE(review): exit status is left as it was (no explicit code, i.e. 0);
        # consider a non-zero status so that callers can detect the failure
        raise SystemExit()

    matched_nodes[0].write(outfile=tree_file_out)

    print('Subset tree exported to: %s' % tree_file_out)
    print('Done!')
38
+
39
+
40
if __name__ == '__main__':

    # command-line entry point: -i, -tax and -o are all mandatory
    TaxonTree_parser = argparse.ArgumentParser()
    for flag, description in (('-i', 'input tree file'),
                              ('-tax', 'interested taxon'),
                              ('-o', 'output tree file')):
        TaxonTree_parser.add_argument(flag, required=True, help=description)

    # TaxonTree() expects a plain dict keyed by the option names
    args = vars(TaxonTree_parser.parse_args())
    TaxonTree(args)
@@ -0,0 +1,32 @@
1
+ import os
2
+
3
# directory that contains this config file
pwd_config_file = os.path.realpath(__file__)
# os.path.dirname follows the platform's path separator; splitting on '/'
# produced an empty path whenever realpath() returned backslash-separated paths
config_file_path = os.path.dirname(pwd_config_file)

# specify full path to corresponding executables at the right side of colon
# NOTE(review): several bundled files referenced below (Ranger-DTL-Dated.*,
# MetaCHIP_*, cdd2cog.pl, get_sankey_plot.R, ko00001.keg,
# MetaCyc_reactions_with_ec.txt) do not appear in the package's file listing;
# confirm whether these entries are still needed
config_dict = {
    'config_file_path': config_file_path,
    'prodigal': 'prodigal',
    'hmmsearch': 'hmmsearch',
    'hmmfetch': 'hmmfetch',
    'hmmalign': 'hmmalign',
    'hmmstat': 'hmmstat',
    'mafft': 'mafft',
    'bowtie2': 'bowtie2',
    'bowtie2_build': 'bowtie2-build',
    'blastp': 'blastp',
    'blastn': 'blastn',
    'makeblastdb': 'makeblastdb',
    'fasttree': 'FastTree',
    'ranger_mac': os.path.join(config_file_path, 'Ranger-DTL-Dated.mac'),
    'ranger_linux': os.path.join(config_file_path, 'Ranger-DTL-Dated.linux'),
    'path_to_hmm': os.path.join(config_file_path, 'MetaCHIP_phylo.hmm'),
    'circos_HGT_R': os.path.join(config_file_path, 'MetaCHIP_circos_HGT.R'),
    'VisHPD95_R': os.path.join(config_file_path, 'VisHPD95.R'),
    'label_tree_R': os.path.join(config_file_path, 'label_tree.R'),
    'cdd2cog_perl': os.path.join(config_file_path, 'cdd2cog.pl'),
    'get_sankey_plot_R': os.path.join(config_file_path, 'get_sankey_plot.R'),
    'compare_trees_R': os.path.join(config_file_path, 'compare_trees.R'),
    'ko00001_keg': os.path.join(config_file_path, 'ko00001.keg'),
    'MetaCyc_rxns_with_ec': os.path.join(config_file_path, 'MetaCyc_reactions_with_ec.txt'),
}
TreeSAK/VERSION ADDED
@@ -0,0 +1,164 @@
1
+ 1.53.3
2
+ - fixed bugs
3
+
4
+ 1.53.0
5
+ - new module added: batch_itol
6
+
7
+ 1.52.0
8
+ - new module added: filter_rename_ar53
9
+
10
+ 1.51.0
11
+ - new module added: iTOL_msa_stats
12
+
13
+ 1.50.0
14
+ - new module added: RootTreeGTDB226
15
+
16
+ 1.49.0
17
+ - new module added: mcmctree_vs_reltime
18
+
19
+ 1.48.0
20
+ - new module added: parse_reltime
21
+
22
+ 1.47.0
23
+ - new module added: GeneTree
24
+
25
+ 1.46.0
26
+ - new module added: cogTree
27
+
28
+ 1.45.0
29
+ - new module added: iTOL_gene_tree
30
+
31
+ 1.44.0
32
+ - new module added: koTree
33
+
34
+ 1.43.0
35
+ - new module added: ALE7
36
+
37
+ 1.42.0
38
+ - new module added: mcmc2tree
39
+
40
+ 1.41.0
41
+ - new module added: mcmcTC
42
+
43
+ 1.40.0
44
+ - new module added: TaxonTree
45
+
46
+ 1.39.0
47
+ - new module added: supertree
48
+
49
+ 1.38.0
50
+ - new module added: pruneMSA
51
+
52
+ 1.37.0
53
+ - new module added: recode
54
+
55
+ 1.36.0
56
+ - new module added: gap_stats
57
+
58
+ 1.35.0
59
+ - new module added: AssessPB
60
+
61
+ 1.34.0
62
+ - new module added: PB
63
+
64
+ 1.33.0
65
+ - new module added: RootTreeGTDB214 and RootTreeGTDB220
66
+
67
+ 1.32.0
68
+ - new module added: replace_clade
69
+
70
+ 1.31.0
71
+ - new module added: PhyloBiAssoc
72
+
73
+ 1.30.0
74
+ - new module added: ALE6
75
+
76
+ 1.29.0
77
+ - new module added: LcaToLeaves
78
+
79
+ 1.28.0
80
+ - new module added: SingleAleHGT
81
+
82
+ 1.27.0
83
+ - new module added: ConcateMSA
84
+
85
+ 1.26.0
86
+ - new module added: pRTC
87
+
88
+ 1.25.0
89
+ - new module added: BMGE
90
+
91
+ 1.24.0
92
+ - new module added: AlignmentPruner
93
+
94
+ 1.23.0
95
+ - new module added: RootTree
96
+
97
+ 1.22.0
98
+ - new module added: OMA2
99
+
100
+ 1.21.0
101
+ - new module added: print_leaves
102
+
103
+ 1.20.0
104
+ - new module added: ALE1, ALE2, ALE3, ALE4
105
+
106
+ 1.19.0
107
+ - new module added: OMA
108
+
109
+ 1.18.0
110
+ - new module added: ExtractMarkerSeq
111
+
112
+ 1.17.0
113
+ - new module added: MarkerSeq2Tree
114
+
115
+ 1.16.0
116
+ - new module added: SplitScoreStep1 and SplitScoreStep2
117
+
118
+ 1.15.0
119
+ - new module added: SingleLinePhy
120
+
121
+ 1.14.0
122
+ - new module added: PMSF
123
+
124
+ 1.13.0
125
+ - new module added: VisHPD95
126
+
127
+ 1.12.0
128
+ - new module added: PlotMcmcNode
129
+
130
+ 1.11.0
131
+ - new module added: CompareMCMC
132
+
133
+ 1.10.0
134
+ - new module added: fa2phy
135
+
136
+ 1.9.0
137
+ - new module added: Dating
138
+
139
+ 1.8.0
140
+ - new module added: AssessMarkerDeltaLL
141
+
142
+ 1.7.0
143
+ - new module added: AssessMarkerPA
144
+
145
+ 1.6.0
146
+ - new module added: Marker2Tree
147
+
148
+ 1.5.0
149
+ - new module added: SliceMSA
150
+
151
+ 1.4.0
152
+ - new module added: ConvertMSA
153
+
154
+ 1.3.0
155
+ - new module added: get_arCOG_seq
156
+
157
+ 1.2.0
158
+ - new module added: parse_deltall_stdout
159
+
160
+ 1.1.0
161
+ - new module added: AssessCVG
162
+
163
+ 1.0.0
164
+ - initial release
TreeSAK/VisHPD95.R ADDED
@@ -0,0 +1,45 @@
1
+ library(ggplot2)
2
+ library(optparse)
3
+
4
+
5
# Draw a dodged point-range plot from a whitespace-delimited table and save it.
#   data_file   : table with a header line; the columns Var, Mean, Low, High,
#                 Test and Shape are used by the plot
#   plot_width  : width passed to ggsave
#   plot_height : height passed to ggsave
#   plot_file   : path of the image file to write
plot_grouped_HPD95 <- function(data_file, plot_width, plot_height, plot_file){

  hpd_table <- read.table(data_file, header = TRUE)

  # one point with its Low-High range per row: colour by Test, shape by Shape
  point_ranges <- geom_pointrange(aes(col = factor(Test), shape = factor(Shape)),
                                  position = position_dodge(width = 0.6),  # distance between groups
                                  linewidth = 0.9,                         # line width
                                  size = 0.75)                             # size of shape

  # white background without grid lines
  plain_background <- theme_bw() +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank())

  # black axis tick labels; x labels are rotated by 30 degrees
  text_style <- theme(axis.text.x = element_text(size = 12, color = 'black', angle = 30, hjust = 1),
                      axis.text.y = element_text(size = 12, color = 'black'),
                      legend.text = element_text(size = 10))

  hpd_plot <- ggplot(hpd_table, aes(x = Var, y = Mean, ymin = Low, ymax = High)) +
    point_ranges +
    plain_background +
    xlab("") +                                                                  # no x-axis title
    ylab("95% HPD CI") +                                                        # y-axis title
    text_style +
    scale_color_discrete(name = "Color") +                                      # colour legend title
    guides(color = guide_legend(override.aes = list(linetype = 0))) +           # colour legend keys without lines
    scale_shape_discrete(name = "Shape") +                                      # shape legend title
    guides(shape = guide_legend(override.aes = list(linetype = 0, color = 'grey')))  # grey shape legend keys without lines

  # write to file
  ggsave(plot_file, plot = hpd_plot, width = plot_width, height = plot_height, dpi = 300)
}
30
+
31
+
32
# command-line interface: input table, plot size and output file
option_list <- list(
  make_option(c("-i", "--datain"),  type = "character", default = NULL, help = "input data matrix"),
  make_option(c("-x", "--width"),   type = "double",    default = 8,    help = "plot width"),
  make_option(c("-y", "--height"),  type = "double",    default = 5,    help = "plot height"),
  make_option(c("-o", "--plotout"), type = "character", default = NULL, help = "output plot")
)

opt_parser <- OptionParser(option_list = option_list)
opt <- parse_args(opt_parser)

# copy the parsed options into named variables
data_matrix_txt <- opt$datain
plot_width      <- opt$width
plot_height     <- opt$height
output_plot     <- opt$plotout

plot_grouped_HPD95(data_matrix_txt, plot_width, plot_height, output_plot)