treesak 1.53.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. TreeSAK/ALE.py +63 -0
  2. TreeSAK/ALE1.py +268 -0
  3. TreeSAK/ALE2.py +168 -0
  4. TreeSAK/ALE2RTC.py +30 -0
  5. TreeSAK/ALE3.py +205 -0
  6. TreeSAK/ALE4.py +636 -0
  7. TreeSAK/ALE5.py +210 -0
  8. TreeSAK/ALE6.py +401 -0
  9. TreeSAK/ALE7.py +126 -0
  10. TreeSAK/ALE_backup.py +1081 -0
  11. TreeSAK/AssessCVG.py +128 -0
  12. TreeSAK/AssessMarker.py +306 -0
  13. TreeSAK/AssessMarkerDeltaLL.py +257 -0
  14. TreeSAK/AssessMarkerPA.py +317 -0
  15. TreeSAK/AssessPB.py +113 -0
  16. TreeSAK/BMGE.jar +0 -0
  17. TreeSAK/BMGE.py +49 -0
  18. TreeSAK/C60SR4.nex +127 -0
  19. TreeSAK/CompareMCMC.py +138 -0
  20. TreeSAK/ConcateMSA.py +111 -0
  21. TreeSAK/ConvertMSA.py +135 -0
  22. TreeSAK/Dir.rb +82 -0
  23. TreeSAK/ExtractMarkerSeq.py +263 -0
  24. TreeSAK/FastRoot.py +1175 -0
  25. TreeSAK/FastRoot_backup.py +1122 -0
  26. TreeSAK/FigTree.py +34 -0
  27. TreeSAK/GTDB_tree.py +76 -0
  28. TreeSAK/GeneTree.py +142 -0
  29. TreeSAK/KEGG_Luo17.py +807 -0
  30. TreeSAK/LcaToLeaves.py +66 -0
  31. TreeSAK/MarkerRef2Tree.py +616 -0
  32. TreeSAK/MarkerRef2Tree_backup.py +628 -0
  33. TreeSAK/MarkerSeq2Tree.py +299 -0
  34. TreeSAK/MarkerSeq2Tree_backup.py +259 -0
  35. TreeSAK/ModifyTopo.py +116 -0
  36. TreeSAK/Newick_tree_plotter.py +79 -0
  37. TreeSAK/OMA.py +170 -0
  38. TreeSAK/OMA2.py +212 -0
  39. TreeSAK/OneLineAln.py +50 -0
  40. TreeSAK/PB.py +155 -0
  41. TreeSAK/PMSF.py +115 -0
  42. TreeSAK/PhyloBiAssoc.R +84 -0
  43. TreeSAK/PhyloBiAssoc.py +167 -0
  44. TreeSAK/PlotMCMC.py +41 -0
  45. TreeSAK/PlotMcmcNode.py +152 -0
  46. TreeSAK/PlotMcmcNode_old.py +252 -0
  47. TreeSAK/RootTree.py +101 -0
  48. TreeSAK/RootTreeGTDB.py +371 -0
  49. TreeSAK/RootTreeGTDB214.py +288 -0
  50. TreeSAK/RootTreeGTDB220.py +300 -0
  51. TreeSAK/SequentialDating.py +16 -0
  52. TreeSAK/SingleAleHGT.py +157 -0
  53. TreeSAK/SingleLinePhy.py +50 -0
  54. TreeSAK/SliceMSA.py +142 -0
  55. TreeSAK/SplitScore.py +21 -0
  56. TreeSAK/SplitScore1.py +177 -0
  57. TreeSAK/SplitScore1OMA.py +148 -0
  58. TreeSAK/SplitScore2.py +608 -0
  59. TreeSAK/TaxaCountStats.R +256 -0
  60. TreeSAK/TaxonTree.py +47 -0
  61. TreeSAK/TreeSAK_config.py +32 -0
  62. TreeSAK/VERSION +164 -0
  63. TreeSAK/VisHPD95.R +45 -0
  64. TreeSAK/VisHPD95.py +200 -0
  65. TreeSAK/__init__.py +0 -0
  66. TreeSAK/ale_parser.py +74 -0
  67. TreeSAK/ale_splitter.py +63 -0
  68. TreeSAK/alignment_pruner.pl +1471 -0
  69. TreeSAK/assessOG.py +45 -0
  70. TreeSAK/batch_itol.py +171 -0
  71. TreeSAK/catfasta2phy.py +140 -0
  72. TreeSAK/cogTree.py +185 -0
  73. TreeSAK/compare_trees.R +30 -0
  74. TreeSAK/compare_trees.py +255 -0
  75. TreeSAK/dating.py +264 -0
  76. TreeSAK/dating_ss.py +361 -0
  77. TreeSAK/deltall.py +82 -0
  78. TreeSAK/do_rrtc.rb +464 -0
  79. TreeSAK/fa2phy.py +42 -0
  80. TreeSAK/filter_rename_ar53.py +118 -0
  81. TreeSAK/format_leaf_name.py +70 -0
  82. TreeSAK/gap_stats.py +38 -0
  83. TreeSAK/get_SCG_tree.py +742 -0
  84. TreeSAK/get_arCOG_seq.py +97 -0
  85. TreeSAK/global_functions.py +222 -0
  86. TreeSAK/gnm_leaves.py +43 -0
  87. TreeSAK/iTOL.py +791 -0
  88. TreeSAK/iTOL_gene_tree.py +80 -0
  89. TreeSAK/itol_msa_stats.py +56 -0
  90. TreeSAK/keep_highest_rrtc.py +37 -0
  91. TreeSAK/koTree.py +194 -0
  92. TreeSAK/label_gene_tree_by_gnm.py +34 -0
  93. TreeSAK/label_tree.R +75 -0
  94. TreeSAK/label_tree.py +121 -0
  95. TreeSAK/mad.py +708 -0
  96. TreeSAK/mcmc2tree.py +58 -0
  97. TreeSAK/mcmcTC copy.py +92 -0
  98. TreeSAK/mcmcTC.py +104 -0
  99. TreeSAK/mcmctree_vs_reltime.R +44 -0
  100. TreeSAK/mcmctree_vs_reltime.py +252 -0
  101. TreeSAK/merge_pdf.py +32 -0
  102. TreeSAK/pRTC.py +56 -0
  103. TreeSAK/parse_mcmctree.py +198 -0
  104. TreeSAK/parse_reltime.py +141 -0
  105. TreeSAK/phy2fa.py +37 -0
  106. TreeSAK/plot_distruibution_th.py +165 -0
  107. TreeSAK/prep_mcmctree_ctl.py +92 -0
  108. TreeSAK/print_leaves.py +32 -0
  109. TreeSAK/pruneMSA.py +63 -0
  110. TreeSAK/recode.py +73 -0
  111. TreeSAK/remove_bias.R +112 -0
  112. TreeSAK/rename_leaves.py +78 -0
  113. TreeSAK/replace_clade.py +55 -0
  114. TreeSAK/root_with_out_group.py +84 -0
  115. TreeSAK/run_TaxaCountStats_R_s1.py +455 -0
  116. TreeSAK/subsample_drep_gnms.py +74 -0
  117. TreeSAK/subset.py +69 -0
  118. TreeSAK/subset_tree_stupid_old_way.py +193 -0
  119. TreeSAK/supertree.py +330 -0
  120. TreeSAK/tmp_1.py +19 -0
  121. TreeSAK/tmp_2.py +19 -0
  122. TreeSAK/tmp_3.py +120 -0
  123. TreeSAK/tmp_4.py +43 -0
  124. TreeSAK/tmp_5.py +12 -0
  125. TreeSAK/weighted_rand.rb +23 -0
  126. treesak-1.53.3.data/scripts/TreeSAK +955 -0
  127. treesak-1.53.3.dist-info/LICENSE +674 -0
  128. treesak-1.53.3.dist-info/METADATA +27 -0
  129. treesak-1.53.3.dist-info/RECORD +131 -0
  130. treesak-1.53.3.dist-info/WHEEL +5 -0
  131. treesak-1.53.3.dist-info/top_level.txt +1 -0
TreeSAK/pruneMSA.py ADDED
@@ -0,0 +1,63 @@
1
+ import os
2
+ import argparse
3
+
4
+
5
+ pruneMSA_usage = '''
6
+ ==================== pruneMSA example commands ====================
7
+
8
+ # Dependencies: perl and alignment_pruner.pl
9
+
10
+ TreeSAK pruneMSA -i input_msa.fasta -c 10
11
+ TreeSAK pruneMSA -i input_msa.fasta -c 5,10,20,30,40
12
+
13
+ Note:
14
+ 1. This is a wrapper for alignment_pruner.pl (--chi2_prune mode)
15
+ 2. For details: https://doi.org/10.1038/s41467-020-17408-w
16
+
17
+ ===================================================================
18
+ '''
19
+
20
+
21
def sep_path_basename_ext(file_in):
    """Split a file path into file name, directory, base name and extension.

    Returns a 4-tuple ``(file_name, directory, base_name, extension)``.
    The directory is reported as ``'.'`` when the path carries no directory
    part, and the extension is returned without its leading dot.
    """
    directory, file_name = os.path.split(file_in)
    stem, dotted_ext = os.path.splitext(file_name)

    # a bare file name lives in the current directory
    if directory == '':
        directory = '.'

    return file_name, directory, stem, dotted_ext[1:]
29
+
30
+
31
def pruneMSA(args):
    """Run alignment_pruner.pl (--chi2_prune mode) once per cutoff.

    args is a dict with:
        'i': path to the input MSA file
        'c': comma separated cutoffs given as percentages (e.g. '5,10,20')

    For each cutoff the pruned alignment is written next to the input file
    as <base>_chi2p<cutoff>.<ext>. The command is printed before it is run,
    and only the files that were produced successfully are reported.
    """
    # imported here so the block does not depend on a file-level import
    import subprocess

    msa_in = args['i']
    conserved_cutoffs = args['c']

    _, msa_path, msa_base, msa_ext = sep_path_basename_ext(msa_in)

    # alignment_pruner.pl is looked up in the directory holding this module
    current_file_path = os.path.dirname(os.path.realpath(__file__))
    alignment_pruner_pl = os.path.join(current_file_path, 'alignment_pruner.pl')

    # tolerate spaces around cutoffs and empty entries (e.g. a trailing comma)
    cutoff_list = [c.strip() for c in conserved_cutoffs.split(',') if c.strip()]

    op_file_list = []
    for each_cutoff in cutoff_list:
        # the perl script takes a fraction, the user provides a percentage
        cutoff_formatted = str(float(each_cutoff)/100)
        current_msa_out = '%s/%s_chi2p%s.%s' % (msa_path, msa_base, each_cutoff, msa_ext)
        perl_cmd_to_print = 'perl %s --file %s --chi2_prune f%s > %s' % ('alignment_pruner.pl', msa_in, cutoff_formatted, current_msa_out)
        print(perl_cmd_to_print)

        # argument list instead of a shell string: file names containing
        # spaces or shell metacharacters are passed through unchanged
        perl_cmd = ['perl', alignment_pruner_pl, '--file', msa_in, '--chi2_prune', 'f%s' % cutoff_formatted]
        try:
            with open(current_msa_out, 'w') as msa_out_handle:
                return_code = subprocess.call(perl_cmd, stdout=msa_out_handle)
        except OSError as err:
            print('Failed to produce %s: %s' % (current_msa_out, err))
            continue

        if return_code != 0:
            print('alignment_pruner.pl exited with code %s for cutoff %s' % (return_code, each_cutoff))
            continue

        op_file_list.append(current_msa_out)

    # report
    print('Pruned MSA exported to:')
    print('\n'.join(op_file_list))
55
+
56
+
57
if __name__ == '__main__':

    # command line entry point: both options are mandatory
    cli_parser = argparse.ArgumentParser()
    for flag, help_text in (('-i', 'input MSA file'),
                            ('-c', 'conservation cutoffs, comma separated')):
        cli_parser.add_argument(flag, required=True, help=help_text)

    pruneMSA(vars(cli_parser.parse_args()))
TreeSAK/recode.py ADDED
@@ -0,0 +1,73 @@
1
+ import argparse
2
+ from Bio import SeqIO
3
+
4
+
5
+ recode_usage = '''
6
+ ============================ recode example commands ============================
7
+
8
+ TreeSAK recode -i msa.fa -m s4 -o recoded_msa_SR4.fa
9
+ TreeSAK recode -i msa.fa -m d4 -o recoded_msa_Dayhoff4.fa
10
+ TreeSAK recode -i msa.fa -m d6 -o recoded_msa_Dayhoff6.fa
11
+
12
+ Note:
13
+ This script is modified based on the Recode_aa.py from Anja Spang.
14
+ It was used to recode AA to SR4 (s4), Dayhoff4 (d4) or Dayhoff6 (d6) categories.
15
+ Please refer to https://doi.org/10.1038/s41467-020-17408-w for details.
16
+
17
+ Recoding schemes
18
+ 1. Selenocysteine will be recoded to a gap.
19
+ 2. s4: A,G,N,P,S,T = A; C,H,W,Y = C; D,E,K,Q,R = G; F,I,L,M,V = T
20
+ 3. d4: A,G,P,S,T = A; D,E,N,Q = D; H,K,R = H; F,Y,W,I,L,M,V = F; C = ?
21
+ 4. d6: A,G,P,S,T = A; D,E,N,Q = D; H,K,R = H; F,Y,W = F; I,L,M,V = I; C = C
22
+
23
+ =================================================================================
24
+ '''
25
+
26
+
27
def recode(args):
    """Recode an amino acid MSA into a reduced alphabet.

    args is a dict with:
        'i': input MSA file in fasta format
        'o': output file, recoded sequences are written in fasta format
        'm': recoding scheme, d4, d6 or s4 (D4, D6 and S4 are accepted too)

    Any character that is not a key of the selected table (lower case
    letters included) is written as a gap. An unknown scheme prints a
    message and exits before the output file is touched.
    """
    msa_in = args['i']
    msa_out = args['o']
    category = args['m']

    DH6 = {'-': '-', 'A': 'A', 'G': 'A', 'P': 'A', 'S': 'A', 'T': 'A', 'D': 'D', 'E': 'D', 'N': 'D', 'Q': 'D', 'H': 'H', 'K': 'H', 'R': 'H', 'F': 'F', 'Y': 'F', 'W': 'F', 'I': 'I', 'L': 'I', 'M': 'I', 'V': 'I', 'C': 'C'}
    DH4 = {'-': '-', 'A': 'A', 'G': 'A', 'P': 'A', 'S': 'A', 'T': 'A', 'D': 'D', 'E': 'D', 'N': 'D', 'Q': 'D', 'H': 'H', 'K': 'H', 'R': 'H', 'F': 'F', 'Y': 'F', 'W': 'F', 'I': 'F', 'L': 'F', 'M': 'F', 'V': 'F', 'C': '-'}
    SR4 = {'-': '-', 'A': 'A', 'G': 'A', 'N': 'A', 'P': 'A', 'S': 'A', 'T': 'A', 'C': 'C', 'H': 'C', 'W': 'C', 'Y': 'C', 'D': 'G', 'E': 'G', 'K': 'G', 'Q': 'G', 'R': 'G', 'F': 'T', 'I': 'T', 'L': 'T', 'M': 'T', 'V': 'T'}

    scheme_to_table = {'d6': DH6, 'D6': DH6,
                       'd4': DH4, 'D4': DH4,
                       's4': SR4, 'S4': SR4}

    # check the scheme once, up front: an invalid value must not truncate an
    # existing output file, and must be reported even for an empty input
    if category not in scheme_to_table:
        print('Please choose recoding scheme from d4, d6 and s4, program exited!')
        exit()
    recode_table = scheme_to_table[category]

    with open(msa_out, 'w') as msa_out_handle:
        for seq_record in SeqIO.parse(msa_in, "fasta"):
            header = str(seq_record.description).strip()
            seq = str(seq_record.seq).strip()
            # characters missing from the table are recoded to a gap
            new_seq = ''.join(recode_table.get(item, '-') for item in seq)
            msa_out_handle.write(">%s\n%s\n" % (header, new_seq))
64
+
65
+
66
if __name__ == '__main__':

    # command line entry point: all three options are mandatory
    cli_parser = argparse.ArgumentParser(description='Recode amino acids to Dayoff 4, Dayoff 6 or SR4 categories')
    for flag, help_text in (('-i', 'input file'),
                            ('-m', 'recoding scheme, choose from d4, d6 or s4'),
                            ('-o', 'output file')):
        cli_parser.add_argument(flag, required=True, help=help_text)

    recode(vars(cli_parser.parse_args()))
TreeSAK/remove_bias.R ADDED
@@ -0,0 +1,112 @@
1
+ library(Biostrings)
2
+ options(digits=8)
3
+ protein <- readAAStringSet("/lomi_home/wenxiu/RIS_virus/workdir/27_ortho_RIStree/ortho_tree/v1/tree_workdir/test.aln")
4
+ outgroup <- c()
5
+ protein <- protein[setdiff(names(protein),outgroup)]
6
+ library(stringr)
7
# Chi-square term (O-E)^2/E of one residue type in one sequence.
#   set:          the sequences being scored
#   taxa_number:  index of the sequence of interest within set
#   all_amino:    vector of residue characters
#   amino_number: index into all_amino of the residue being scored
chi2_2 <- function(set, taxa_number, all_amino, amino_number) {
  residue <- all_amino[amino_number]

  # observed: occurrences of the residue in the selected sequence
  observed <- str_count(set[[taxa_number]], residue)

  # expected: frequency of the residue over all gap-free residues of the set,
  # scaled by the gap-free length of the selected sequence
  pooled_freq <- sum(str_count(set, residue)) / sum(nchar(str_replace_all(set, "-", "")))
  expected <- pooled_freq * nchar(str_replace_all(set[[taxa_number]], "-", ""))

  (observed - expected)^2 / expected
}
12
+
13
# Chi-square score of one sequence: the sum of chi2_2 over residue types.
#   set:         the sequences being scored
#   taxa_number: index of the sequence of interest within set
chi2_1 <- function(set, taxa_number) {
  # residue alphabet: distinct characters of the first gap-stripped sequence
  ungapped_chars <- strsplit(as.character(str_replace_all(set, "-", "")), "")
  all_amino <- unique(ungapped_chars[[1]])

  per_residue <- sapply(1:length(all_amino), function(k) {
    chi2_2(set, taxa_number, all_amino, k)
  })
  sum(per_residue)
}
17
+
18
+ untrim <- sum(sapply(1:length(protein),chi2_1,set=protein))
19
+
20
+ #13746.9
21
+
22
# Total chi-square score of the alignment after removing column x.
# Packages, data and helper functions are set up inside the body;
# presumably because the function is dispatched to cluster workers
# through parLapply -- confirm before moving them out.
trimmed_chi2 <- function(x) {
  library(Biostrings)
  protein <- readAAStringSet("/lomi_home/wenxiu/RIS_virus/workdir/27_ortho_RIStree/ortho_tree/v1/tree_workdir/test.aln")
  outgroup <- c()
  protein <- protein[setdiff(names(protein), outgroup)]
  library(stringr)

  chi2_2 <- function(set, taxa_number, all_amino, amino_number) {
    residue <- all_amino[amino_number]
    # how often the residue passed in occurs in this sequence
    O <- str_count(set[[taxa_number]], residue)
    # total count of the residue in the set / total number of residues
    # * number of residues in this sequence
    E <- sum(str_count(set, residue)) / sum(nchar(str_replace_all(set, "-", ""))) * nchar(str_replace_all(set[[taxa_number]], "-", ""))
    (O - E)^2 / E
  }

  chi2_1 <- function(set, taxa_number) {
    # all residue characters, gaps excluded (taken from the first sequence)
    all_amino <- unique(strsplit(as.character(str_replace_all(set, "-", "")), "")[[1]])
    # one chi-square term per residue type
    sum(sapply(1:length(all_amino), chi2_2, taxa_number = taxa_number, set = set, all_amino = all_amino))
  }

  aln_width <- width(protein)[1]
  left_end <- x - 1
  right_start <- x + 1
  has_left <- left_end >= 1
  has_right <- right_start <= aln_width

  if (has_left & has_right) {
    # interior column: str_c joins the two flanks element by element
    trimmed_protein <- str_c(substr(protein, 1, left_end), substr(protein, right_start, aln_width))
    print(trimmed_protein)
  } else if (has_right) {
    # first column removed: keep from right_start to the full length
    trimmed_protein <- substr(protein, right_start, aln_width)
  } else {
    # last column removed: keep from the beginning to left_end
    trimmed_protein <- substr(protein, 1, left_end)
  }
  # trimmed_protein now holds the retained columns of every sequence;
  # score each sequence and add the scores up
  per_sequence <- sapply(1:length(trimmed_protein), chi2_1, set = trimmed_protein)
  sum(per_sequence)
}
51
+
52
+
53
+ library(parallel)
54
+
55
+ cl<-makeCluster(48)
56
+
57
+ trimmed_chi2_set <- parLapply(cl,1:width(protein)[1],trimmed_chi2)
58
+
59
+ stopCluster(cl)
60
+
61
+ save.image("/lomi_home/wenxiu/RIS_virus/workdir/27_ortho_RIStree/ortho_tree/v1/tree_workdir/test_removed_bias.aln")
62
+
63
+
64
+ ### Remove the corresponding sites
65
# Remove the lowest-scoring fraction of alignment columns and write the result.
#   percentage: fraction (between 0 and 1) of the columns to remove
# Reads the globals `protein` (the alignment) and `trimmed_chi2_set` (one
# score per column, as returned by trimmed_chi2). The trimmed alignment is
# written to a file whose name carries the percentage.
trim_site <- function(percentage) {
  # flatten the per-column results into one numeric vector
  realnum <- unlist(trimmed_chi2_set)
  print(percentage * length(realnum))

  # score found at the requested rank (ascending order); every column whose
  # score is at or below this threshold is removed
  kafang <- realnum[order(realnum, decreasing = F)[round(percentage * length(realnum))]]
  site_to_be_removed <- which(realnum <= kafang)
  site <- IRanges(start = site_to_be_removed, end = site_to_be_removed, width = 1)

  protein1 <- replaceAt(protein, site, '')
  writeXStringSet(protein1, paste0('/lomi_home/wenxiu/RIS_virus/workdir/27_ortho_RIStree/ortho_tree/v1/tree_workdir/test_removed_bias.trimmed.concat_', percentage * 100, '%.fas'))
}
84
+
85
+
86
+ for (cutoff in c(0.05,0.1,0.2,0.3,0.4,0.6,0.8,0.9)) {
87
+ trim_site(cutoff)
88
+ }
89
+
90
+
91
+
92
+
93
+
94
+
95
+
96
+
97
+ ### Split into two parts first, then take 5%
98
+ # protein1<-protein
99
+ # realnum=c()
100
+ # for (i in 1:length(trimmed_chi2_set)) {
101
+ # realnum= c(realnum,trimmed_chi2_set[[i]])
102
+ # }
103
+ # realnumplus<-realnum[which(realnum-untrim>0)]
104
+ # realnummiuis<-realnum[which(realnum-untrim<0)]
105
+ # plus<-realnumplus[order(realnumplus,decreasing = T)[1:(0.1*length(realnumplus)+1)]]
106
+ # miuis<-realnummiuis[order(realnummiuis,decreasing = F)[1:(0.1*length(realnummiuis)+1)]]
107
+ # site_to_be_removed=which(realnum %in% c(plus,miuis))
108
+ # site<-IRanges(start=site_to_be_removed, end=site_to_be_removed, width=1)
109
+ # protein
110
+ # replaceAt(protein1,site,'')
111
+ # protein1<-replaceAt(protein1,site,'')
112
+ # substr(protein1[1],start=1,stop=5984)
@@ -0,0 +1,78 @@
1
+ import os
2
+ import argparse
3
+ from ete3 import Tree
4
+
5
+
6
+ rename_leaves_usage = '''
7
+ ==================== rename_leaves example commands ====================
8
+
9
+ TreeSAK rename_leaves -i input.tree -r rename.txt -o output.tree
10
+
11
+ # format of rename.txt (tab separated)
12
+ leaf_1 leaf_1_new_name
13
+ leaf_2 leaf_2_new_name
14
+
15
+ ========================================================================
16
+ '''
17
+
18
+
19
def rename_leaves(args):
    """Rename the leaves of a tree according to a two-column rename file.

    args is a dict with:
        'i': input tree file
        'r': rename file, tab separated: current leaf name, new leaf name
        'f': tree format, used for both reading and writing the tree
        'o': output tree file

    Leaves that are not listed in the rename file keep their name. Blank
    lines in the rename file are ignored. A line without a tab separated
    second column prints a message and exits.
    """
    tree_file_in = args['i']
    rename_file = args['r']
    tree_format = args['f']
    tree_file_out = args['o']

    if not os.path.isfile(tree_file_in):
        print('Tree file not found, program exited!')
        exit()

    if not os.path.isfile(rename_file):
        print('Rename file not found, program exited!')
        exit()

    # read in the old name -> new name mapping
    mag_rename_dict = {}
    with open(rename_file) as rename_handle:
        for line_num, each_mag in enumerate(rename_handle, 1):
            each_mag = each_mag.strip()
            if not each_mag:
                continue
            each_mag_split = each_mag.split('\t')
            if len(each_mag_split) < 2:
                print('Line %s of the rename file is not tab separated, program exited!' % line_num)
                exit()
            mag_rename_dict[each_mag_split[0]] = each_mag_split[1]

    t = Tree(tree_file_in, format=tree_format)

    input_tree_leaf_name_list = [leaf.name for leaf in t]

    leaves_with_new_name = sum(1 for each_raw_name in input_tree_leaf_name_list if each_raw_name in mag_rename_dict)
    leaves_without_new_name = len(input_tree_leaf_name_list) - leaves_with_new_name

    if leaves_with_new_name == 0:
        print('No leaf on input tree found in rename file, please double check!')
        exit()
    elif leaves_without_new_name > 0:
        print('%s of the %s leaves in input tree were found in the rename file, the rests unchanged.' % (leaves_with_new_name, len(input_tree_leaf_name_list)))

    for leaf in t:
        leaf.name = mag_rename_dict.get(leaf.name, leaf.name)
    t.write(format=tree_format, outfile=tree_file_out)

    print('Done!')
67
+
68
+
69
if __name__ == '__main__':

    # command line entry point; only the tree format (-f) is optional
    cli_parser = argparse.ArgumentParser()
    cli_parser.add_argument('-i', help='input tree', required=True)
    cli_parser.add_argument('-r', help='rename file', required=True)
    cli_parser.add_argument('-f', help='tree format, default: 1', required=False, type=int, default=1)
    cli_parser.add_argument('-o', help='output tree', required=True)

    rename_leaves(vars(cli_parser.parse_args()))
@@ -0,0 +1,55 @@
1
+ import argparse
2
+ from ete3 import Tree
3
+
4
+
5
+ replace_clade_usage = '''
6
+ ===================== replace_clade example commands =====================
7
+
8
+ TreeSAK replace_clade -m main.tree -s sub.tree -l leaves.txt -o out.tree
9
+
10
+ ==========================================================================
11
+ '''
12
+
13
+
14
def replace_clade(args):
    """Replace one clade of the main tree with a subtree.

    args is a dict with:
        'm':  main tree file
        's':  subtree file
        'l':  text file listing, one per line, leaves of the main tree
        'o':  output tree file
        'of': output tree format

    The clade rooted at the common ancestor of the listed leaves is detached
    from the main tree and the subtree is attached to the same parent.
    Blank lines in the leaf file are ignored. An empty leaf list, or a
    common ancestor that is the root of the main tree (nothing to attach
    the subtree to), prints a message and exits.
    """
    main_tree_file = args['m']
    sub_tree_file = args['s']
    leaf_txt = args['l']
    tree_out = args['o']
    tree_out_fmt = args['of']

    # read in leaves
    with open(leaf_txt) as leaf_handle:
        leaf_list = [each_leaf.strip() for each_leaf in leaf_handle if each_leaf.strip()]
    if not leaf_list:
        print('No leaf found in %s, program exited!' % leaf_txt)
        exit()

    # read in subtree
    sub_tre = Tree(sub_tree_file, quoted_node_names=True, format=1)

    # read in main tree
    main_tre = Tree(main_tree_file, quoted_node_names=True, format=1)

    # locate the clade to be replaced
    lca = main_tre.get_common_ancestor(leaf_list)

    lca_p = lca.up
    if lca_p is None:
        print('LCA of the provided leaves is the root of the main tree, no clade to replace, program exited!')
        exit()

    # swap the clade for the subtree
    lca_p.remove_child(lca)
    lca_p.add_child(sub_tre)

    # write out updated tree
    main_tre.write(outfile=tree_out, format=tree_out_fmt)
42
+
43
+
44
+
45
+
46
if __name__ == '__main__':

    # command line entry point; only the output format (-of) is optional
    cli_parser = argparse.ArgumentParser()
    for flag, help_text in (('-m', 'main tree file'),
                            ('-s', 'subtree file'),
                            ('-l', 'leaves on main tree to be replaced'),
                            ('-o', 'output tree')):
        cli_parser.add_argument(flag, required=True, help=help_text)
    cli_parser.add_argument('-of', help='output tree format, default is 9', required=False, type=int, default=9)

    replace_clade(vars(cli_parser.parse_args()))
@@ -0,0 +1,84 @@
1
+ from ete3 import Tree
2
+
3
+
4
def root_with_out_group(tree_file, out_group_txt, tree_file_rooted):
    """Root a tree on the common ancestor of the given out-group leaves.

    tree_file:        input tree file
    out_group_txt:    text file with one out-group leaf name per line
    tree_file_rooted: file the rooted tree is written to

    Blank lines in the out-group file are ignored. If no leaf name is
    left, a message is printed and the program exits.
    """
    with open(out_group_txt) as out_group_handle:
        out_group_set = {each_og.strip() for each_og in out_group_handle if each_og.strip()}

    if not out_group_set:
        print('No out group leaf found in %s, program exited!' % out_group_txt)
        exit()

    tre = Tree(tree_file, format=1)
    out_group_lca = tre.get_common_ancestor(out_group_set)
    tre.set_outgroup(out_group_lca)
    tre.write(outfile=tree_file_rooted)
14
+
15
+
16
def replace_clades(main_tree, sub_tree, tree_out):
    """Replace the clade of the main tree that holds the subtree's leaves.

    main_tree: main tree file
    sub_tree:  subtree file; its leaf names select the clade to replace
    tree_out:  file the updated tree is written to (format 8)

    The program exits with a message when the common ancestor of the
    subtree leaves on the main tree has a different number of leaves than
    the subtree, or when that ancestor is the root of the main tree
    (there is no parent to attach the subtree to).
    """
    # read in sub tree
    tre_sub = Tree(sub_tree, format=1)

    # get all leaves in sub tree
    subtree_leaf_name_list = tre_sub.get_leaf_names()

    # read in main tree
    tre_main = Tree(main_tree)

    # locate the clade to be replaced
    lca = tre_main.get_common_ancestor(subtree_leaf_name_list)

    if len(lca.get_leaf_names()) != len(subtree_leaf_name_list):
        print('LCA of subtree leaves in main tree contain extra leaves, program exited!')
        exit()

    lca_p = lca.up
    if lca_p is None:
        print('LCA of subtree leaves is the root of main tree, no clade to replace, program exited!')
        exit()

    # swap the clade for the subtree
    lca_p.remove_child(lca)
    lca_p.add_child(tre_sub)

    # write out updated tree
    tre_main.write(outfile=tree_out, format=8)
40
+
41
+
42
# NOTE(review): these statements run whenever the module is loaded and rely
# on absolute paths of one specific machine -- confirm this is intended.

# inputs
tree_file = '/Users/songweizhi/Desktop/777/PA_75_DeltaLL_50_raw.treefile'
out_group_txt = '/Users/songweizhi/Desktop/777/out_group.txt'
eu_tree = '/Users/songweizhi/Desktop/777/27.nwk'

# outputs
tree_file_rooted = '/Users/songweizhi/Desktop/777/PA_75_DeltaLL_50_rooted.treefile'
rooted_tree_with_time_constraints = '/Users/songweizhi/Desktop/777/PA_75_DeltaLL_50_rooted_with_time_constraints.treefile'

# step 1: root the raw tree with the out-group
root_with_out_group(tree_file, out_group_txt, tree_file_rooted)

# step 2: replace the matching clade of the rooted tree with eu_tree
replace_clades(tree_file_rooted, eu_tree, rooted_tree_with_time_constraints)
51
+
52
+
53
+ root_with_out_group_from_tianhua = '''
54
+ from ete3 import Tree
55
+ tpath = ''
56
+ nodes = ['F7','B7_3','A7']
57
+ tre = Tree(tpath)
58
+ tre2 = tre.copy()
59
+ lca = tre.get_common_ancestor(nodes)
60
+ lca_leaves = lca.get_leaf_names()
61
+ # intersect = set(lca_leaves).intersection(set(nodes))
62
+ ratio = len(nodes)/len(lca_leaves)
63
+ if ratio > 0.5:
64
+ tre2.set_outgroup(lca) # inplace
65
+ tre2.write(outfile='')
66
+ else:
67
+ print(ratio)
68
+ '''
69
+
70
+
71
+ replace_clade_from_tianhua ='''
72
+ ## replace clade with a new tree
73
+ tre2 = Tree(tpath2,format=3)
74
+ nodes1 = []
75
+ tre = Tree(tpath)
76
+ lca = tre.get_common_ancestor(nodes1)
77
+ lca_p = lca.up
78
+ lca_p.remove_child(lca)
79
+ if len(tre2.children)==1:
80
+ lca_p.add_child(tre2.children[0])
81
+ else:
82
+ lca_p.add_child(tre2)
83
+ tre.write(outfile='',format='')
84
+ '''