treesak 1.53.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. TreeSAK/ALE.py +63 -0
  2. TreeSAK/ALE1.py +268 -0
  3. TreeSAK/ALE2.py +168 -0
  4. TreeSAK/ALE2RTC.py +30 -0
  5. TreeSAK/ALE3.py +205 -0
  6. TreeSAK/ALE4.py +636 -0
  7. TreeSAK/ALE5.py +210 -0
  8. TreeSAK/ALE6.py +401 -0
  9. TreeSAK/ALE7.py +126 -0
  10. TreeSAK/ALE_backup.py +1081 -0
  11. TreeSAK/AssessCVG.py +128 -0
  12. TreeSAK/AssessMarker.py +306 -0
  13. TreeSAK/AssessMarkerDeltaLL.py +257 -0
  14. TreeSAK/AssessMarkerPA.py +317 -0
  15. TreeSAK/AssessPB.py +113 -0
  16. TreeSAK/BMGE.jar +0 -0
  17. TreeSAK/BMGE.py +49 -0
  18. TreeSAK/C60SR4.nex +127 -0
  19. TreeSAK/CompareMCMC.py +138 -0
  20. TreeSAK/ConcateMSA.py +111 -0
  21. TreeSAK/ConvertMSA.py +135 -0
  22. TreeSAK/Dir.rb +82 -0
  23. TreeSAK/ExtractMarkerSeq.py +263 -0
  24. TreeSAK/FastRoot.py +1175 -0
  25. TreeSAK/FastRoot_backup.py +1122 -0
  26. TreeSAK/FigTree.py +34 -0
  27. TreeSAK/GTDB_tree.py +76 -0
  28. TreeSAK/GeneTree.py +142 -0
  29. TreeSAK/KEGG_Luo17.py +807 -0
  30. TreeSAK/LcaToLeaves.py +66 -0
  31. TreeSAK/MarkerRef2Tree.py +616 -0
  32. TreeSAK/MarkerRef2Tree_backup.py +628 -0
  33. TreeSAK/MarkerSeq2Tree.py +299 -0
  34. TreeSAK/MarkerSeq2Tree_backup.py +259 -0
  35. TreeSAK/ModifyTopo.py +116 -0
  36. TreeSAK/Newick_tree_plotter.py +79 -0
  37. TreeSAK/OMA.py +170 -0
  38. TreeSAK/OMA2.py +212 -0
  39. TreeSAK/OneLineAln.py +50 -0
  40. TreeSAK/PB.py +155 -0
  41. TreeSAK/PMSF.py +115 -0
  42. TreeSAK/PhyloBiAssoc.R +84 -0
  43. TreeSAK/PhyloBiAssoc.py +167 -0
  44. TreeSAK/PlotMCMC.py +41 -0
  45. TreeSAK/PlotMcmcNode.py +152 -0
  46. TreeSAK/PlotMcmcNode_old.py +252 -0
  47. TreeSAK/RootTree.py +101 -0
  48. TreeSAK/RootTreeGTDB.py +371 -0
  49. TreeSAK/RootTreeGTDB214.py +288 -0
  50. TreeSAK/RootTreeGTDB220.py +300 -0
  51. TreeSAK/SequentialDating.py +16 -0
  52. TreeSAK/SingleAleHGT.py +157 -0
  53. TreeSAK/SingleLinePhy.py +50 -0
  54. TreeSAK/SliceMSA.py +142 -0
  55. TreeSAK/SplitScore.py +21 -0
  56. TreeSAK/SplitScore1.py +177 -0
  57. TreeSAK/SplitScore1OMA.py +148 -0
  58. TreeSAK/SplitScore2.py +608 -0
  59. TreeSAK/TaxaCountStats.R +256 -0
  60. TreeSAK/TaxonTree.py +47 -0
  61. TreeSAK/TreeSAK_config.py +32 -0
  62. TreeSAK/VERSION +164 -0
  63. TreeSAK/VisHPD95.R +45 -0
  64. TreeSAK/VisHPD95.py +200 -0
  65. TreeSAK/__init__.py +0 -0
  66. TreeSAK/ale_parser.py +74 -0
  67. TreeSAK/ale_splitter.py +63 -0
  68. TreeSAK/alignment_pruner.pl +1471 -0
  69. TreeSAK/assessOG.py +45 -0
  70. TreeSAK/batch_itol.py +171 -0
  71. TreeSAK/catfasta2phy.py +140 -0
  72. TreeSAK/cogTree.py +185 -0
  73. TreeSAK/compare_trees.R +30 -0
  74. TreeSAK/compare_trees.py +255 -0
  75. TreeSAK/dating.py +264 -0
  76. TreeSAK/dating_ss.py +361 -0
  77. TreeSAK/deltall.py +82 -0
  78. TreeSAK/do_rrtc.rb +464 -0
  79. TreeSAK/fa2phy.py +42 -0
  80. TreeSAK/filter_rename_ar53.py +118 -0
  81. TreeSAK/format_leaf_name.py +70 -0
  82. TreeSAK/gap_stats.py +38 -0
  83. TreeSAK/get_SCG_tree.py +742 -0
  84. TreeSAK/get_arCOG_seq.py +97 -0
  85. TreeSAK/global_functions.py +222 -0
  86. TreeSAK/gnm_leaves.py +43 -0
  87. TreeSAK/iTOL.py +791 -0
  88. TreeSAK/iTOL_gene_tree.py +80 -0
  89. TreeSAK/itol_msa_stats.py +56 -0
  90. TreeSAK/keep_highest_rrtc.py +37 -0
  91. TreeSAK/koTree.py +194 -0
  92. TreeSAK/label_gene_tree_by_gnm.py +34 -0
  93. TreeSAK/label_tree.R +75 -0
  94. TreeSAK/label_tree.py +121 -0
  95. TreeSAK/mad.py +708 -0
  96. TreeSAK/mcmc2tree.py +58 -0
  97. TreeSAK/mcmcTC copy.py +92 -0
  98. TreeSAK/mcmcTC.py +104 -0
  99. TreeSAK/mcmctree_vs_reltime.R +44 -0
  100. TreeSAK/mcmctree_vs_reltime.py +252 -0
  101. TreeSAK/merge_pdf.py +32 -0
  102. TreeSAK/pRTC.py +56 -0
  103. TreeSAK/parse_mcmctree.py +198 -0
  104. TreeSAK/parse_reltime.py +141 -0
  105. TreeSAK/phy2fa.py +37 -0
  106. TreeSAK/plot_distruibution_th.py +165 -0
  107. TreeSAK/prep_mcmctree_ctl.py +92 -0
  108. TreeSAK/print_leaves.py +32 -0
  109. TreeSAK/pruneMSA.py +63 -0
  110. TreeSAK/recode.py +73 -0
  111. TreeSAK/remove_bias.R +112 -0
  112. TreeSAK/rename_leaves.py +78 -0
  113. TreeSAK/replace_clade.py +55 -0
  114. TreeSAK/root_with_out_group.py +84 -0
  115. TreeSAK/run_TaxaCountStats_R_s1.py +455 -0
  116. TreeSAK/subsample_drep_gnms.py +74 -0
  117. TreeSAK/subset.py +69 -0
  118. TreeSAK/subset_tree_stupid_old_way.py +193 -0
  119. TreeSAK/supertree.py +330 -0
  120. TreeSAK/tmp_1.py +19 -0
  121. TreeSAK/tmp_2.py +19 -0
  122. TreeSAK/tmp_3.py +120 -0
  123. TreeSAK/tmp_4.py +43 -0
  124. TreeSAK/tmp_5.py +12 -0
  125. TreeSAK/weighted_rand.rb +23 -0
  126. treesak-1.53.3.data/scripts/TreeSAK +955 -0
  127. treesak-1.53.3.dist-info/LICENSE +674 -0
  128. treesak-1.53.3.dist-info/METADATA +27 -0
  129. treesak-1.53.3.dist-info/RECORD +131 -0
  130. treesak-1.53.3.dist-info/WHEEL +5 -0
  131. treesak-1.53.3.dist-info/top_level.txt +1 -0
TreeSAK/PMSF.py ADDED
@@ -0,0 +1,115 @@
1
+ import os
2
+ import argparse
3
+ from distutils.spawn import find_executable
4
+
5
+
6
+ PMSF_usage = '''
7
+ ==================== PMSF example commands ====================
8
+
9
+ # Dependency: iqtree2
10
+
11
+ TreeSAK PMSF -i in.aln -o get_PMSF_tree_wd -t 12
12
+ TreeSAK PMSF -i in.aln -o get_PMSF_tree_wd -t 12 -topo topo.tree
13
+ TreeSAK PMSF -i in.aln -o get_PMSF_tree_wd -t 12 -m C60SR4 -mdef C60SR4.nex
14
+
15
+ # This is a wrapper for:
16
+ iqtree2 -T 12 -B 1000 --alrt 1000 --quiet --seqtype AA -s in.aln --prefix guide_tree -m LG+F+G
17
+ iqtree2 -T 12 -B 1000 --alrt 1000 --quiet --seqtype AA -s in.aln --prefix PMSF -m LG+C60+F+G -ft guide_tree.treefile
18
+
19
+ # more information: http://www.iqtree.org/doc/Complex-Models
20
+
21
+ # Reference: The evolutionary origin of host association in the Rickettsiales
22
+ Maximum likelihood phylogenetic reconstructions were done under the PMSF approximation
23
+ (with 100 non-parametric bootstraps; guidetree under LG+G+F) of the LG+C60+F+Γ4 model
24
+ (selected by ModelFinder) for both supermatrix alignments with IQTREE v1.6.5.
25
+
26
+ ===============================================================
27
+ '''
28
+
29
+
30
def _build_pmsf_cmds(iqtree_exe, msa_in, op_dir, guide_tree_wd, tree_prefix,
                     guide_model, model, num_of_threads,
                     topo_constraint_txt=None, mdef_nex=None):
    """Return the (guide tree, PMSF tree) IQ-TREE commands as argument lists."""
    pwd_guide_tree = '%s/guide_tree.treefile' % guide_tree_wd
    shared_tail = ['-T', str(num_of_threads), '-B', '1000', '--alrt', '1000', '--quiet']

    guidetree_cmd = [iqtree_exe, '-s', msa_in,
                     '--prefix', '%s/guide_tree' % guide_tree_wd,
                     '--seqtype', 'AA', '-m', guide_model] + shared_tail

    iqtree_cmd = [iqtree_exe, '-s', msa_in, '--prefix', '%s/%s' % (op_dir, tree_prefix)]
    # "--seqtype AA" is left out for the C60SR4 model
    # NOTE(review): presumably because the alignment is SR4-recoded - confirm
    if model != 'C60SR4':
        iqtree_cmd += ['--seqtype', 'AA']
    iqtree_cmd += ['-m', model] + shared_tail + ['-ft', pwd_guide_tree]

    # the topological constraint (-g) applies to both runs
    if topo_constraint_txt is not None:
        guidetree_cmd += ['-g', topo_constraint_txt]
        iqtree_cmd += ['-g', topo_constraint_txt]

    # the model definition file is only passed to the PMSF run
    if mdef_nex is not None:
        iqtree_cmd += ['--mdef', mdef_nex]

    return guidetree_cmd, iqtree_cmd


def PMSF(args):
    """Infer a guide tree and then a PMSF tree with IQ-TREE.

    ``args`` is a dict with the keys:
        i     input MSA file
        gm    IQ-TREE model for the guide tree
        m     IQ-TREE model for the PMSF tree
        mdef  model definition NEXUS file, or None
        o     output directory
        p     prefix of the PMSF tree files
        f     True to overwrite an existing output directory
        t     number of threads
        topo  topological constraint tree passed to ``-g``, or None

    Both commands are written to ``<o>/cmds.txt`` before they are executed.
    The program exits with a non-zero status if IQ-TREE or the MSA file is
    missing, if the output directory exists and ``f`` is not set, or if one of
    the IQ-TREE runs fails.
    """
    # local imports: the module header only provides os/argparse/find_executable
    import shlex
    import shutil
    import subprocess
    import sys

    msa_in = args['i']
    iqtree_model_guide_tree = args['gm']
    iqtree_model = args['m']
    mdef_nex = args['mdef']
    op_dir = args['o']
    tree_prefix = args['p']
    force_overwrite = args['f']
    num_of_threads = args['t']
    topo_constraint_txt = args['topo']

    guide_tree_wd = '%s/guide_tree' % op_dir
    pwd_cmd_txt = '%s/cmds.txt' % op_dir

    # prefer iqtree2 over iqtree; shutil.which replaces the distutils helper,
    # which is no longer available on Python 3.12+
    iqtree_exe = next((exe for exe in ('iqtree2', 'iqtree') if shutil.which(exe)), None)
    if iqtree_exe is None:
        print('iqtree not detected, program exited!')
        sys.exit(1)

    # check input file
    if os.path.isfile(msa_in) is False:
        print('MSA file not found, program exited!')
        sys.exit(1)

    # create output dir (no shell involved, so paths with spaces are safe)
    if os.path.isdir(op_dir) is True:
        if force_overwrite is True:
            shutil.rmtree(op_dir)
        else:
            print('output folder already exist, program exited!')
            sys.exit(1)
    os.makedirs(guide_tree_wd)

    guidetree_cmd, iqtree_cmd = _build_pmsf_cmds(
        iqtree_exe, msa_in, op_dir, guide_tree_wd, tree_prefix,
        iqtree_model_guide_tree, iqtree_model, num_of_threads,
        topo_constraint_txt, mdef_nex)

    # shell-quoted renderings, used for logging only
    guidetree_cmd_str = ' '.join(shlex.quote(arg) for arg in guidetree_cmd)
    iqtree_cmd_str = ' '.join(shlex.quote(arg) for arg in iqtree_cmd)

    # write out commands
    with open(pwd_cmd_txt, 'w') as pwd_cmd_txt_handle:
        pwd_cmd_txt_handle.write(guidetree_cmd_str + '\n')
        pwd_cmd_txt_handle.write(iqtree_cmd_str + '\n')

    # get guide tree; the PMSF run needs its treefile, so stop if it failed
    print('Building guide tree')
    print(guidetree_cmd_str)
    if subprocess.call(guidetree_cmd) != 0:
        print('Guide tree inference failed, program exited!')
        sys.exit(1)

    # get PMSF tree
    print('Building PMSF tree with model %s' % iqtree_model)
    print(iqtree_cmd_str)
    if subprocess.call(iqtree_cmd) != 0:
        print('PMSF tree inference failed, program exited!')
        sys.exit(1)

    print('Done!')
99
+
100
+
101
if __name__ == '__main__':

    # initialize the options parser; show the example commands as usage text
    PMSF_parser = argparse.ArgumentParser(usage=PMSF_usage)
    PMSF_parser.add_argument('-i', required=True, help='input MSA file')
    PMSF_parser.add_argument('-gm', required=False, default='LG+F+G', help='iqtree model for guide tree, default: LG+F+G')
    PMSF_parser.add_argument('-m', required=False, default='LG+C60+F+G', help='iqtree model, default: LG+C60+F+G')
    PMSF_parser.add_argument('-mdef', required=False, default=None, help='model definition NEXUS file')
    # -o is used by PMSF() as the directory that receives all output files
    PMSF_parser.add_argument('-o', required=True, help='output directory')
    PMSF_parser.add_argument('-p', required=False, default='PMSF', help='tree prefix, default: PMSF')
    PMSF_parser.add_argument('-topo', required=False, default=None, help='topological constraint tree, pass to -g, default is None')
    PMSF_parser.add_argument('-t', required=False, type=int, default=1, help='num of threads, default: 1')
    PMSF_parser.add_argument('-f', required=False, action="store_true", help='force overwrite')
    args = vars(PMSF_parser.parse_args())
    PMSF(args)
TreeSAK/PhyloBiAssoc.R ADDED
@@ -0,0 +1,84 @@
1
# Test, for every trait column of a data table, whether the trait is associated
# with the grouping column "cate", taking phylogenetic signal into account.
#
# Usage:   Rscript PhyloBiAssoc.R -t demo.tre -d demo.txt
# Example: phylosig 7.973475e-26    binaryPGLMM 0.03255813
# The header of the first two columns has to be "ID" and "cate".
# One tab-separated result line per trait column is printed to stdout.

suppressMessages(suppressWarnings(library("ape")))
suppressMessages(suppressWarnings(library("phytools")))
suppressMessages(suppressWarnings(library("optparse")))

################################################################################

option_list = list(
  make_option(c("-t", "--treefile"), type="character", default=NULL, help="tree file"),
  make_option(c("-d", "--datafile"), type="character", default=NULL, help="data file"));
opt_parser = OptionParser(option_list=option_list);
opt = parse_args(opt_parser);

tree_file = opt$treefile
data_file = opt$datafile

# fail early with a readable message instead of an error from read.tree/read.table
if (is.null(tree_file) || is.null(data_file)) {
  print_help(opt_parser)
  stop("Both -t/--treefile and -d/--datafile are required.", call. = FALSE)
}

################################################################################

geotree <- read.tree(tree_file)
geodata <- read.table(data_file, header = TRUE, sep = "\t")

# sort rows in df according to the order of tips in the tree
row.names(geodata) <- geodata$ID
geodata <- geodata[geotree$tip.label,]

# every column after "ID" and "cate" is a trait to test
colnames_all = colnames(geodata)
colnames_to_process = tail(colnames_all, -2)

cat('ID', "phylosig", "binaryPGLMM", "chisq.test", "coefficient", "significance", '\n', fill=FALSE, sep = "\t")

for (i in colnames_to_process){

  trait_values = setNames(geodata[, i], geodata$ID)

  # perform phylosig test
  phylosig_test <- phylosig(tree = geotree, x = trait_values, method = "lambda", test = TRUE)
  phylosig_test_pvalue = phylosig_test$P

  # perform binaryPGLMM test if phylosig P-value <= 0.05 (indicating significant phylogenetic signal)
  # perform chi-squared test if phylosig P-value > 0.05 (indicating no phylogenetic signal)
  # do nothing if phylosig returns NaN

  association_test = ''
  association_p_value = NA
  do_nothing = FALSE
  association_coefficient = 'na'
  significant = 'n'

  # is.na() is TRUE for both NaN and NA, so a missing P-value can no longer
  # reach the numeric comparison below and abort the script
  if (is.na(phylosig_test_pvalue)) {
    do_nothing = TRUE
    significant = 'na'
  } else if (phylosig_test_pvalue <= 0.05) {
    binaryPGLMM_result <- binaryPGLMM(trait_values ~ geodata$cate, phy = geotree)
    association_test = 'binaryPGLMM'
    # NOTE(review): the coefficient is read from index 1 while the P-value is
    # read from index 2 - confirm both refer to the same model term
    association_coefficient = binaryPGLMM_result$B[1]
    association_p_value = binaryPGLMM_result$B.pvalue[2]
  } else {
    chisq_test <- chisq.test(table(geodata$cate, trait_values))
    association_test = 'chisq.test'
    association_p_value = chisq_test$p.value
  }

  # a missing association P-value is reported as not significant
  if (!is.na(association_p_value) && association_p_value <= 0.05) {
    significant = 'y'
  }

  # print to screen (nothing is printed when phylosig returned no P-value)
  if (do_nothing == FALSE) {
    if (association_test == 'binaryPGLMM'){
      cat(i, phylosig_test_pvalue, association_p_value, 'na', association_coefficient, significant, '\n', fill=FALSE, sep = "\t")
    }
    if (association_test == 'chisq.test'){
      cat(i, phylosig_test_pvalue, 'na', association_p_value, association_coefficient, significant, '\n', fill=FALSE, sep = "\t")
    }
  }
}
@@ -0,0 +1,167 @@
1
+ import os
2
+ import argparse
3
+ import pandas as pd
4
+ import multiprocessing as mp
5
+ from statsmodels.stats.multitest import multipletests
6
+
7
+
8
# Example commands shown as the usage text of the PhyloBiAssoc argument parser.
PhyloBiAssoc_usage = '''
============================= PhyloBiAssoc example commands =============================

TreeSAK PhyloBiAssoc -i demo.tre -d demo.txt -o op_dir -t 10 -f

# Note, header for the first two columns in -d has to be "ID" and "cate"!!!

# It will perform:
# 1) binaryPGLMM test if phylosig p-value <= 0.05 (significant phylogenetic signal)
# 2) chi-squared test if phylosig p-value > 0.05  (no phylogenetic signal)
# 3) do nothing if phylosig returns NaN (might be due to the same value across all genomes)

# https://www.rdocumentation.org/packages/ape/versions/5.7-1/topics/binaryPGLMM

=========================================================================================
'''
24
+
25
+
26
def sep_path_basename_ext(file_in):
    """Split a file path into (folder, base name, extension).

    The folder is reported as '.' when the path has no directory part; the
    extension keeps its leading dot.
    """
    folder, leaf = os.path.split(file_in)
    stem, suffix = os.path.splitext(leaf)
    return ('.' if folder == '' else folder), stem, suffix
34
+
35
+
36
def subset_df(file_in, rows_to_keep, cols_to_keep, sep_symbol, row_name_pos, column_name_pos, file_out):
    """Write a row/column subset of a delimited table to a new file.

    file_in is read with sep_symbol as separator, column_name_pos as header row
    and row_name_pos as index column. An empty rows_to_keep / cols_to_keep
    means "keep all rows" / "keep all columns". The subset is written to
    file_out with the same separator.
    """
    table = pd.read_csv(file_in, sep=sep_symbol, header=column_name_pos, index_col=row_name_pos)

    # an empty selection is translated into a full slice
    row_selector = rows_to_keep if len(rows_to_keep) != 0 else slice(None)
    col_selector = cols_to_keep if len(cols_to_keep) != 0 else slice(None)

    table.loc[row_selector, col_selector].to_csv(file_out, sep=sep_symbol)
52
+
53
+
54
def _run_rscript_job(job):
    """Run one command with its stdout redirected to a file; return the exit status.

    ``job`` is a ``(argument list, stdout file path)`` tuple. A command that
    cannot be started at all is reported as status 127 instead of raising, so
    one failing job does not abort the whole worker pool.
    """
    import subprocess

    cmd_args, stdout_path = job
    with open(stdout_path, 'w') as stdout_handle:
        try:
            return subprocess.call(cmd_args, stdout=stdout_handle)
        except OSError:
            return 127


def PhyloBiAssoc(args):
    """Run PhyloBiAssoc.R on every trait column of a table and combine the results.

    ``args`` is a dict with the keys:
        i  tree file
        d  tab-separated data file; the first column holds the row IDs and the
           column named "cate" holds the grouping
        o  output directory
        t  number of worker processes
        f  True to overwrite an existing output directory

    Written to the output directory:
        cmds.txt                         one Rscript command per trait column
        df_subset/<column>.tab           "cate" plus one trait column
        stats_results/<column>.txt       raw output of PhyloBiAssoc.R
        stats_results_all.txt            all result lines combined
        stats_results_0.05.txt           IDs flagged as significant by the R script
        stats_results_0.05_adjusted.txt  IDs whose Bonferroni adjusted p-value is <= 0.05
    """
    # local imports: the module header does not provide these
    import shlex
    import shutil
    import sys

    tree_file = args['i']
    data_file = args['d']
    op_dir = args['o']
    num_threads = args['t']
    force_create_op_dir = args['f']

    # PhyloBiAssoc.R sits next to this script
    current_script_path = os.path.dirname(os.path.realpath(__file__))
    PhyloBiAssoc_R = os.path.join(current_script_path, 'PhyloBiAssoc.R')

    cmd_txt = '%s/cmds.txt' % op_dir
    df_subset_dir = '%s/df_subset' % op_dir
    stats_op_dir = '%s/stats_results' % op_dir
    combined_stats_txt = '%s/stats_results_all.txt' % op_dir
    combined_stats_txt_sig = '%s/stats_results_0.05.txt' % op_dir
    combined_stats_txt_adjusted = '%s/stats_results_0.05_adjusted.txt' % op_dir

    # create op_dir (no shell involved, so paths with spaces are safe)
    if os.path.isdir(op_dir) is True:
        if force_create_op_dir is True:
            shutil.rmtree(op_dir)
        else:
            print('output directory exist, program exited!')
            sys.exit(1)
    os.makedirs(df_subset_dir)
    os.makedirs(stats_op_dir)

    # read in dataframe; the first remaining column is skipped as a trait
    df = pd.read_csv(data_file, sep='\t', header=0, index_col=0)
    col_header_list = list(df.columns.values)

    # subset dataframe: one table with "cate" plus a single trait per column,
    # taken from the dataframe already in memory
    stats_job_list = []
    op_stats_txt_set = set()
    with open(cmd_txt, 'w') as cmd_txt_handle:
        for each_col in col_header_list[1:]:
            df_subset_file = '%s/%s.tab' % (df_subset_dir, each_col)
            stats_out_txt = '%s/%s.txt' % (stats_op_dir, each_col)
            df.loc[:, ['cate', each_col]].to_csv(df_subset_file, sep='\t')

            stats_cmd = ['Rscript', PhyloBiAssoc_R, '-t', tree_file, '-d', df_subset_file]
            stats_cmd_str = ' '.join(shlex.quote(arg) for arg in stats_cmd)
            cmd_txt_handle.write('%s > %s\n' % (stats_cmd_str, shlex.quote(stats_out_txt)))
            stats_job_list.append((stats_cmd, stats_out_txt))
            op_stats_txt_set.add(stats_out_txt)

    print('Processing %s objects with %s cores' % (len(stats_job_list), num_threads))
    with mp.Pool(processes=num_threads) as pool:
        exit_status_list = pool.map(_run_rscript_job, stats_job_list)
    num_failed = sum(1 for status in exit_status_list if status != 0)
    if num_failed > 0:
        print('Warning: %s Rscript command(s) returned a non-zero exit status' % num_failed)

    # combine stats results
    sig_list_id = []
    sig_list_value = []
    header_prefix = 'ID\tphylosig\tbinaryPGLMM\tchisq.test'
    with open(combined_stats_txt, 'w') as combined_stats_txt_handle, \
            open(combined_stats_txt_sig, 'w') as combined_stats_txt_sig_handle:
        # tab-separated, like the result rows written below
        combined_stats_txt_handle.write('ID\tphylosig\tbinaryPGLMM\tchisq.test\tcoefficient\tsignificance\n')
        for each_file in sorted(op_stats_txt_set):
            f_path, f_base, f_ext = sep_path_basename_ext(each_file)
            with open(each_file) as each_file_handle:
                for each_line in each_file_handle:
                    if each_line.startswith(header_prefix):
                        continue
                    each_line_split = each_line.strip().split('\t')
                    if len(each_line_split) < 6:
                        # blank or incomplete line, nothing to report
                        continue
                    significance = each_line_split[5]
                    combined_stats_txt_handle.write('%s\t%s\n' % (f_base, '\t'.join(each_line_split[1:])))
                    if significance == 'y':
                        combined_stats_txt_sig_handle.write(f_base + '\n')
                        sig_bi = each_line_split[2]
                        sig_chi = each_line_split[3]
                        # the p-value is in the column that is not "na"
                        current_sig = sig_chi if sig_bi == 'na' else sig_bi
                        sig_list_id.append(f_base)
                        sig_list_value.append(float(current_sig))

    # perform Bonferroni correction, skipped when nothing was significant
    # NOTE(review): only p-values already flagged as significant are corrected,
    # so the correction factor is the number of significant tests - confirm
    sig_list_value_adjusted = []
    if sig_list_value:
        sig_list_value_adjusted = list(multipletests(sig_list_value, alpha=0.1, method='bonferroni')[1])

    # write out adjusted p values
    with open(combined_stats_txt_adjusted, 'w') as combined_stats_txt_adjusted_handle:
        combined_stats_txt_adjusted_handle.write('ID\tadjusted_p_value\n')
        for (each_id, adjusted_p) in zip(sig_list_id, sig_list_value_adjusted):
            if adjusted_p <= 0.05:
                combined_stats_txt_adjusted_handle.write('%s\t%s\n' % (each_id, adjusted_p))

    # Final report
    print('Results exported to: \n%s\n%s' % (combined_stats_txt, combined_stats_txt_adjusted))
    print('Done')
156
+
157
+
158
if __name__ == "__main__":

    # (flag, add_argument options), registered in this order
    option_specs = [
        ('-i', dict(required=True, help='tree file')),
        ('-d', dict(required=True, help='data file')),
        ('-o', dict(required=True, help='output directory')),
        ('-t', dict(required=False, type=int, default=1, help='number of threads, default: 1')),
        ('-f', dict(required=False, action="store_true", help='force overwrite')),
    ]

    PhyloBiAssoc_parser = argparse.ArgumentParser(usage=PhyloBiAssoc_usage)
    for flag, options in option_specs:
        PhyloBiAssoc_parser.add_argument(flag, **options)

    # hand the parsed options over as a plain dict
    args = vars(PhyloBiAssoc_parser.parse_args())
    PhyloBiAssoc(args)
TreeSAK/PlotMCMC.py ADDED
@@ -0,0 +1,41 @@
1
+ import matplotlib as mpl
2
+ mpl.use('Agg')
3
+ import matplotlib.pyplot as plt
4
+
5
+
6
def PlotMCMC(num_list_x, num_list_y, err_range_x, err_range_y, pwd_figure):
    """Save a plot of points with asymmetric horizontal and vertical error bars.

    num_list_x / num_list_y hold the point coordinates; err_range_x /
    err_range_y hold, for the point at the same position, a pair whose first
    and second items are the two ends of its error range. The figure is
    written to pwd_figure.
    """
    x_err_l, x_err_r = [], []
    y_err_l, y_err_u = [], []
    for idx, x_value in enumerate(num_list_x):
        y_value = num_list_y[idx]
        x_range = err_range_x[idx]
        y_range = err_range_y[idx]

        # error bar lengths are the distances from the point to each range end
        x_err_l.append(abs(x_value - x_range[0]))
        x_err_r.append(abs(x_range[1] - x_value))
        y_err_l.append(abs(y_value - y_range[0]))
        y_err_u.append(abs(y_range[1] - y_value))

    # markers have size 0, so only the error bars are visible
    plt.scatter(num_list_x, num_list_y, s=0)
    plt.errorbar(num_list_x, num_list_y,
                 xerr=[x_err_l, x_err_r],
                 yerr=[y_err_l, y_err_u],
                 ls='none', ecolor='black', elinewidth=1)
    plt.tight_layout()
    plt.savefig(pwd_figure)
    plt.close()
33
+
34
+
35
# Demo input for PlotMCMC: five points and the error range of each point.
num_list_x = [1, 2, 3, 4, 5]
num_list_y = [1, 2, 3, 4, 5]
err_range_x = [[0.8, 1.1], [1.5, 2.1], [2.9, 3.7], [3.2, 4.1], [4.5, 5.5]]
err_range_y = [[0.7, 1.3], [1.9, 2.5], [2.9, 3.7], [3.1, 4.4], [4.5, 5.5]]
pwd_figure = '/Users/songweizhi/Desktop/aaa.png'

# Only draw the demo figure when the file is run as a script, so that
# importing the module neither writes a file nor fails on machines where
# the hard-coded output folder does not exist.
if __name__ == '__main__':
    PlotMCMC(num_list_x, num_list_y, err_range_x, err_range_y, pwd_figure)
@@ -0,0 +1,152 @@
1
+ import os
2
+ import glob
3
+ import argparse
4
+ import pandas as pd
5
+ import plotly.express as px
6
+
7
+
8
# Example command and input file format for PlotMcmcNode.
PlotMcmcNode_usage = '''
========================== PlotMcmcNode example commands ==========================

TreeSAK PlotMcmcNode -i topo123_nodes.txt -o topo123_age.pdf

# txt file format (-i, tab separated, no header)
# column 1: mcmc file
# column 2: node id
# column 3: mcmc file description
# column 4: node description

path/to/topo1_clock3_mcmc.txt   t_n171  Topo1   Symbiosis_event_1
path/to/topo1_clock3_mcmc.txt   t_n151  Topo1   Symbiosis_event_2
path/to/topo1_clock3_mcmc.txt   t_n131  Topo1   Symbiosis_event_3
path/to/topo2_clock3_mcmc.txt   t_n171  Topo2   Symbiosis_event_1
path/to/topo3_clock3_mcmc.txt   t_n171  Topo3   Symbiosis_event_1

===================================================================================
'''
27
+
28
+
29
def sep_path_basename_ext(file_in):
    """Return the folder, base name and extension of a file path.

    A path without a directory part gets '.' as its folder; the extension is
    returned with its leading dot.
    """
    # directory part first, then split the remaining file name at its last dot
    parent_dir, name_with_ext = os.path.split(file_in)
    name_only, ext_only = os.path.splitext(name_with_ext)

    if parent_dir != '':
        return parent_dir, name_only, ext_only
    return '.', name_only, ext_only
40
+
41
+
42
def plot_distribution(df_txt, output_plot):
    """Draw horizontal half-violin plots of "Value" per "Setting", coloured by "Node".

    df_txt is a comma-separated table with the columns Value, Node and
    Setting; the figure is written to output_plot.
    """
    records = pd.read_table(df_txt, sep=',')
    num_settings = len(records['Setting'].unique())
    num_nodes = len(records['Node'].unique())

    # sort dataframe by run id
    records = records.sort_values(by='Setting', ascending=False)

    # 100 px per setting, but never lower than 360 px
    plot_width = 900
    plot_height = max(num_settings * 100, 360)

    fig = px.violin(records, x="Value", y="Setting", color="Node", points=False,
                    orientation="h", width=plot_width, height=plot_height)
    fig = fig.update_layout(xaxis_title="Age", yaxis_title="Settings")

    # a single node gets a filled violin, several nodes get transparent ones
    if num_nodes == 1:
        trace_style = dict(side="positive", fillcolor='lightblue', width=1.6, opacity=0.75)
    else:
        trace_style = dict(side="positive", fillcolor='rgba(0,0,0,0)', width=1.6)
    fig.update_traces(**trace_style)

    fig.update_traces(showlegend=True)
    fig.layout.template = "simple_white"
    fig.write_image(output_plot)
69
+
70
+
71
def PlotMcmcNode(args):
    """Plot the sampled values of selected nodes from one or more mcmc files.

    ``args`` is a dict with the keys:
        i  whitespace-separated txt file without header; columns: mcmc file,
           node id, optional mcmc file description, optional node description
        o  output plot; ".pdf" is appended unless it already ends with
           ".pdf" or ".PDF"

    The selected values are collected in a temporary "<plot>.txt" table, which
    is removed again once the plot has been written. The program exits with a
    non-zero status if a line has fewer than two columns, if an mcmc file does
    not exist, or if none of the requested nodes is found.
    """
    # local import: the module header does not provide sys
    import sys

    node_txt = args['i']
    output_plot = args['o']

    if output_plot[-4:] not in ['.PDF', '.pdf']:
        output_plot = output_plot + '.pdf'
    op_df_tmp = output_plot + '.txt'

    # read in txt file
    file_to_node_dict = dict()
    file_rename_dict = dict()
    node_rename_dict = dict()
    not_found_file_set = set()
    with open(node_txt) as node_txt_handle:
        for each_node in node_txt_handle:
            line_split = each_node.strip().split()

            # blank lines (e.g. a trailing newline) carry no node
            if not line_split:
                continue
            if len(line_split) < 2:
                print('Node id missing in line: %s, program exited!' % each_node.strip())
                sys.exit(1)

            mcmc_file = line_split[0]
            node_id = line_split[1]

            if os.path.isfile(mcmc_file) is False:
                not_found_file_set.add(mcmc_file)

            # the same node id may occur in several files, so key by both
            node_id_with_file_name = '%s_____%s' % (mcmc_file, node_id)

            # get file_to_node_dict
            file_to_node_dict.setdefault(mcmc_file, set()).add(node_id)

            # get file_rename_dict
            if len(line_split) >= 3:
                file_rename_dict[mcmc_file] = line_split[2]

            # get node_rename_dict (extra trailing columns are ignored)
            if len(line_split) >= 4:
                node_rename_dict[node_id_with_file_name] = line_split[3]

    if len(not_found_file_set) > 0:
        print('The following files not found program exited!')
        print('\n'.join(sorted(not_found_file_set)))
        sys.exit(1)

    # collect the values of all requested nodes in one long-format table
    found_matched_node = False
    with open(op_df_tmp, 'w') as op_df_tmp_handle:
        op_df_tmp_handle.write('Value,Node,Setting\n')
        for mcmc_file, current_node_set in file_to_node_dict.items():
            file_name_to_plot = file_rename_dict.get(mcmc_file, mcmc_file)
            mcmc_df = pd.read_table(mcmc_file, index_col=0)
            for each_col in mcmc_df:
                if each_col not in current_node_set:
                    continue
                node_desc_to_plot = node_rename_dict.get(('%s_____%s' % (mcmc_file, each_col)), each_col)
                found_matched_node = True
                for each_value in mcmc_df[each_col].values:
                    op_df_tmp_handle.write('%s,%s,%s\n' % (each_value, node_desc_to_plot, file_name_to_plot))

    if found_matched_node is False:
        # do not leave the header-only tmp file behind
        os.remove(op_df_tmp)
        print('Provided node(s) not found, program exited!')
        sys.exit(1)

    # plot distribution; the tmp file is removed even if plotting fails
    try:
        plot_distribution(op_df_tmp, output_plot)
    finally:
        os.remove(op_df_tmp)

    # final report
    print('Plot exported to %s, done!' % output_plot)
144
+
145
+
146
if __name__ == '__main__':

    # show the example command and input format as usage text
    PlotMcmcNode_parser = argparse.ArgumentParser(usage=PlotMcmcNode_usage)
    PlotMcmcNode_parser.add_argument('-i', required=True, help='input txt file')
    PlotMcmcNode_parser.add_argument('-o', required=True, help='output plot')
    args = vars(PlotMcmcNode_parser.parse_args())
    PlotMcmcNode(args)