sh GH phylo analisis
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了sh GH phylo analisis相关的知识,希望对你有一定的参考价值。
library(phytools)
library(ape)
library(nLTT)
library(tidyverse)
gh117_tree <- midpoint.root(read.tree(file = "~/Downloads/Tao/RAxML_bipartitions.GH117"))
gh50_tree <- midpoint.root(read.tree(file = "~/Downloads/Tao/RAxML_bipartitions.GH50"))
gh16_tree <- midpoint.root(read.tree(file = "~/Downloads/Tao/RAxML_bipartitions.GH16"))
gh86_tree <- midpoint.root(read.tree(file = "~/Downloads/Tao/RAxML_bipartitions.GH86"))
gh2_tree <- midpoint.root(read.tree(file = "~/Downloads/Tao/RAxML_bipartitions.GH2"))
gh117_tree_um <- chronos(gh117_tree, lambda = 0, model = 'relaxed', calibration = makeChronosCalib(gh117_tree, node = 'root', age.min = 1, age.max = 1, interactive = FALSE, soft.bounds = FALSE))
gh50_tree_um <- chronos(gh50_tree, lambda = 0, model = 'relaxed', calibration = makeChronosCalib(gh50_tree, node = 'root', age.min = 1, age.max = 1, interactive = FALSE, soft.bounds = FALSE))
gh16_tree_um <- chronos(gh16_tree, lambda = 0, model = 'relaxed', calibration = makeChronosCalib(gh16_tree, node = 'root', age.min = 1, age.max = 1, interactive = FALSE, soft.bounds = FALSE))
gh86_tree_um <- chronos(gh86_tree, lambda = 0, model = 'relaxed', calibration = makeChronosCalib(gh86_tree, node = 'root', age.min = 1, age.max = 1, interactive = FALSE, soft.bounds = FALSE))
gh2_tree_um <- chronos(gh2_tree, lambda = 0, model = 'relaxed', calibration = makeChronosCalib(gh2_tree, node = 'root', age.min = 1, age.max = 1, interactive = FALSE, soft.bounds = FALSE))
colors <- RColorBrewer::brewer.pal(6, "Set1")
gh16_cd <- gh16_tree$tip.label %>%
tbl_df() %>%
separate(value, into = "tax", remove = FALSE, sep = "_N", extra = "drop") %>%
mutate(color = case_when(tax == "Actinobacteria" ~ colors[[1]],
tax == "Bacteroidetes/Chlorobi_group" ~ colors[[2]],
tax == "Firmicutes" ~ colors[[3]],
tax == "Gammaproteobacteria" ~ colors[[4]],
tax == "Planctomycetes" ~ colors[[5]],
tax == "Alphaproteobacteria" ~ colors[[6]])) #%>%
#select(value, color, tax) %>% write_tsv("~/Downloads/Tao/GH16_itol.tsv", col_names = FALSE)
write.tree(gh16_tree, "~/Downloads/Tao/RAxML_bipartitions_rooted.GH16")
write.tree(gh16_tree_um, "~/Downloads/Tao/RAxML_bipartitions_rooted_um.GH16")
gh86_cd <- gh86_tree$tip.label %>%
tbl_df() %>%
separate(value, into = "tax", remove = FALSE, sep = "_N", extra = "drop") %>%
mutate(color = case_when(tax == "Actinobacteria" ~ colors[[1]],
tax == "Bacteroidetes/Chlorobi_group" ~ colors[[2]],
tax == "Firmicutes" ~ colors[[3]],
tax == "Gammaproteobacteria" ~ colors[[4]],
tax == "Planctomycetes" ~ colors[[5]],
tax == "Alphaproteobacteria" ~ colors[[6]])) #%>%
#select(value, color, tax) %>% write_tsv("~/Downloads/Tao/GH86_itol.tsv", col_names = FALSE)
write.tree(gh86_tree, "~/Downloads/Tao/RAxML_bipartitions_rooted.GH86")
write.tree(gh86_tree_um, "~/Downloads/Tao/RAxML_bipartitions_rooted_um.GH86")
gh2_cd <- gh2_tree$tip.label %>%
tbl_df() %>%
separate(value, into = "tax", remove = FALSE, sep = "_N", extra = "drop") %>%
mutate(color = case_when(tax == "Actinobacteria" ~ colors[[1]],
tax == "Bacteroidetes/Chlorobi_group" ~ colors[[2]],
tax == "Firmicutes" ~ colors[[3]],
tax == "Gammaproteobacteria" ~ colors[[4]],
tax == "Planctomycetes" ~ colors[[5]],
tax == "Alphaproteobacteria" ~ colors[[6]],
tax == "pdb_5T98_A_Chain_A_1_Glycoside_Hydrolase" ~ "#000000",
tax == "5T9A_A_PDBID_CHAIN_SEQUENCE" ~ "#000000")) #%>%
#select(value, color, tax) %>% write_tsv("~/Downloads/Tao/GH2_itol.tsv", col_names = FALSE)
write.tree(gh2_tree, "~/Downloads/Tao/RAxML_bipartitions_rooted.GH2")
write.tree(gh2_tree_um, "~/Downloads/Tao/RAxML_bipartitions_rooted_um.GH2")
gh50_cd <- gh50_tree$tip.label %>%
tbl_df() %>%
separate(value, into = "tax", remove = FALSE, sep = "_N", extra = "drop") %>%
mutate(color = case_when(tax == "Actinobacteria" ~ colors[[1]],
tax == "Bacteroidetes/Chlorobi_group" ~ colors[[2]],
tax == "Firmicutes" ~ colors[[3]],
tax == "Gammaproteobacteria" ~ colors[[4]],
tax == "Planctomycetes" ~ colors[[5]],
tax == "Alphaproteobacteria" ~ colors[[6]])) #%>%
#select(value, color, tax) %>% write_tsv("~/Downloads/Tao/GH50_itol.tsv", col_names = FALSE)
write.tree(gh50_tree, "~/Downloads/Tao/RAxML_bipartitions_rooted.GH50")
write.tree(gh50_tree_um, "~/Downloads/Tao/RAxML_bipartitions_rooted_um.GH50")
gh117_cd <- gh117_tree$tip.label %>%
tbl_df() %>%
separate(value, into = "tax", remove = FALSE, sep = "_N", extra = "drop") %>%
mutate(color = case_when(tax == "Actinobacteria" ~ colors[[1]],
tax == "Bacteroidetes_Chlorobi_group" ~ colors[[2]],
tax == "Firmicutes" ~ colors[[3]],
tax == "Gammaproteobacteria" ~ colors[[4]],
tax == "Planctomycetes" ~ colors[[5]],
tax == "Alphaproteobacteria" ~ colors[[6]]))
#select(value, color, tax) %>% write_tsv("~/Downloads/Tao/GH117_itol.tsv", col_names = FALSE)
write.tree(gh117_tree, "~/Downloads/Tao/RAxML_bipartitions_rooted.GH117", )
write.tree(gh117_tree_um, "~/Downloads/Tao/RAxML_bipartitions_rooted_um.GH117")
pb_parms <- function(X, n){
h<-max(nodeHeights(X))
x<-seq(0,h,by=h/100)
b<-(log(ape::Ntip(X))-log(2))/h
trees<-pbtree(b=b, n=ape::Ntip(X), t=h, nsim=n, method="direct", quiet=TRUE)
g<-sapply(trees,function(x) ltt(x,plot=FALSE)$gamma)
ltt <- ltt(trees, plot=FALSE)
list(h = h, x = x,b = b, trees = trees, tree = X, ltt = ltt, g = g)
}
library(ggtree)
gh117_pb_p <- pb_parms(gh117_tree_um, 100)
gh16_pb_p <- pb_parms(gh16_tree_um, 100)
gh86_pb_p <- pb_parms(gh86_tree_um, 100)
gh2_pb_p <- pb_parms(gh2_tree_um, 100)
gh50_pb_p <- pb_parms(gh50_tree_um, 100)
class(gh117_tree_um) <- "phylo"
class(gh50_tree_um) <- "phylo"
class(gh16_tree_um) <- "phylo"
class(gh86_tree_um) <- "phylo"
class(gh2_tree_um) <- "phylo"
plot_pb <- function(X, Y, Z){
X1 <- X
setNames(exp(1:Ntip(X1)), X1$tip.label)
X_ltt <- ltt(X, plot = FALSE)
X_ltt_df <- data_frame(time = X_ltt$times, lineages = X_ltt$ltt)
j <- fortify(X) %>% tbl_df %>% mutate(y1 = log(y))
f <- max(j$y1)/Ntip(X)
j <- j %>% mutate(y = exp(y*f))
cls <- Y %>% select(tax, color) %>% unique()
cls_v <- cls$color
names(cls_v) <- cls$tax
Y_states <- Y$tax
names(Y_states) <- Y$value
fitER <- ace(Y_states, X, model="ER", type="discrete", marginal = TRUE)
fitER_l <- as_data_frame(fitER$lik.anc)
fitER_l$node <- 1:X$Nnode+Ntip(X)
n_col <- dim(fitER_l)[2] - 1
pies <- nodepie(fitER_l, cols=1:n_col, alpha=0.8, color = cls_v)
plot_label <- sprintf("bold(\"Pybus\" ~ gamma == %0.4f)", round(X_ltt$gamma, 4))
g1 <- ggtree(j,ladderize=T) %<+% Y +
geom_tippoint(aes(x=x+0.005, color = tax, fill = tax), shape = 22) +
geom_step(data = X_ltt_df, aes(time, lineages), color = "red", size = 1) +
theme_bw() +
coord_cartesian() +
scale_y_log10() +
xlab("Relative time") +
ylab("Lineages (log10)") +
theme(legend.position = "top",
legend.title = element_blank()) +
annotate("text", label = plot_label, x = 0.05 , y = max(X_ltt_df$lineages), parse = TRUE) +
scale_fill_manual(values = cls_v) +
scale_color_manual(values = cls_v)
l <- bind_rows(lapply(1:100, function (x, y) {data_frame(time = y$ltt[[x]]$times, lineages = y$ltt[[x]]$ltt, rep = x)}, y = Z))
g2 <- ggplot(l, aes(time, lineages)) +
geom_step(aes(group = rep), alpha = 0.7, color = "grey60") +
geom_step(data = X_ltt_df, aes(time, lineages), size = 1) +
geom_line(data = data_frame(x = c(0,Z$h), y = c(2,ape::Ntip(Z$tree))), aes(x, y), size = 0.9, color = "red",linetype = 2) +
scale_y_log10() +
theme_bw() +
xlab("Relative time") +
ylab("Lineages (log10)") +
annotate("text", label = plot_label, x = 0.05 , y = max(X_ltt_df$lineages), parse = TRUE)
g3 <- ggtree(X,ladderize=T) %<+% Y +
geom_tippoint(aes(x=x+0.005, color = tax, fill = tax), shape = 22) +
#geom_step(data = X_ltt_df, aes(time, lineages), color = "red", size = 1) +
theme_bw() +
coord_cartesian() +
#scale_y_log10() +
xlab("Relative time") +
ylab("Lineages") +
theme(legend.position = "top",
legend.title = element_blank()) +
#annotate("text", label = plot_label, x = 0.05 , y = max(X_ltt_df$lineages), parse = TRUE) +
scale_fill_manual(values = cls_v) +
scale_color_manual(values = cls_v)
g3 <- inset(g3, pies)
list(ltt_tree = g1, pb_ltt = g2, ltt_asr = g3, asr = fitER_l)
}
gh117_plots <- plot_pb(gh117_tree_um, gh117_cd, gh117_pb_p)
gh16_plots <- plot_pb(gh16_tree_um, gh16_cd, gh16_pb_p)
gh86_plots <- plot_pb(gh86_tree_um, gh86_cd, gh86_pb_p)
gh50_plots <- plot_pb(gh50_tree_um, gh50_cd, gh50_pb_p)
gh2_plots <- plot_pb(X = gh2_tree_um, Y = gh2_cd, Z = gh2_pb_p)
phytools::ltt(gh117_tree_um, plot = FALSE)$gamma
phytools::ltt(gh50_tree_um, plot = FALSE)$gamma
phytools::ltt(gh16_tree_um, plot = FALSE)$gamma
phytools::ltt(gh86_tree_um, plot = FALSE)$gamma
phytools::ltt(gh2_tree_um, plot = FALSE)$gamma
rand_trees <- function(X, tree){
n <- sample(seq(0,0.9,0.05),1)
ntips <- ape::Ntip(tree)
tipsToDrop <- sample(ntips,round(ntips*n), replace=FALSE)
prunedTree <- ape::drop.tip(tree,tipsToDrop,trim.internal=TRUE)
ltt <- ltt(prunedTree, plot=FALSE)
list(prunedTree = prunedTree, g = ltt$gamma, n = n)
}
gh117_rand_t <- lapply(1:1000, rand_trees, tree = gh117_pb_p$tree)
gh50_rand_t <- lapply(1:1000, rand_trees, tree = gh50_pb_p$tree)
gh16_rand_t <- lapply(1:1000, rand_trees, tree = gh16_pb_p$tree)
gh86_rand_t <- lapply(1:1000, rand_trees, tree = gh86_pb_p$tree)
gh2_rand_t <- lapply(1:1000, rand_trees, tree = gh2_pb_p$tree)
comb <- bind_rows(purrr::map_dbl(gh117_rand_t, "g") %>%
tbl_df() %>%
mutate(class = "GH117"),
purrr::map_dbl(gh50_rand_t, "g") %>%
tbl_df() %>%
mutate(class = "GH50"),
purrr::map_dbl(gh16_rand_t, "g") %>%
tbl_df() %>%
mutate(class = "GH16"),
purrr::map_dbl(gh86_rand_t, "g") %>%
tbl_df() %>%
mutate(class = "GH86"),
purrr::map_dbl(gh2_rand_t, "g") %>%
tbl_df() %>%
mutate(class = "GH2"))
classes <- c("GH117", "GH50", "GH16", "GH86", "GH2")
compare_means(value ~ class, data = comb, p.adjust.method = "BH") %>% View
comparisons <- combn(classes, 2, simplify = FALSE)
comb$class <- factor(comb$class, levels = c("GH2", "GH16", "GH50", "GH86", "GH117"))
plot_label <- sprintf("bold(\"Pybus\" ~ gamma)")
ggplot(comb, aes(class, value)) +
geom_boxplot(fill = "grey90") +
#stat_compare_means(comparisons = comparisons, label = "p.signif", hide.ns = TRUE) +
xlab("") +
ylab(expression(paste("Pybus ", gamma))) +
theme_bw() +
theme(legend.position = "none")
#!/bin/bash
INFILE=${1}
NAM=$(basename ${INFILE} _alignment_filtered.fasta)
LOG=${NAM}.log
CDHIT_BIN=cd-hit
FAMSA_BIN=~/opt/FAMSA/famsa
ZORRO_BIN=~/zorro_linux_x86_64
RAXML_BIN=raxmlHPC-PTHREADS-AVX2
ODSEQ_BIN=~/opt/OD-Seq_mg/OD-seq
# Remove identical sequences
printf "Dereplicating sequences..."
${CDHIT_BIN} -i ${INFILE} -c 1 -d 0 -o ${NAM}_reduced &> ${LOG}
# Align sequence
printf "done\nAligning sequences..."
${FAMSA_BIN} ${NAM}_reduced ${NAM}_aln.fasta >> ${LOG} 2>&1
# Checking for outliers
printf "done\nChecking for outliers..."
${ODSEQ_BIN} -i ${NAM}_aln.fasta -c ${NAM}_core.fasta -o ${NAM}_outlier.fasta >> ${LOG} 2>&1
# REalign core sequences
printf "done\nAligning core sequences..."
${FAMSA_BIN} ${NAM}_core.fasta ${NAM}_core_aln.fasta >> ${LOG} 2>&1
# Mask alignment
printf "done\nMasking alignment..."
${ZORRO_BIN} ${NAM}_core_aln.fasta | awk '{print int($1+0.5)}' > ${NAM}_mask
# Build phylogeny with RAxML and Zorro masking
printf "done\nInferring tree..."
${RAXML_BIN} -T 16 -a ${NAM}_mask -f a -m PROTGAMMAAUTO -s ${NAM}_core_aln.fasta -n ${NAM} -x 1234 -N autoMRE -p 1234 >> ${LOG} 2>&1
printf "done\n"
以上是关于sh GH phylo analisis的主要内容,如果未能解决你的问题,请参考以下文章