r Rambo:Allelic Richness和Private等位基因通过多次随机减少
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了r Rambo:Allelic Richness和Private等位基因通过多次随机减少相关的知识,希望对你有一定的参考价值。
## ---------------------------------------------
##
## Rambo V0.9 : Allelic Richness and Private alleles by Multiple random reductions
## Jorge Assis (2016)
##
## ------------------------------------------------------------------------------------------------------------------
##
## Genetic diversity estimation per site or group of sites as standardised allelic richness, standardised number of private alleles and expected heterozygosity determined to the smallest site or group in terms of individuals under a set of randomizations.
## ------------------------------------------------------------------------------------------------------------------
##
## Citation:
## Assis, J., Coelho, N. C., Lamy, T., Valero, M., Alberto, F., & Serrão, E. A. (2016). Deep reefs are climatic refugia for genetic diversity of marine forests. Journal of Biogeography, (43), 833–844. https://doi.org/10.1111/jbi.12677
##
## ------------------------------------------------------------------------------------------------------------------
main.data.file = "Data/Genetics/Final Lo_Global_15L.gen"
missing.data = 0
ncode = 3L
replace = TRUE
resample.number.auto = FALSE
resample.number = 15 # 15 !!
discard.pops = NULL
number.iteractions = 999
alfa.test = 0.05
clustering.vector = 1:25
clustering.vector = c(1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2) # K= 3
clustering.vector = c(1,1,1,1,2,2,2,2,1,2,1,2,2,2,2,2,2,3,3,3,4,5,6,7) # K= 3 no 16 17
savefile = FALSE
save.filename = NULL
Rambo(main.data.file, missing.data, ncode, replace, resample.number.auto, resample.number,
discard.pops, number.iteractions, alfa.test, clustering.vector, savefile,
save.filename)
## ------------------------------------------------------------------------------------------------------------------
##
## ------------------------------------------------------------------------------------------------------------------
Rambo <- function (main.data.file, missing.data, ncode, replace, resample.number.auto, resample.number,
discard.pops, number.iteractions, alfa.test, clustering.vector, savefile,
save.filename) {
## -----------------------------------------------------
## Installing and Reading Dependences
substrRight <- function(x, n){
substr(x, nchar(x)-n+1, nchar(x))
}
## ----------------
installing <- FALSE
if( !"adegenet" %in% as.character(installed.packages()[,"Package"]) ) {
cat( "\r" , "\r" , " Installing Dependences...", "\r", "\r")
suppressMessages(suppressWarnings(
try(install.packages("adegenet" , verbose=FALSE , quiet = TRUE) , silent = TRUE) ))
installing <- TRUE
}
suppressMessages(suppressWarnings( try( library(adegenet) , silent = TRUE) ))
if( !"PopGenReport" %in% as.character(installed.packages()[,"Package"]) ) {
if( ! installing ) {
cat( "\r" , "\r" , " Installing Dependences...", "\r", "\r")
}
suppressMessages(suppressWarnings(
try(install.packages("PopGenReport" , verbose=FALSE , quiet = TRUE) , silent = TRUE) ))
}
suppressMessages(suppressWarnings( try( library(PopGenReport) , silent = TRUE) ))
## -----------------------------------------------------
## Reading Data
# A genepop object
if( class(main.data.file) == "genind" ) {
data <- main.data.file
}
# A genepop file
if( class(main.data.file) == "character" ) {
sink("/dev/null");
if(ncode == 3) {
suppressMessages(suppressWarnings( try( data <- read.genepop(main.data.file,ncode = 3L) , silent = FALSE) ))
}
if(ncode == 2) {
suppressMessages(suppressWarnings( try( data <- read.genepop(main.data.file,ncode = 2L) , silent = FALSE) ))
}
sink();
}
if( ! exists("data") ) {
stop("Error or Uncoded situation [!]\nGenepop Data is unreadable or the path is uncorrect")
}
## -----------------------------------------------------
## Main Objets and Defenitions
if( is.null(resample.number.auto) | ! exists("resample.number.auto") ) { resample.number.auto <- TRUE }
if( is.null(clustering.vector) | ! exists("clustering.vector") ) { clustering.vector <- NULL }
if( is.null(resample.number) | ! exists("resample.number") ) { resample.number <- NULL }
if( is.null(savefile) | ! exists("savefile") ) { savefile <- NULL }
if( is.null(replace) | ! exists("replace") ) { replace <- FALSE }
if( is.null(resample.number.auto) | ! exists("resample.number.auto") ) { resample.number.auto <- TRUE }
if( is.null(discard.pops) | ! exists("discard.pops") ) { discard.pops <- NULL }
if( is.null(number.iteractions) | ! exists("number.iteractions") ) { number.iteractions <- 999 }
if( is.null(alfa.test) | ! exists("alfa.test") ) { alfa.test <- 0.05 }
loci.names <- as.character(unique(data$loc.fac))
loci.names.individuals <- data$loc.fac
loci.n <- length(loci.names)
cat("\n")
cat("\n")
cat("\n")
cat(" ------------------------------------------------------------------------------------------------")
cat("\n")
cat(" Rambo . Random reductions for genetic data")
cat("\n")
cat("\n")
cat(" Data file: ", main.data.file)
cat("\n")
cat(" Loci: ", loci.n)
cat("\n")
cat(" Ploidy: ",unique(data$ploidy))
## -----------------------------------------------------
## Discard Populations
if( exists("discard.pops") & ! is.null(discard.pops) ) {
data <- data[ ! data@pop %in% unique(data@pop)[discard.pops] , ]
cat("\n")
cat(" Removed populations: ",discard.pops)
}
cat("\n")
cat("\n")
cat(" ------------------------------------------------------------------------------------------------")
cat("\n")
## -----------------------------------------------------
## Other Objets and Defenitions
population.names <- as.character(unique(data@pop))
population.names.results <- substr(population.names, 1, 3)
population.n <- length(unique(data@pop))
individual.n.per.pop <- vector()
for (f in 1:population.n){
individual.n.per.pop <- c(individual.n.per.pop,nrow(data[ data@pop == population.names[f] , ]@tab))
}
## -----------------------------------------------------
## Clustering
prior.variable.clustering <- TRUE
if( is.null(clustering.vector) | ! exists("clustering.vector") ) {
clustering.vector <- 1:population.n
cat("\n")
cat(" Clustering: FALSE")
cat("\n")
cat("\n")
prior.variable.clustering <- FALSE
}
if ( prior.variable.clustering & ! is.null(clustering.vector) | prior.variable.clustering & exists("clustering.vector") ) {
if( length(clustering.vector) != population.n) { stop("Error or Uncoded situation [!]\nThe number of populations does not match the length of the clustering vector.") }
clustering.array <- vector()
for (i in 1:population.n ) {
clustering.array <- c(clustering.array,rep(paste("POP.",as.character(clustering.vector[i]),sep=""),individual.n.per.pop[i]))
}
data@pop <- as.factor(clustering.array)
cat("\n")
cat(" Clustering: TRUE")
cat("\n")
cat("\n")
}
population.names <- unique( data@pop )
individual.names <- data@pop
population.n <- length(population.names)
individual.n <- length(individual.names)
if ( resample.number.auto ) {
samples <- vector()
for (i in 1:population.n) {
samples <- c(samples, sum( !is.na(which(individual.names %in% population.names[i]))))
}
resamp <- samples[which.min(samples)]
}
if ( ! resample.number.auto | is.null(resample.number.auto) ) {
resamp <- resample.number
}
## -----------------------------------------------------
## Main Random Matrices to produce Results
results.unique <- matrix(NA, ncol = number.iteractions, nrow = population.n)
results.unique.others <- matrix(NA, ncol = number.iteractions, nrow = population.n)
results.richness <- matrix(NA, ncol = number.iteractions, nrow = population.n)
results.richness.others <- matrix(NA, ncol = number.iteractions, nrow = population.n)
results.he <- matrix(NA, ncol = number.iteractions, nrow = population.n)
results.he.others <- matrix(NA, ncol = number.iteractions, nrow = population.n)
for (int in 1:number.iteractions) {
progress <- round((int/number.iteractions) * 100, digits = 0)
cat(" \r")
cat(paste(" Rambo in progress: ", progress, " % | ", number.iteractions,
" randomizations for ", population.n, " populations with ",
resamp, " individuals ",
sep = ""), "\r")
flush.console()
resample.array <- vector()
for( i in 1:population.n) {
if( sum(individual.names == population.names[i]) < resamp ) { replace.pop <- TRUE } else { replace.pop <- replace }
resample.array <- c(resample.array,sample(which(individual.names == population.names[i]), resamp, replace = replace.pop))
}
data.resamp <- data[ resample.array , ]
unique <- vector()
unique.others <- vector()
alleles <- vector()
alleles.other <- vector()
# Allele Richness & Unique Alleles
for (j in 1:population.n ) {
all.alleles.perlocus.j <- apply( data.resamp[ data.resamp@pop == population.names[j] , ]@tab , 2 , sum , na.rm=T)
all.alleles.perlocus.j <- data.frame(all.alleles.perlocus.j)
all.alleles.perlocus.j <- all.alleles.perlocus.j[substrRight(rownames(all.alleles.perlocus.j), 3) != missing.data,]
all.alleles.perlocus.j[all.alleles.perlocus.j > 0] <- 1
all.alleles.perlocus.others.j <- apply( data.resamp[ data.resamp@pop != population.names[j] , ]@tab , 2 , sum, na.rm=T)
all.alleles.perlocus.others.j <- data.frame(all.alleles.perlocus.others.j)
all.alleles.perlocus.others.j <- all.alleles.perlocus.others.j[substrRight(rownames(all.alleles.perlocus.others.j), 3) != missing.data,]
all.alleles.perlocus.others.j[all.alleles.perlocus.others.j > 0] <- 1
merged <- rbind(all.alleles.perlocus.j , all.alleles.perlocus.others.j )
unique <- c(unique , length(which(merged[1,] == 1 & merged[2,] == 0)) )
unique.others <- c(unique.others , length(which(merged[1,] == 0 & merged[2,] == 1)) )
alleles <- c( alleles , (sum(all.alleles.perlocus.j) ) / loci.n )
alleles.other <- c(alleles.other , (sum(all.alleles.perlocus.others.j > 0) ) / loci.n )
}
he <- Hs(data.resamp)
# Place results
results.unique[, int] <- unique
results.unique.others[, int] <- unique.others
results.richness[, int] <- alleles
results.richness.others[, int] <- alleles.other
results.he[, int] <- he
}
results.significance.richness <- numeric(population.n)
results.significance.unique <- numeric(population.n)
for (j in 1:population.n ) {
results.significance.richness[j] <- signif(wilcox.test(results.richness[j, ] , results.richness.others[j, ], alternative = "greater")$p.value,3)
results.significance.unique[j] <- signif(wilcox.test(results.unique[j, ] , results.unique.others[j, ], alternative = "greater")$p.value,3)
}
results.significance.richness[results.significance.richness > alfa.test] <- "NS"
results.significance.unique[results.significance.unique > alfa.test] <- "NS"
if( length(unique(clustering.vector)) == length(individual.n.per.pop) ) {
main.results.data.frame <- data.frame( Population = ( population.names.results ) ,
. = rep(" ", length(individual.n.per.pop) ) ,
n = individual.n.per.pop ,
. = rep(" ", length(individual.n.per.pop) ) ,
A = round(apply(results.richness, 1, mean), 2) ,
SD.A = round(apply(results.richness, 1, sd), 2) ,
Signif.A = results.significance.richness ,
.. = rep(" ", length(individual.n.per.pop) ) ,
PA = round(apply(results.unique, 1, mean), 2) ,
SD.PA = round(apply(results.unique, 1, sd), 2) ,
Signif.PA = results.significance.unique ,
... = rep(" ", length(individual.n.per.pop) ) ,
He = round(apply(results.he, 1, mean), 2),
SD.He = round(apply(results.he, 1, sd), 2)
) }
if( length(unique(clustering.vector)) != length(individual.n.per.pop) ) {
main.results.data.frame <- data.frame( Population = 1:length(unique(clustering.vector)) ,
. = rep(" ", length(unique(clustering.vector)) ) ,
A = round(apply(results.richness, 1, mean), 2) ,
SD.A = round(apply(results.richness, 1, sd), 2) ,
Signif.A = results.significance.richness ,
.. = rep(" ", length(unique(clustering.vector)) ) ,
PA = round(apply(results.unique, 1, mean), 2) ,
SD.PA = round(apply(results.unique, 1, sd), 2) ,
Signif.PA = results.significance.unique ,
... = rep(" ", length(unique(clustering.vector)) ) ,
He = round(apply(results.he, 1, mean), 2),
SD.He = round(apply(results.he, 1, sd), 2)
)
}
if (savefile) {
write.table( main.results.data.frame , quote = FALSE , file = paste(save.filename, ".txt", sep = ""), sep = ",", row.names = FALSE, col.names = TRUE, na = "NA", dec = ".")
}
cat("\n")
cat("\n")
cat(" Rambo Results for", number.iteractions," randomizations using", population.n, "populations with", resamp, "individuals" )
cat("\n")
cat(" ------------------------------------------------------------------------------------------------")
cat("\n")
cat("\n")
print(main.results.data.frame)
cat("\n")
cat(" ------------------------------------------------------------------------------------------------")
cat("\n")
cat(" NS for non-significant Wilcoxon tests p-values for differences in mean diversity each population and the global pool (i.e. it is a paired difference test).")
cat("\n")
cat("\n")
}
## ------------------------------------------------------------------------------------------------------------------
##
## ------------------------------------------------------------------------------------------------------------------
以上是关于r Rambo:Allelic Richness和Private等位基因通过多次随机减少的主要内容,如果未能解决你的问题,请参考以下文章
(function($){...})(jQuery)是什么意思?
转换为 spark DataFrame 时,Json 字段默认排序