R: data.frame dist matrix转换

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了R: data.frame dist matrix转换相关的知识,希望对你有一定的参考价值。

参考技术A

matrix

matrix > data.frame

matrix > dist

data.frame > dist

dist > matrix
原来的自我距离统一转成0

dist > data.frame
原来的自我距离统一转成0

r 在R中创建示例data.frame

### Function to Generate a Random Data Set
#   an adaptation of
#   https://trinkerrstuff.wordpress.com/2012/05/02/function-to-generate-a-random-data-set
#   Posted on 2 May, 2012 by tylerrinker

#   The props Function
#   The props function generates a data frame of proportions whose rows sum to 1.  It takes two required arguments and an optional var.names argument.  The two required arguments, ncol and nrow, are the dimensions of the data frame.  The optional var.names argument names the columns; otherwise they are named X1..Xn.  Note that this function is a poor choice when there are many columns, because each column is drawn from whatever the earlier columns left over, so the later columns tend to be close to 0.  For that case Dason of talkstats.com provides an alternative (props2), which is slower but better suited to numerous columns.
#############################################################
# function to generate random proportions whose rowSums = 1 #
#############################################################
props <- function(ncol, nrow, var.names=NULL)
{
    # Build a data frame with `nrow` rows and `ncol` columns of random
    # proportions; every row adds up to 1 (up to floating-point error).
    #   ncol      number of proportion columns; must be at least 2
    #   nrow      number of rows to generate
    #   var.names optional column names; when NULL the data.frame defaults
    #             (X1..Xn) are kept
    if (ncol < 2) stop("ncol must be greater than 1")

    # One row: each of the first k-1 shares is drawn on a 0.01 grid from
    # whatever is still unallocated, and the last share takes the remainder.
    draw_row <- function(k) {
        shares <- numeric(k - 1)
        allocated <- 0
        for (j in seq_len(k - 1)) {
            shares[j] <- sample(seq(0, 1 - allocated, by=.01), 1)
            allocated <- allocated + shares[j]
        }
        c(shares, 1 - sum(shares))
    }

    # replicate() stores each generated row as a column, hence the transpose.
    stacked <- t(replicate(nrow, draw_row(k=ncol)))
    result <- data.frame(stacked)
    if (!is.null(var.names)) colnames(result) <- var.names
    result
}
#   or
props2 <- function(nrow=10, ncol=5, var.names=NULL, digits=2)
{
    # Build a data frame with `nrow` rows and `ncol` columns of random
    # proportions, rounded to `digits` decimals, whose rows sum to 1.
    #   nrow      number of rows to generate (0 gives an empty data frame)
    #   ncol      number of proportion columns; must be at least 1
    #   var.names optional column names; when NULL the columns are X1..Xn
    #   digits    number of decimal places kept in each proportion
    # Note the argument order (nrow, ncol) is the reverse of props().
    if (ncol < 1) stop("ncol must be at least 1")

    # One row: normalise gamma draws, round them, then push any rounding
    # surplus or deficit onto the largest entry so the row still sums to 1.
    p <- function(n, digits){
        tmp <- rgamma(n, 1, 1)
        X <- round(tmp/sum(tmp), digits=digits)
        excess <- sum(X) - 1
        # A genuine rounding mismatch is at least one grid step (10^-digits);
        # anything below half a step is floating-point noise and is ignored.
        if (abs(excess) > 0.5 * 10^(-digits)) {
            top <- which.max(X)
            # Re-round so the corrected entry stays on the rounding grid.
            X[top] <- round(X[top] - excess, digits=digits)
        }
        return(X)
    }

    # vapply() always yields numeric output, one generated row per column
    # (or a plain vector when ncol is 1).  Filling the matrix by row puts
    # the i-th draw in row i regardless of that shape, which keeps the
    # dimensions right for ncol = 1 and nrow = 0.
    draws <- vapply(seq_len(nrow), function(i) p(n=ncol, digits=digits),
        numeric(ncol))
    DF <- data.frame(matrix(draws, nrow=nrow, ncol=ncol, byrow=TRUE,
        dimnames=list(NULL, sprintf("X%d", seq_len(ncol)))))
    if (!is.null(var.names)) colnames(DF) <- var.names
    return(DF)
}

#   The NAins Function
#   The NAins function takes a data frame and randomly inserts a certain proportion of missing (NA) values.  The function has two arguments: df, which is the data frame, and prop, which is the proportion of NA values to be inserted into the data frame (default is .1).
#   Special thanks again to Dason of talkstats.com for help speeding up this function.  This function consumes considerable time in DFgen, and he provided the code that makes it much faster.
################################################################
# RANDOMLY INSERT A CERTAIN PROPORTION OF NAs INTO A DATAFRAME #
################################################################
NAins <-  NAinsert <- function(df, prop = .1){
    # Replace a random selection of cells of `df` with NA and return the
    # modified copy.
    #   df    a data frame (or matrix)
    #   prop  proportion of all cells to blank out, between 0 and 1;
    #         ceiling(prop * number of cells) cells are set to NA
    # Stops with an error when prop is not a single number in [0, 1].
    if (!is.numeric(prop) || length(prop) != 1 || is.na(prop) ||
        prop < 0 || prop > 1) {
        stop("prop must be a single number between 0 and 1")
    }
    n <- nrow(df)
    m <- ncol(df)
    n.cells <- n * m
    num.to.na <- ceiling(prop * n.cells)
    # Nothing to blank out: prop is 0 or the input has no cells.
    if (num.to.na == 0) return(df)

    # 0-based cell ids, numbered along rows, drawn without replacement.
    id <- sample.int(n.cells, num.to.na, replace = FALSE) - 1L
    rows <- id %/% m + 1
    cols <- id %% m + 1

    # Flag the chosen cells in a logical mask and blank them in one
    # assignment instead of one cell at a time.
    mask <- matrix(FALSE, nrow = n, ncol = m)
    mask[cbind(rows, cols)] <- TRUE
    df[mask] <- NA
    return(df)
}

#   The DFgen Function
#   The DFgen function randomly generates an n-length data set with predefined variables.
############################################################
# GENERATE A RANDOM DATA SET.  CAN BE SET TO LONG OR WIDE. #
# DATA SET HAS FACTORS AND NUMERIC VARIABLES AND CAN       #
# OPTIONALLY GIVE BUDGET EXPENDITURES AS A PROPORTION.     #
# CAN ALSO TELL A PROPORTION OF CELLS TO BE MISSING VALUES #
############################################################
# NOTE RELIES ON THE props FUNCTION AND THE NAins FUNCTION #
############################################################
DFgen <- DFmaker <- function(n=10, type=wide, digits=2, 
    proportion=FALSE, na.rate=0) 
{
    # Generate a random demo data set with `n` subjects.
    #   n          number of subjects (rows in the wide layout)
    #   type       layout, given as a bare name or a string: wide keeps
    #              time1..time3 as columns, long stacks them into a
    #              time/value pair of columns
    #   digits     decimal places applied to every numeric column
    #   proportion if TRUE, insert food/housing/other columns produced by
    #              props() after the first ten columns
    #   na.rate    when non-zero, passed to NAins() as the proportion of
    #              cells (id column excluded) to set to NA
    # Stops with "Invalid Data "type"" for any other layout name.

    # `type` is never evaluated; only the text of the expression is used.
    layout <- as.character(substitute(type))

    # Coerce to a data frame and discard its row names.
    drop_rownames <- function(tbl) {
        plain <- as.data.frame(tbl)
        rownames(plain) <- NULL
        plain
    }

    # Round every numeric column of `tbl`, leaving other columns alone.
    round_numeric <- function(tbl, digits = 0) {
        numeric_cols <- sapply(tbl, is.numeric)
        tbl[, numeric_cols] <- round(tbl[, numeric_cols], digits)
        tbl
    }

    # Stack time1..time3 into a long layout with `time` and `value` columns.
    stack_times <- function(tbl) {
        wave_cols <- c("time1", "time2", "time3")
        stacked <- reshape(tbl, direction = "long", idvar = "id",
            varying = wave_cols,
            v.names = c("value"),
            timevar = "time", times = wave_cols)
        drop_rownames(stacked)
    }

    # The columns are drawn one at a time, in a fixed order, so that a
    # given seed always produces the same data set.
    time1 <- sample(1:100, n, replace = TRUE) + abs(rnorm(n))
    id.col <- paste("ID.", 1:n, sep = "")
    group.col <- sample(c("control", "treat"), n, replace = TRUE)
    grad.col <- sample(c("yes", "no"), n, replace = TRUE)
    race.col <- sample(c("black", "white", "asian"), n,
        replace = TRUE, prob=c(.25, .5, .25))
    gender.col <- sample(c("male", "female"), n, replace = TRUE)
    age.col <- sample(18:40, n, replace = TRUE)
    status.col <- sample(c("never", "married", "divorced", "widowed"),
        n, replace = TRUE, prob=c(.25, .4, .3, .05))
    party.col <- sample(c("democrat", "republican",
        "independent", "other"), n, replace= TRUE,
        prob=c(.35, .35, .20, .1))
    kids.col <- rpois(n, 1.5)
    # The 0..30000 range appears in both sequences, so low incomes are
    # twice as likely to be drawn as high ones.
    income.pool <- c(seq(0, 30000, by=1000), seq(0, 150000, by=1000))
    income.col <- sample(income.pool, n, replace=TRUE)
    score.col <- rnorm(n)
    time2 <- time1 + 2 * abs(rnorm(n))
    time3 <- time1 + (4 * abs(rnorm(n)))

    DF <- data.frame(id = id.col,
        group = group.col,
        hs.grad = grad.col,
        race = race.col,
        gender = gender.col,
        age = age.col,
        m.status = status.col,
        political = party.col,
        n.kids = kids.col,
        income = income.col,
        score = score.col,
        time1 = time1,
        time2 = time2,
        time3 = time3)

    if (proportion) {
        # Budget shares go between income (column 10) and score (column 11).
        budget <- props(ncol=3, nrow=n, var.names=c("food",
            "housing", "other"))
        DF <- cbind(DF[, 1:10], budget, DF[, 11:14])
    }
    if (na.rate!=0) {
        # The id column is held out so it never receives an NA.
        id.only <- DF[, 1, drop=FALSE]
        DF <- cbind(id.only, NAins(DF[, -1], prop=na.rate))
    }

    shaped <- switch(layout,
        wide = DF,
        long = stack_times(DF),
        stop("Invalid Data \"type\""))
    round_numeric(shaped, digits=digits)
}

以上是关于R: data.frame dist matrix转换的主要内容,如果未能解决你的问题,请参考以下文章

R data.frame

R data.frame

R:`[.data.frame`(frame, predictors) 中的决策树错误:选择了未定义的列

如何从r中的列联表中获取带有案例的data.frame?

R语言中的 Vector, Array, List 和 Data Frame

将数字转换为data.frame中的字母