R：使用 R 和 Tableau 分析总统辩论中的文本

Posted 2021-05-24

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了使用 R 和 Tableau 分析总统辩论文本的相关知识，希望对你有一定的参考价值。

# Source transcript:
# https://www.washingtonpost.com/news/the-fix/wp/2016/09/26/the-first-trump-clinton-presidential-debate-transcript-annotated/

# Read the transcript CSV (no header row). The two columns are named
# "candidate" and "statement" right after loading.
transcript <- read.csv(
  file = "~/R/reddit/speach/import/transcript.csv",
  header = FALSE,
  stringsAsFactors = FALSE
)
colnames(transcript) <- c("candidate", "statement")

# Strip every character that is not alphanumeric, a space or an apostrophe,
# then split each statement on single spaces: one character vector of words
# per transcript row.
cleaned_statements <- gsub("[^[:alnum:] \']", "", transcript$statement)
words_per_statement <- strsplit(cleaned_statements, " ")

# Melt to one row per (candidate, word). Repeating each candidate label once
# per word of the statement replaces the nested loops that grew a matrix with
# rbind() (quadratic copying) and also behaves correctly for zero rows, where
# 1:nrow(transcript) would have iterated over c(1, 0).
transcript_melted <- data.frame(
  candidate = rep(
    as.character(transcript$candidate),
    lengths(words_per_statement)
  ),
  # as.character() keeps the column present even when there are no words
  # at all (unlist() of an empty list is NULL).
  word = as.character(unlist(words_per_statement)),
  stringsAsFactors = FALSE
)

# Remove empty tokens (left behind by consecutive spaces), stray single-space
# tokens and missing values.
keep_word <- !is.na(transcript_melted$word) &
  !(transcript_melted$word %in% c("", " "))
transcript_melted <- transcript_melted[keep_word, ]

# Fold case so that differently capitalised spellings count as the same word.
transcript_melted$word <- tolower(transcript_melted$word)

# Drop every row attributed to "LESTER HOLT", then rebuild the factor so that
# the removed label no longer appears as a level in the tables below.
not_holt <- which(transcript_melted$candidate != "LESTER HOLT")
transcript_melted <- transcript_melted[not_holt, ]
transcript_melted$candidate <- factor(transcript_melted$candidate)

# Frequency of every (candidate, word) pair, most frequent first.
# Columns: Var1 = candidate, Var2 = word, Freq = count.
# NOTE: this object shadows base::summary(); the name is kept because later
# parts of the script look words up in `summary`.
summary <- data.frame(
  table(transcript_melted$candidate, transcript_melted$word)
)
summary <- summary[order(-summary$Freq), ]

# head() prints at most 200 rows; indexing with 1:200 padded the output with
# all-NA rows whenever fewer than 200 pairs existed.
print(head(summary, 200), row.names = FALSE)

# Find words that one candidate used more than twice as often as the other,
# print them, and collect them into `export_table`.
#
# `export_table` is an unnamed 4-column character matrix with two rows per
# selected word:
#   ("TRUMP",   word, Trump count,   Trump/Clinton ratio)
#   ("CLINTON", word, Clinton count, Trump/Clinton ratio)

# Count every word once per candidate instead of rescanning the whole data
# frame twice for each distinct word.
word_levels <- unique(transcript_melted$word)
word_index <- match(transcript_melted$word, word_levels)
clinton_counts <- tabulate(
  word_index[transcript_melted$candidate %in% "CLINTON"],
  nbins = length(word_levels)
)
trump_counts <- tabulate(
  word_index[transcript_melted$candidate %in% "TRUMP"],
  nbins = length(word_levels)
)

# `a > 2 * b` selects the same words as `a / b > 2` for counts, but a word
# spoken by neither candidate (0 and 0) is simply skipped; the division form
# produced NaN there and made `if` fail with a missing-value error.
lopsided <- which(
  clinton_counts > 2 * trump_counts | trump_counts > 2 * clinton_counts
)

# Collect the row pairs in a preallocated list and bind once at the end
# rather than growing the matrix with rbind() on every iteration.
export_rows <- vector("list", length(lopsided))
for (k in seq_along(lopsided)) {
  word <- word_levels[lopsided[k]]
  clinton <- clinton_counts[lopsided[k]]
  trump <- trump_counts[lopsided[k]]

  print(word)
  # Number of times the word was said by each candidate.
  print(table(transcript_melted[which(transcript_melted$word == word), "candidate"]))

  # The ratio is Inf when Clinton never used the word.
  export_rows[[k]] <- rbind(
    c("TRUMP", word, trump, trump / clinton),
    c("CLINTON", word, clinton, trump / clinton)
  )
}
# Starting from an empty 0 x 4 matrix keeps the 4-column shape even when no
# word qualifies.
export_table <- do.call(
  rbind,
  c(list(matrix(nrow = 0, ncol = 4)), export_rows)
)
# Print the words Clinton used more often than Trump, restricted to words
# Trump used at least once, each followed by its per-candidate counts.

# Tabulate both candidates' word counts in one pass instead of filtering the
# full data frame twice per distinct word.
word_levels <- unique(transcript_melted$word)
word_index <- match(transcript_melted$word, word_levels)
clinton_counts <- tabulate(
  word_index[transcript_melted$candidate %in% "CLINTON"],
  nbins = length(word_levels)
)
trump_counts <- tabulate(
  word_index[transcript_melted$candidate %in% "TRUMP"],
  nbins = length(word_levels)
)

# Elementwise `&` belongs here, on the whole count vectors; the per-word
# version used it inside a scalar `if`, where `&&` is the correct operator.
clinton_led <- which(clinton_counts > trump_counts & trump_counts > 0)

for (word in word_levels[clinton_led]) {
  print(word)
  print(table(transcript_melted[which(transcript_melted$word == word), "candidate"]))
}
# Export the lopsided-word table and the full melted word list as CSV files
# (row names are omitted from both).
write.csv(
  x = export_table,
  file = "~/R/reddit/speach/export/export_table.csv",
  row.names = FALSE
)
write.csv(
  x = transcript_melted,
  file = "~/R/reddit/speach/export/transcript_melted.csv",
  row.names = FALSE
)

# Fun bits ----
# Ratio of the number of words Trump said to the number Clinton said.
sum(transcript_melted$candidate == "TRUMP", na.rm = TRUE) /
  sum(transcript_melted$candidate == "CLINTON", na.rm = TRUE)

# Per-candidate counts for a few hand-picked words (Var2 holds the word).
summary[summary$Var2 %in% "tremendous", ]
summary[summary$Var2 %in% "very", ]
summary[summary$Var2 %in% "important", ]
summary[summary$Var2 %in% "wrong", ]

以上是关于使用 R 和 Tableau 分析总统辩论文本的主要内容，如果未能解决你的问题，请参考以下文章。