r 使用R和Tableau分析总统辩论中的文本
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了r 使用R和Tableau分析总统辩论中的文本相关的知识,希望对你有一定的参考价值。
# Transcript source:
# https://www.washingtonpost.com/news/the-fix/wp/2016/09/26/the-first-trump-clinton-presidential-debate-transcript-annotated/
#
# Load the transcript and reshape it into one row per spoken word.
# The CSV is read without a header row; its two columns are named
# "candidate" and "statement" right after loading.
transcript <- read.csv(
  file = "~/R/reddit/speach/import/transcript.csv",
  header = FALSE,
  stringsAsFactors = FALSE
)
colnames(transcript) <- c("candidate", "statement")

# Drop every character that is not alphanumeric, a space or an apostrophe,
# then split each statement on single spaces. The result is a list with one
# character vector of tokens per transcript row.
statement_words <- strsplit(
  gsub("[^[:alnum:] \']", "", transcript$statement),
  " "
)

# Build the long ("melted") table in one step instead of rbind()-ing one word
# at a time: repeat each speaker once per token of their statement.
transcript_melted <- data.frame(
  candidate = rep(
    as.character(transcript$candidate),
    times = lengths(statement_words)
  ),
  word = tolower(unlist(statement_words, use.names = FALSE)),
  stringsAsFactors = FALSE
)

# Keep only real words spoken by the candidates:
#   * consecutive spaces in a statement produce empty tokens, which are
#     dropped (tokens can never contain a space because the split is on " ");
#   * the moderator, Lester Holt, is removed.
# Missing values are dropped explicitly so they cannot turn into NA rows.
keep <- !is.na(transcript_melted$word) &
  nzchar(transcript_melted$word) &
  !is.na(transcript_melted$candidate) &
  transcript_melted$candidate != "LESTER HOLT"
transcript_melted <- transcript_melted[keep, ]

# Store the speaker as a factor whose levels are the remaining speakers only.
transcript_melted$candidate <- factor(transcript_melted$candidate)
# Count how often each candidate said each word. Converting the two-way table
# to a data frame yields the columns Var1 (candidate), Var2 (word) and Freq,
# including zero-count candidate/word combinations.
# NOTE: the object reuses the name of base::summary(); the name is kept
# because the lookups at the end of this script refer to it.
summary <- data.frame(table(transcript_melted$candidate, transcript_melted$word))

# Most frequent candidate/word pairs first.
summary <- summary[order(-summary$Freq), ]

# Show the top 200 pairs. head() stops at the last available row, whereas
# indexing with 1:200 would pad a shorter table with all-NA rows.
print(head(summary, 200), row.names = FALSE)
# Find words that one candidate used more than twice as often as the other.
#
# For every such word the word and its per-candidate table are printed, and
# two rows are added to export_table (a 4-column character matrix without
# column names): one for TRUMP and one for CLINTON, each holding
#   speaker, word, that speaker's count, trump/clinton ratio.
# Both rows carry the same trump/clinton ratio; it is Inf when Clinton never
# used the word and 0 when Trump never used it.
export_table <- matrix(nrow = 0, ncol = 4)

# Group the speakers by word once, instead of rescanning the whole data frame
# twice for every distinct word.
candidates_by_word <- split(transcript_melted$candidate, transcript_melted$word)
distinct_words <- unique(transcript_melted$word)

# One slot per distinct word; slots of words that do not qualify stay NULL
# and are ignored by rbind() below.
skewed_rows <- vector("list", length(distinct_words))

for (i in seq_along(distinct_words)) {
  word <- distinct_words[[i]]
  speakers <- candidates_by_word[[word]]

  # grab the counts
  clinton <- sum(speakers == "CLINTON", na.rm = TRUE)
  trump <- sum(speakers == "TRUMP", na.rm = TRUE)

  # A word said by neither candidate (only possible if another speaker is
  # still in the data) would give 0/0 = NaN and make the `if` below fail
  # with "missing value where TRUE/FALSE needed". Such words are skipped.
  if (clinton == 0L && trump == 0L) {
    next
  }

  if ((clinton / trump > 2) || (trump / clinton > 2)) {
    print(word)
    print(table(candidates_by_word[[word]]))
    skewed_rows[[i]] <- rbind(
      c("TRUMP", word, trump, trump / clinton),
      c("CLINTON", word, clinton, trump / clinton)
    )
  }
}

# Bind all collected rows in a single call, keeping the word order of the
# loop and the TRUMP-then-CLINTON order within each word.
export_table <- do.call(rbind, c(list(export_table), skewed_rows))
# Clinton said more: print every word Clinton used more often than Trump,
# restricted to words Trump used at least once, together with the
# per-candidate counts for that word.

# Group the speakers by word once, instead of filtering the whole data frame
# twice for every distinct word.
candidates_by_word <- split(transcript_melted$candidate, transcript_melted$word)

for (word in unique(transcript_melted$word)) {
  speakers <- candidates_by_word[[word]]
  clinton <- sum(speakers == "CLINTON", na.rm = TRUE)
  trump <- sum(speakers == "TRUMP", na.rm = TRUE)

  # Scalar condition, so use the short-circuit && rather than elementwise &.
  if (clinton > trump && trump > 0) {
    print(word)
    print(table(candidates_by_word[[word]]))
  }
}
# Write both result tables to CSV files, without a row-name column:
# first the skewed-word table, then the one-row-per-word table.
invisible(Map(
  function(data, path) write.csv(data, file = path, row.names = FALSE),
  list(export_table, transcript_melted),
  c(
    "~/R/reddit/speach/export/export_table.csv",
    "~/R/reddit/speach/export/transcript_melted.csv"
  )
))
# Fun bits ----

# Ratio of the number of words Trump said to the number Clinton said.
sum(transcript_melted$candidate == "TRUMP", na.rm = TRUE) /
  sum(transcript_melted$candidate == "CLINTON", na.rm = TRUE)

# Per-candidate counts for a few hand-picked words.
summary[summary$Var2 %in% "tremendous", ]
summary[summary$Var2 %in% "very", ]
summary[summary$Var2 %in% "important", ]
summary[summary$Var2 %in% "wrong", ]
以上是关于r 使用R和Tableau分析总统辩论中的文本的主要内容,如果未能解决你的问题,请参考以下文章