# After installing Spark, add its bundled R library to .libPaths() so library(SparkR) can be loaded
Sys.setenv(SPARK_HOME="/Development/Data/spark2")
.libPaths(c(file.path(Sys.getenv("SPARK_HOME"), "R", "lib"), .libPaths()))
library(SparkR)
######################
# Example 1: Spark 2.x API (sparkR.session)
####
# After installing Spark, point R to it by setting the system environment
# variables. This example uses the Spark 2.x API, so SPARK_HOME must point to
# a Spark 2.x installation (adjust the path for your machine).
Sys.setenv(SPARK_HOME = "C:/Apache/spark-2.0.0")
.libPaths(c(file.path(Sys.getenv("SPARK_HOME"), "R", "lib"), .libPaths()))
sparkR.session(master = "local[2]")  # start a local SparkSession with 2 cores
df <- createDataFrame(iris)          # R data.frame to Spark DataFrame
createOrReplaceTempView(df, "iris")  # register the Spark DataFrame as a temp view
x <- sql("SELECT * FROM iris")       # query the view with Spark SQL
nrow(x)                              # row count, computed by Spark
showDF(summary(x))                   # summary statistics (summary() returns a SparkDataFrame)
collect(df)                          # Spark DataFrame back to an R data.frame
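# A minimal sketch of the same kind of query via the DataFrame API instead of
# SQL. Note: createDataFrame(iris) replaces '.' in column names with '_', so
# the column is assumed to be named Sepal_Length here.
setosa <- filter(df, df$Species == "setosa")
head(select(setosa, "Sepal_Length", "Species"))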
sparkR.session.stop()
#########################################
# Example 2: legacy Spark 1.x API (sparkR.init / sparkRSQL.init)
# Set the system environment variables
Sys.setenv(SPARK_HOME = "C:/Apache/spark-1.4.1")
.libPaths(c(file.path(Sys.getenv("SPARK_HOME"), "R", "lib"), .libPaths()))
# load the SparkR library
library(SparkR)
# Create a Spark context and a SQL context
sc <- sparkR.init(master = "local")
sqlContext <- sparkRSQL.init(sc)
# Create a SparkR DataFrame from the built-in faithful dataset
DF <- createDataFrame(sqlContext, faithful)
head(DF)
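# As a sketch of aggregation with the 1.x API: count observations per waiting
# time using groupBy/summarize and the n() column function (these generics are
# assumed to be available in this SparkR version).
waitingCounts <- summarize(groupBy(DF, DF$waiting), count = n(DF$waiting))
head(waitingCounts)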
# Create a simple local data.frame
localDF <- data.frame(name=c("John", "Smith", "Sarah"), age=c(19, 23, 18))
# Convert local data frame to a SparkR DataFrame
df <- createDataFrame(sqlContext, localDF)
# Print its schema
printSchema(df)
# root
# |-- name: string (nullable = true)
# |-- age: double (nullable = true)
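# A short sketch of basic column selection and filtering on the same DataFrame:
head(select(df, df$name))      # project a single column
head(filter(df, df$age > 18))  # keep only rows with age > 18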
# Create a DataFrame from a JSON file
path <- file.path(Sys.getenv("SPARK_HOME"), "examples/src/main/resources/people.json")
peopleDF <- jsonFile(sqlContext, path)  # jsonFile was superseded by read.json in later Spark versions
printSchema(peopleDF)
# Register this DataFrame as a table.
registerTempTable(peopleDF, "people")
# SQL statements can be run with the sql() function, passing in the sqlContext
teenagers <- sql(sqlContext, "SELECT name FROM people WHERE age >= 13 AND age <= 19")
# Call collect to get a local data.frame
teenagersLocalDF <- collect(teenagers)
# Print the teenagers in our dataset
print(teenagersLocalDF)
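# The same query expressed with the DataFrame API instead of SQL (a sketch;
# comparison operators and '&' act on SparkR DataFrame columns):
teenagers2 <- filter(peopleDF, peopleDF$age >= 13 & peopleDF$age <= 19)
head(select(teenagers2, teenagers2$name))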
# Stop the SparkContext now
sparkR.stop()