зеркало из https://github.com/microsoft/RTVS-docs.git
proofreading
This commit is contained in:
Родитель
ac4919044e
Коммит
226842a54a
|
@ -89,7 +89,7 @@ library(help = foreign)
|
|||
# To install a new package, use install.packages()
|
||||
# Install the ggplot2 package for it's plotting capability.
|
||||
if (!require("ggplot2"))
|
||||
install.packages("ggplot2")
|
||||
install.packages("ggplot2")
|
||||
|
||||
# Then load the package.
|
||||
library("ggplot2")
|
||||
|
@ -146,7 +146,7 @@ theme_set(theme_gray(base_size = 18))
|
|||
|
||||
# In this sample you use ggplot2.
|
||||
ggplot(diamondSample, aes(x = carat, y = price)) +
|
||||
geom_point(colour = "blue")
|
||||
geom_point(colour = "blue")
|
||||
|
||||
# Add a log scale.
|
||||
ggplot(diamondSample, aes(x = carat, y = price)) +
|
||||
|
@ -155,9 +155,9 @@ ggplot(diamondSample, aes(x = carat, y = price)) +
|
|||
|
||||
# Add a log scale for both scales.
|
||||
ggplot(diamondSample, aes(x = carat, y = price)) +
|
||||
geom_point(colour = "blue") +
|
||||
scale_x_log10() +
|
||||
scale_y_log10()
|
||||
geom_point(colour = "blue") +
|
||||
scale_x_log10() +
|
||||
scale_y_log10()
|
||||
|
||||
### Linear Regression in R
|
||||
|
||||
|
@ -177,10 +177,10 @@ exp(coef(model)[1])
|
|||
|
||||
# Show the model in a plot.
|
||||
ggplot(diamondSample, aes(x = carat, y = price)) +
|
||||
geom_point(colour = "blue") +
|
||||
geom_smooth(method = "lm", colour = "red", size = 2) +
|
||||
scale_x_log10() +
|
||||
scale_y_log10()
|
||||
geom_point(colour = "blue") +
|
||||
geom_smooth(method = "lm", colour = "red", size = 2) +
|
||||
scale_x_log10() +
|
||||
scale_y_log10()
|
||||
|
||||
|
||||
### Regression Diagnostics
|
||||
|
|
|
@ -36,8 +36,8 @@ theme_set(theme_gray(base_size = 18))
|
|||
# and lat to the y-axis.
|
||||
# Then you add a layer with points (geom_point) and a layer to plot maps.
|
||||
p0 <- ggplot(quakes, aes(x = long, y = lat)) +
|
||||
geom_point() +
|
||||
coord_map()
|
||||
geom_point() +
|
||||
coord_map()
|
||||
p0
|
||||
|
||||
# You can use a number of different aesthetics, for example colour or size
|
||||
|
@ -45,14 +45,14 @@ p0
|
|||
|
||||
# Map the depth column to the colour aesthetic.
|
||||
p1 <- ggplot(quakes, aes(x = long, y = lat)) +
|
||||
geom_point(aes(colour = depth)) +
|
||||
coord_map()
|
||||
geom_point(aes(colour = depth)) +
|
||||
coord_map()
|
||||
p1
|
||||
|
||||
# Add size for magnitude. The bigger the magnitude, the larger the point.
|
||||
p2 <- ggplot(quakes, aes(x = long, y = lat)) +
|
||||
geom_point(aes(colour = depth, size = mag)) +
|
||||
coord_map()
|
||||
geom_point(aes(colour = depth, size = mag)) +
|
||||
coord_map()
|
||||
p2
|
||||
|
||||
# You can control the transparancy of a plot object with the alpha aesthetic.
|
||||
|
@ -60,24 +60,24 @@ p2
|
|||
# are translucent.
|
||||
# Add alpha level to hide overplotting, thus revealing detail.
|
||||
p3 <- ggplot(quakes, aes(x = long, y = lat)) +
|
||||
geom_point(aes(colour = depth, size = mag), alpha = 0.25) +
|
||||
coord_map()
|
||||
geom_point(aes(colour = depth, size = mag), alpha = 0.25) +
|
||||
coord_map()
|
||||
p3
|
||||
|
||||
# Change colour gradient by adding a gradient scale.
|
||||
p4 <- ggplot(quakes, aes(x = long, y = lat)) +
|
||||
geom_point(aes(colour = depth, size = mag), alpha = 0.25) +
|
||||
coord_map() +
|
||||
scale_colour_gradient(low = "blue", high = "red")
|
||||
geom_point(aes(colour = depth, size = mag), alpha = 0.25) +
|
||||
coord_map() +
|
||||
scale_colour_gradient(low = "blue", high = "red")
|
||||
p4
|
||||
|
||||
# Add a plot title.
|
||||
p5 <- ggplot(quakes, aes(x = long, y = lat)) +
|
||||
geom_point(aes(colour = depth, size = mag), alpha = 0.25) +
|
||||
scale_colour_gradient(low = "blue", high = "red") +
|
||||
ggtitle("Distribution of earthquakes near Fiji") +
|
||||
coord_map()
|
||||
p5
|
||||
geom_point(aes(colour = depth, size = mag), alpha = 0.25) +
|
||||
scale_colour_gradient(low = "blue", high = "red") +
|
||||
ggtitle("Distribution of earthquakes near Fiji") +
|
||||
coord_map()
|
||||
p5
|
||||
|
||||
# Now plot multiple plots on the same graphic.
|
||||
# The package "grid" is built into R and allows you to take control of the
|
||||
|
@ -89,10 +89,10 @@ theme_set(theme_grey(12) + theme(legend.key.size = unit(0.5, "lines")))
|
|||
library(grid)
|
||||
plot.new()
|
||||
grid.draw(cbind(
|
||||
ggplotGrob(p1),
|
||||
ggplotGrob(p2),
|
||||
ggplotGrob(p3),
|
||||
size = "last"
|
||||
))
|
||||
ggplotGrob(p1),
|
||||
ggplotGrob(p2),
|
||||
ggplotGrob(p3),
|
||||
size = "last"
|
||||
))
|
||||
|
||||
|
||||
|
|
|
@ -1,30 +1,27 @@
|
|||
#-------------------------------------------------------------------------------------------------------------
|
||||
#-------------------------- Regression: Demand estimation with Microsoft R Server ----------------------------
|
||||
#-------------------------------------------------------------------------------------------------------------
|
||||
#
|
||||
#
|
||||
# This example demonstrates the feature engineering process for building a regression model to predict
|
||||
# bike rental demand.
|
||||
#
|
||||
# The dataset contains 17,379 rows and 17 columns, each row representing the number of bike rentals within
|
||||
# a specific hour of a day in the years 2011 or 2012. Weather conditions (such as temperature, humidity,
|
||||
# and wind speed) were included in this raw feature set, and the dates were categorized as holiday vs.
|
||||
# weekday etc.
|
||||
#
|
||||
# The field to predict is "cnt", which contain a count value ranging from 1 to 977, representing the number
|
||||
# of bike rentals within a specific hour. The lag features we add in the data set is the number of bikes that
|
||||
# were rented in each of the previous 12 hours, which caputures the very recent demand for the bikes.
|
||||
#
|
||||
# The following scripts include five basic steps of building this example using Microsoft R Server.
|
||||
#
|
||||
#
|
||||
#--------------------------------------------------------------------------------------------------------------
|
||||
# Regression: Demand estimation with Microsoft R Server
|
||||
|
||||
# This example demonstrates the feature engineering process for building
|
||||
# a regression model to predict bike rental demand.
|
||||
|
||||
# The dataset contains 17,379 rows and 17 columns, each row representing
|
||||
# the number of bike rentals within a specific hour of a day in the years
|
||||
# 2011 or 2012. Weather conditions (such as temperature, humidity,
|
||||
# and wind speed) were included in this raw feature set, and
|
||||
# the dates were categorized as holiday vs. weekday, etc.
|
||||
|
||||
# The field to predict is "cnt", which contains a count value ranging
|
||||
# from 1 to 977, representing the number of bike rentals within
|
||||
# a specific hour. The lag features we add in the data set are the number
|
||||
# of bikes that were rented in each of the previous 12 hours.
|
||||
# This caputures the very recent demand for the bikes.
|
||||
|
||||
# The following scripts include five basic steps of building this example
|
||||
# using Microsoft R Server.
|
||||
|
||||
|
||||
#---------------------------Step 0: Get Started-------------------------------
|
||||
# ----------------------------------------------------------------------------
|
||||
# Check if Microsoft R Server (RRE 8.0) is installed
|
||||
# ----------------------------------------------------------------------------
|
||||
# Step 0: Get Started
|
||||
|
||||
# Check whether Microsoft R Server (RRE 8.0) is installed
|
||||
if (!require("RevoScaleR")) {
|
||||
cat("RevoScaleR package does not seem to exist.
|
||||
\nThis means that the functions starting with 'rx' will not run.
|
||||
|
@ -38,7 +35,7 @@ if (!require("RevoScaleR")) {
|
|||
quit()
|
||||
}
|
||||
|
||||
# Initial some variables.
|
||||
# Initialize some variables.
|
||||
github <- "https://raw.githubusercontent.com/Microsoft/RTVS-docs/master/examples/MRS_and_Machine_Learning/Datasets/"
|
||||
inputFileBikeURL <- paste0(github, "Bike_Rental_UCI_Dataset.csv")
|
||||
|
||||
|
@ -47,81 +44,115 @@ td <- tempdir()
|
|||
outFileBike <- paste0(td, "/bike.xdf")
|
||||
outFileLag <- paste0(td, "/lagData.xdf")
|
||||
|
||||
#---------------------------Step 1: Import the Bike Data---------------------------
|
||||
bike <- rxImport(inData = inputFileBikeURL, outFile = outFileBike, overwrite = TRUE,
|
||||
|
||||
# Step 1: Import the Bike Data
|
||||
|
||||
bike <- rxImport(inData = inputFileBikeURL,
|
||||
outFile = outFileBike, overwrite = TRUE,
|
||||
missingValueString = "M", stringsAsFactors = FALSE,
|
||||
# Remove timestamps and all columns that are part of the label.
|
||||
# Remove timestamps and all columns that are
|
||||
# part of the label.
|
||||
varsToDrop = c("instant", "dteday", "casual", "registered"),
|
||||
# Definite year, weather conditions and season columns as categorical.
|
||||
# Define year, weather conditions and season columns
|
||||
# as categorical.
|
||||
colInfo = list(yr = list(type = "factor"),
|
||||
weathersit = list(type = "factor"),
|
||||
season = list(type = "factor")))
|
||||
|
||||
#---------------------------Step 2: Feature Engineering---------------------------
|
||||
# Add number of bikes that were rented in each of the previous 12 hours as 12 lag features.
|
||||
computeLagFeatures <- function (dataList) { # function for computing lag features.
|
||||
|
||||
# Step 2: Feature Engineering
|
||||
|
||||
# Add the number of bikes that were rented in each of the previous
|
||||
# 12 hours as 12 lag features.
|
||||
computeLagFeatures <- function(dataList) {
|
||||
# Total number of lags that need to be added.
|
||||
numLags <- length(nLagsVector)
|
||||
# lag feature names as lagN
|
||||
varLagNameVector <- paste("lag", nLagsVector, sep="")
|
||||
|
||||
numLags <- length(nLagsVector) # total number of lags that need to be added
|
||||
varLagNameVector <- paste("lag", nLagsVector, sep="") # lag feature names as lagN
|
||||
|
||||
# Set the value of an object "storeLagData" in the transform environment.
|
||||
if (!exists("storeLagData"))
|
||||
{
|
||||
lagData <- mapply(rep, dataList[[varName]][1], times = nLagsVector)
|
||||
names(lagData) <- varLagNameVector
|
||||
.rxSet("storeLagData",lagData)
|
||||
}
|
||||
|
||||
if (!.rxIsTestChunk)
|
||||
{
|
||||
for (iL in 1:numLags)
|
||||
{
|
||||
numRowsInChunk <- length(dataList[[varName]]) # number of rows in the current chunk
|
||||
nlags <- nLagsVector[iL]
|
||||
varLagName <- paste("lag", nlags, sep="")
|
||||
lagData <- .rxGet("storeLagData") # retrieve lag data from the previous chunk
|
||||
allData <- c(lagData[[varLagName]], dataList[[varName]]) # concatenate lagData and the "cnt" feature
|
||||
dataList[[varLagName]] <- allData[1:numRowsInChunk] # take the first N rows of allData, where N is the total number of rows in the original dataList
|
||||
lagData[[varLagName]] <- tail(allData, nlags) # save last nlag rows as the new lagData to be used to process in the next chunk
|
||||
.rxSet("storeLagData", lagData)
|
||||
# Set the value of an object "storeLagData" in the transform environment.
|
||||
if (!exists("storeLagData"))
|
||||
{
|
||||
lagData <- mapply(rep, dataList[[varName]][1], times = nLagsVector)
|
||||
names(lagData) <- varLagNameVector
|
||||
.rxSet("storeLagData",lagData)
|
||||
}
|
||||
}
|
||||
return(dataList)
|
||||
|
||||
if (!.rxIsTestChunk)
|
||||
{
|
||||
for (iL in 1:numLags) {
|
||||
# Number of rows in the current chunk.
|
||||
numRowsInChunk <- length(dataList[[varName]])
|
||||
nlags <- nLagsVector[iL]
|
||||
varLagName <- paste("lag", nlags, sep = "")
|
||||
# Retrieve lag data from the previous chunk.
|
||||
lagData <- .rxGet("storeLagData")
|
||||
# Concatenate lagData and the "cnt" feature.
|
||||
allData <- c(lagData[[varLagName]], dataList[[varName]])
|
||||
# Take the first N rows of allData, where N is the total
|
||||
# number of rows in the original dataList.
|
||||
dataList[[varLagName]] <- allData[1:numRowsInChunk]
|
||||
# Save last nlag rows as the new lagData to be used
|
||||
# to process in the next chunk.
|
||||
lagData[[varLagName]] <- tail(allData, nlags)
|
||||
.rxSet("storeLagData", lagData)
|
||||
}
|
||||
}
|
||||
return(dataList)
|
||||
}
|
||||
|
||||
# Apply the "computeLagFeatures" on the bike data.
|
||||
lagData <- rxDataStep(inData = bike, outFile = outFileLag, transformFunc = computeLagFeatures,
|
||||
transformObjects = list(varName = "cnt", nLagsVector = seq(12)),
|
||||
lagData <- rxDataStep(inData = bike, outFile = outFileLag,
|
||||
transformFunc = computeLagFeatures,
|
||||
transformObjects = list(varName = "cnt",
|
||||
nLagsVector = seq(12)),
|
||||
transformVars = "cnt", overwrite=TRUE)
|
||||
|
||||
#---------------------------Step 3: Prepare Training and Test Datasets---------------------------
|
||||
# Split data by "yr" so that the training data contains records for the year 2011 and the test data contains records for 2012.
|
||||
rxSplit(inData = lagData, outFilesBase = paste0(td, "/modelData"), splitByFactor = "yr", overwrite = TRUE, reportProgress = 0, verbose = 0)
|
||||
|
||||
# Step 3: Prepare Training and Test Datasets
|
||||
|
||||
# Split data by "yr" so that the training data contains records
|
||||
# for the year 2011 and the test data contains records for 2012.
|
||||
rxSplit(inData = lagData, outFilesBase = paste0(td, "/modelData"),
|
||||
splitByFactor = "yr", overwrite = TRUE,
|
||||
reportProgress = 0, verbose = 0)
|
||||
|
||||
# Point to the .xdf files for the training and test set.
|
||||
train <- RxXdfData(paste0(td, "/modelData.yr.0.xdf"))
|
||||
test <- RxXdfData(paste0(td, "/modelData.yr.1.xdf"))
|
||||
|
||||
#---------------------------Step 4: Choose and apply a learning algorithm (Decision Forest Regression)---------------------------
|
||||
# Build a formula for the regression model and remove the "yr", which is used to split the training and test data.
|
||||
modelFormula <- formula(train, depVars = "cnt", varsToDrop = c("RowNum", "yr"))
|
||||
|
||||
# Step 4: Choose and apply a learning algorithm (Decision Forest Regression)
|
||||
|
||||
# Build a formula for the regression model and remove the "yr",
|
||||
# which is used to split the training and test data.
|
||||
modelFormula <- formula(train, depVars = "cnt",
|
||||
varsToDrop = c("RowNum", "yr"))
|
||||
|
||||
# Fit a Decision Forest Regression model on the training data.
|
||||
dForest <- rxDForest(modelFormula, data = train, importance = TRUE, seed = 123)
|
||||
dForest <- rxDForest(modelFormula, data = train, importance = TRUE,
|
||||
seed = 123)
|
||||
|
||||
|
||||
# Step 5: Predict over new data and review the model performance
|
||||
|
||||
#---------------------------Step 5: Predict over new data and review the model performance---------------------------
|
||||
# Predict the probability on the test dataset.
|
||||
predict <- rxPredict(dForest, data = test, overwrite = TRUE, computeResiduals = TRUE)
|
||||
predict <- rxPredict(dForest, data = test, overwrite = TRUE,
|
||||
computeResiduals = TRUE)
|
||||
|
||||
# Calculate three statistical measures: Mean Absolute Error (MAE), Root Mean Squared Error (RMSE), and Relative Absolute Error (RAE).
|
||||
sum <- rxSummary(~ cnt_Resid_abs+cnt_Resid_2+cnt_rel, data = predict, summaryStats = "Mean",
|
||||
# Calculate three statistical measures: Mean Absolute Error (MAE),
|
||||
# Root Mean Squared Error (RMSE), and Relative Absolute Error (RAE).
|
||||
sum <- rxSummary(~cnt_Resid_abs + cnt_Resid_2 + cnt_rel, data = predict,
|
||||
summaryStats = "Mean",
|
||||
transforms = list(cnt_Resid_abs = abs(cnt_Resid),
|
||||
cnt_Resid_2 = cnt_Resid^2,
|
||||
cnt_rel = abs(cnt_Resid)/cnt)
|
||||
)$sDataFrame
|
||||
)$sDataFrame
|
||||
|
||||
# List all measures in a data frame.
|
||||
measures <- data.frame(MAE = sum[1, 2], RMSE = sqrt(sum[2, 2]), RAE = sum[3, 2])
|
||||
measures <- data.frame(MAE = sum[1, 2],
|
||||
RMSE = sqrt(sum[2, 2]),
|
||||
RAE = sum[3, 2])
|
||||
|
||||
# Review the measures.
|
||||
measures
|
||||
|
|
|
@ -1,58 +1,61 @@
|
|||
#-------------------------------------------------------------------------------------------------------------
|
||||
#-------------------------- Regression: Demand estimation with Microsoft R Server ----------------------------
|
||||
#-------------------------------------------------------------------------------------------------------------
|
||||
#
|
||||
#
|
||||
# This example a replication of an existing Azure Machine Learning Experiment - Regression: Demand Estimation
|
||||
# Regression: Demand estimation with Microsoft R Server
|
||||
|
||||
# This example a replication of an existing Azure Machine Learning
|
||||
# Experiment - Regression: Demand Estimation
|
||||
# https://gallery.cortanaanalytics.com/Experiment/Regression-Demand-estimation-4.
|
||||
#
|
||||
# The dataset contains 17,379 rows and 17 columns, each row representing the number of bike rentals within
|
||||
# a specific hour of a day in the years 2011 or 2012. Weather conditions (such as temperature, humidity,
|
||||
# and wind speed) were included in this raw feature set, and the dates were categorized as holiday vs.
|
||||
# weekday etc.
|
||||
#
|
||||
# The field to predict is "cnt", which contain a count value ranging from 1 to 977, representing the number
|
||||
# of bike rentals within a specific hour.
|
||||
#
|
||||
# We built four models using the same algorithm, but with four different training datasets. The four training
|
||||
# datasets that we constructed were all based on the same raw input data, but we added different additional
|
||||
# features to each training set.
|
||||
#
|
||||
|
||||
# The dataset contains 17,379 rows and 17 columns, each row representing
|
||||
# the number of bike rentals within a given hour of a given day in
|
||||
# the years 2011 or 2012. Weather conditions (such as temperature, humidity,
|
||||
# and wind speed) were included in this raw feature set, and the dates
|
||||
# were categorized as holiday vs. weekday, etc.
|
||||
|
||||
# The field to predict is "cnt", which contains a count value ranging
|
||||
# from 1 to 977, representing the number of bike rentals within
|
||||
# a given hour.
|
||||
|
||||
# We built four models using the same algorithm, but with four different
|
||||
# training datasets. The four training datasets that we constructed
|
||||
# were all based on the same raw input data, but we added different
|
||||
# additional features to each training set.
|
||||
|
||||
# Set A = weather + holiday + weekday + weekend features for the predicted day
|
||||
# Set B = number of bikes that were rented in each of the previous 12 hours
|
||||
# Set C = number of bikes that were rented in each of the previous 12 days at the same hour
|
||||
# Set D = number of bikes that were rented in each of the previous 12 weeks at the same hour and the same day
|
||||
#
|
||||
# Set C = number of bikes that were rented in each of the previous 12 days
|
||||
# at the same hour
|
||||
# Set D = number of bikes that were rented in each of the previous 12 weeks
|
||||
# at the same hour and the same day
|
||||
|
||||
# Each of these feature sets captures different aspects of the problem:
|
||||
# Feature set B captures very recent demand for the bikes.
|
||||
# Feature set C captures the demand for bikes at a particular hour.
|
||||
# Feature set D captures demand for bikes at a particular hour and particular day of the week.
|
||||
#
|
||||
# The four training datasets were built by combining the feature set as follows:
|
||||
# Feature set D captures demand for bikes at a particular hour and
|
||||
# particular day of the week.
|
||||
|
||||
# The four training datasets were built by combining the feature set
|
||||
# as follows:
|
||||
# Training set 1: feature set A only
|
||||
# Training set 2: feature sets A+B
|
||||
# Training set 3: feature sets A+B+C
|
||||
# Training set 4: feature sets A+B+C+D
|
||||
#
|
||||
# The following scripts include five basic steps of building this example using Microsoft R Server.
|
||||
#
|
||||
#
|
||||
#--------------------------------------------------------------------------------------------------------------
|
||||
|
||||
# The following scripts include five basic steps of building this example
|
||||
# using Microsoft R Server.
|
||||
|
||||
|
||||
#---------------------------Step 0: Get Started-------------------------------
|
||||
# ----------------------------------------------------------------------------
|
||||
# Check if Microsoft R Server (RRE 8.0) is installed
|
||||
# ----------------------------------------------------------------------------
|
||||
# Step 0: Get Started
|
||||
|
||||
# Check whether Microsoft R Server (RRE 8.0) is installed
|
||||
if (!require("RevoScaleR")) {
|
||||
cat("RevoScaleR package does not seem to exist.
|
||||
\nThis means that the functions starting with 'rx' will not run.
|
||||
\nIf you have Microsoft R Server installed, please switch the R engine.
|
||||
\nFor example, in R Tools for Visual Studio:
|
||||
\nR Tools -> Options -> R Engine.
|
||||
\nIf Microsoft R Server is not installed, you can download it from:
|
||||
\nhttps://www.microsoft.com/en-us/server-cloud/products/r-server/
|
||||
\n")
|
||||
\nThis means that the functions starting with 'rx' will not run.
|
||||
\nIf you have Microsoft R Server installed,
|
||||
\nplease switch the R engine.
|
||||
\nFor example, in R Tools for Visual Studio:
|
||||
\nR Tools -> Options -> R Engine.
|
||||
\nIf Microsoft R Server is not installed, you can download it from:
|
||||
\nhttps://www.microsoft.com/en-us/server-cloud/products/r-server/
|
||||
\n")
|
||||
|
||||
quit()
|
||||
}
|
||||
|
@ -67,95 +70,126 @@ outFileBike <- paste0(td, "/bike.xdf")
|
|||
outFileEdit <- paste0(td, "/editData.xdf")
|
||||
outFileLag <- paste0(td, "/lagData")
|
||||
|
||||
#---------------------------Step 1: Import Data---------------------------
|
||||
|
||||
# Step 1: Import Data
|
||||
|
||||
# Import the bike data.
|
||||
# Remove timestamps and all columns that are part of the label.
|
||||
bike_mrs <- rxImport(inData = inputFileBikeURL, outFile = outFileBike, overwrite = TRUE,
|
||||
bike_mrs <- rxImport(inData = inputFileBikeURL,
|
||||
outFile = outFileBike, overwrite = TRUE,
|
||||
missingValueString = "M", stringsAsFactors = FALSE,
|
||||
varsToDrop = c("instant", "dteday", "casual", "registered"))
|
||||
varsToDrop = c("instant",
|
||||
"dteday",
|
||||
"casual",
|
||||
"registered"))
|
||||
|
||||
# Edit Metadata: Definite year, weather conditions and season columns as categorical.
|
||||
editData_mrs <- rxFactors(inData = bike_mrs, outFile = outFileEdit, sortLevels = TRUE,
|
||||
factorInfo = c("yr", "weathersit", "season"), overwrite = TRUE)
|
||||
# Edit Metadata: Define year, weather conditions and season columns
|
||||
# as categorical.
|
||||
editData_mrs <- rxFactors(inData = bike_mrs, outFile = outFileEdit,
|
||||
sortLevels = TRUE,
|
||||
factorInfo = c("yr", "weathersit", "season"),
|
||||
overwrite = TRUE)
|
||||
|
||||
|
||||
# Step 2: Feature Engineering
|
||||
|
||||
#---------------------------Step 2: Feature Engineering---------------------------
|
||||
# Create a function to construct lag features for four different aspects.
|
||||
computeLagFeatures <- function (dataList) {
|
||||
|
||||
numLags <- length(nLagsVector) # total number of lags that need to be added
|
||||
for (iL in 1:numLags)
|
||||
{
|
||||
nlag <- nLagsVector[iL]
|
||||
varLagName <- paste("demand.",nlag,unit,sep="")
|
||||
numRowsInChunk <- length(dataList[[baseVar]])
|
||||
numRowsToRead <- nlag * interval
|
||||
numRowsPadding <- 0
|
||||
if (numRowsToRead >= .rxStartRow)
|
||||
{
|
||||
numRowsToRead <- .rxStartRow - 1
|
||||
numRowsPadding <- nlag * interval - numRowsToRead
|
||||
# Total number of lags that need to be added.
|
||||
numLags <- length(nLagsVector)
|
||||
for (iL in 1:numLags) {
|
||||
nlag <- nLagsVector[iL]
|
||||
varLagName <- paste("demand.", nlag, unit, sep = "")
|
||||
numRowsInChunk <- length(dataList[[baseVar]])
|
||||
numRowsToRead <- nlag * interval
|
||||
numRowsPadding <- 0
|
||||
if (numRowsToRead >= .rxStartRow) {
|
||||
numRowsToRead <- .rxStartRow - 1
|
||||
numRowsPadding <- nlag * interval - numRowsToRead
|
||||
}
|
||||
# Determine the current row to start processing the data
|
||||
# between chunks.
|
||||
startRow <- .rxStartRow - numRowsToRead
|
||||
previousRowsDataList <- rxReadXdf(file = .rxReadFileName,
|
||||
varsToKeep = baseVar,
|
||||
startRow = startRow,
|
||||
numRows = numRowsToRead,
|
||||
returnDataFrame = FALSE)
|
||||
paddingRowsDataList <- rxReadXdf(file = .rxReadFileName,
|
||||
varsToKeep = baseVar,
|
||||
startRow = 1,
|
||||
numRows = numRowsPadding,
|
||||
returnDataFrame = FALSE)
|
||||
dataList[[varLagName]] <- c(paddingRowsDataList[[baseVar]],
|
||||
previousRowsDataList[[baseVar]],
|
||||
dataList[[baseVar]])[1:numRowsInChunk]
|
||||
}
|
||||
startRow <- .rxStartRow - numRowsToRead # determine the current row to start processing the data between chunks.
|
||||
previousRowsDataList <- rxReadXdf(file = .rxReadFileName,
|
||||
varsToKeep = baseVar,
|
||||
startRow = startRow, numRows = numRowsToRead,
|
||||
returnDataFrame = FALSE)
|
||||
paddingRowsDataList <- rxReadXdf(file=.rxReadFileName,
|
||||
varsToKeep = baseVar,
|
||||
startRow = 1, numRows = numRowsPadding,
|
||||
returnDataFrame = FALSE)
|
||||
dataList[[varLagName]] <- c(paddingRowsDataList[[baseVar]], previousRowsDataList[[baseVar]], dataList[[baseVar]])[1:numRowsInChunk]
|
||||
}
|
||||
return(dataList)
|
||||
return(dataList)
|
||||
}
|
||||
|
||||
# Create a function to add lag features a set of columns at a time.
|
||||
addLag <- function(inputData, outputFileBase) {
|
||||
|
||||
inputFile <- inputData
|
||||
outputFileHour <- paste(outputFileBase, "_hour",".xdf",sep="")
|
||||
outputFileHourDay <- paste(outputFileBase, "_hour_day",".xdf",sep="")
|
||||
outputFileHourDayWeek <- paste(outputFileBase, "_hour_day_week",".xdf",sep="")
|
||||
inputFile <- inputData
|
||||
outputFileHour <- paste(outputFileBase, "_hour",".xdf",sep="")
|
||||
outputFileHourDay <- paste(outputFileBase, "_hour_day",".xdf",sep="")
|
||||
outputFileHourDayWeek <- paste(outputFileBase, "_hour_day_week", ".xdf",
|
||||
sep="")
|
||||
|
||||
# Initialize some fix values.
|
||||
hourInterval <- 1
|
||||
dayInterval <- 24
|
||||
weekInterval <- 168
|
||||
previous <- 12
|
||||
# Initialize some fix values.
|
||||
hourInterval <- 1
|
||||
dayInterval <- 24
|
||||
weekInterval <- 168
|
||||
previous <- 12
|
||||
|
||||
# Add number of bikes that were rented in each of the previous 12 hours (for Set B).
|
||||
rxDataStep(inData = inputFile,outFile = outputFileHour, transformFunc=computeLagFeatures,
|
||||
transformObjects = list(baseVar = "cnt", unit = "hour",nLagsVector=seq(12),
|
||||
interval = hourInterval),
|
||||
transformVars = c("cnt"), overwrite=TRUE)
|
||||
# Add number of bikes that were rented in each of the previous 12 hours
|
||||
# (for Set B).
|
||||
rxDataStep(inData = inputFile, outFile = outputFileHour,
|
||||
transformFunc=computeLagFeatures,
|
||||
transformObjects = list(baseVar = "cnt", unit = "hour",
|
||||
nLagsVector=seq(12),
|
||||
interval = hourInterval),
|
||||
transformVars = c("cnt"), overwrite=TRUE)
|
||||
|
||||
# Add number of bikes that were rented in each of the previous 12 days at the same hour (for Set C).
|
||||
rxDataStep(inData = outputFileHour,outFile = outputFileHourDay, transformFunc=computeLagFeatures,
|
||||
transformObjects = list(baseVar = "cnt", unit = "day",nLagsVector=seq(12),
|
||||
interval = dayInterval),
|
||||
transformVars = c("cnt"), overwrite=TRUE)
|
||||
# Add number of bikes that were rented in each of the previous 12 days
|
||||
# at the same hour (for Set C).
|
||||
rxDataStep(inData = outputFileHour, outFile = outputFileHourDay,
|
||||
transformFunc=computeLagFeatures,
|
||||
transformObjects = list(baseVar = "cnt",
|
||||
unit = "day",
|
||||
nLagsVector=seq(12),
|
||||
interval = dayInterval),
|
||||
transformVars = c("cnt"),
|
||||
overwrite=TRUE)
|
||||
|
||||
# Add number of bikes that were rented in each of the previous 12 weeks at the same hour and the same day (for Set D).
|
||||
rxDataStep(inData = outputFileHourDay,outFile = outputFileHourDayWeek, transformFunc=computeLagFeatures,
|
||||
transformObjects = list(baseVar = "cnt", unit = "week",nLagsVector=seq(12),
|
||||
interval = weekInterval),
|
||||
transformVars = c("cnt"), overwrite=TRUE)
|
||||
# Add number of bikes that were rented in each of the previous 12 weeks
|
||||
# at the same hour and the same day (for Set D).
|
||||
rxDataStep(inData = outputFileHourDay,
|
||||
outFile = outputFileHourDayWeek,
|
||||
transformFunc=computeLagFeatures,
|
||||
transformObjects = list(baseVar = "cnt",
|
||||
unit = "week",
|
||||
nLagsVector=seq(12),
|
||||
interval = weekInterval),
|
||||
transformVars = c("cnt"), overwrite=TRUE)
|
||||
|
||||
file.remove(outputFileHour)
|
||||
file.remove(outputFileHourDay)
|
||||
|
||||
return(outputFileHourDayWeek)
|
||||
file.remove(outputFileHour)
|
||||
file.remove(outputFileHourDay)
|
||||
return(outputFileHourDayWeek)
|
||||
}
|
||||
|
||||
# Set A = weather + holiday + weekday + weekend features for the predicted day.
|
||||
# Set A = weather + holiday + weekday + weekend features for
|
||||
# the predicted day.
|
||||
finalDataA_mrs <- editData_mrs
|
||||
|
||||
# Set B, C & D.
|
||||
finalDataLag_dir <- addLag(inputData = editData_mrs, outputFileBase = outFileLag)
|
||||
finalDataLag_dir <- addLag(inputData = editData_mrs,
|
||||
outputFileBase = outFileLag)
|
||||
finalDataLag_mrs <- RxXdfData(finalDataLag_dir)
|
||||
|
||||
|
||||
#---------------------------Step 3: Prepare Training and Test Datasets---------------------------
|
||||
# Step 3: Prepare Training and Test Datasets
|
||||
|
||||
## Set A:
|
||||
# Split Data.
|
||||
rxSplit(inData = finalDataA_mrs,
|
||||
|
@ -168,41 +202,62 @@ testA_mrs <- RxXdfData(paste0(td, "/modelDataA.yr.1.xdf"))
|
|||
|
||||
## Set B, C & D:
|
||||
# Split Data.
|
||||
rxSplit(inData = finalDataLag_mrs, outFilesBase = paste0(td, "/modelDataLag"), splitByFactor = "yr",
|
||||
rxSplit(inData = finalDataLag_mrs,
|
||||
outFilesBase = paste0(td, "/modelDataLag"), splitByFactor = "yr",
|
||||
overwrite = TRUE, reportProgress = 0, verbose = 0)
|
||||
# Point to the .xdf files for the training and test set.
|
||||
train_mrs <- RxXdfData(paste0(td, "/modelDataLag.yr.0.xdf"))
|
||||
test_mrs <- RxXdfData(paste0(td, "/modelDataLag.yr.1.xdf"))
|
||||
|
||||
#---------------------------Step 4: Choose and apply a learning algorithm (Decision Forest Regression)---------------------------
|
||||
|
||||
# Step 4: Choose and apply a learning algorithm (Decision Forest Regression)
|
||||
|
||||
newDayFeatures <- paste("demand", ".", seq(12), "day", sep = "")
|
||||
newWeekFeatures <- paste("demand", ".", seq(12), "week", sep = "")
|
||||
|
||||
## Set A:
|
||||
# Build a formula for the regression model and remove the "yr", which is used to split the training and test data.
|
||||
formA_mrs <- formula(trainA_mrs, depVars = "cnt", varsToDrop = c("RowNum", "yr"))
|
||||
# Build a formula for the regression model and remove the "yr",
|
||||
# which is used to split the training and test data.
|
||||
formA_mrs <- formula(trainA_mrs, depVars = "cnt",
|
||||
varsToDrop = c("RowNum", "yr"))
|
||||
# Fit Decision Forest Regression model.
|
||||
dForestA_mrs <- rxDForest(formA_mrs, data = trainA_mrs, importance = TRUE, seed = 123)
|
||||
dForestA_mrs <- rxDForest(formA_mrs, data = trainA_mrs,
|
||||
importance = TRUE, seed = 123)
|
||||
|
||||
## Set B:
|
||||
# Build a formula for the regression model and remove the "yr", which is used to split the training and test data, and lag features for Set C and D.
|
||||
formB_mrs <- formula(train_mrs, depVars = "cnt", varsToDrop = c("RowNum", "yr", newDayFeatures, newWeekFeatures))
|
||||
# Build a formula for the regression model and remove the "yr",
|
||||
# which is used to split the training and test data, and lag features
|
||||
# for Set C and D.
|
||||
formB_mrs <- formula(train_mrs, depVars = "cnt",
|
||||
varsToDrop = c("RowNum", "yr",
|
||||
newDayFeatures,
|
||||
newWeekFeatures))
|
||||
# Fit Decision Forest Regression model.
|
||||
dForestB_mrs <- rxDForest(formB_mrs, data = train_mrs, importance = TRUE, seed = 123)
|
||||
dForestB_mrs <- rxDForest(formB_mrs, data = train_mrs,
|
||||
importance = TRUE, seed = 123)
|
||||
|
||||
## Set C:
|
||||
# Build a formula for the regression model and remove the "yr", which is used to split the training and test data, and lag features for Set D.
|
||||
formC_mrs <- formula(train_mrs, depVars = "cnt", varsToDrop = c("RowNum", "yr", newWeekFeatures))
|
||||
# Build a formula for the regression model and remove the "yr",
|
||||
# which is used to split the training and test data, and lag features
|
||||
# for Set D.
|
||||
formC_mrs <- formula(train_mrs, depVars = "cnt",
|
||||
varsToDrop = c("RowNum", "yr", newWeekFeatures))
|
||||
# Fit Decision Forest Regression model.
|
||||
dForestC_mrs <- rxDForest(formC_mrs, data = train_mrs, importance = TRUE, seed = 123)
|
||||
dForestC_mrs <- rxDForest(formC_mrs, data = train_mrs,
|
||||
importance = TRUE, seed = 123)
|
||||
|
||||
## Set D:
|
||||
# Build a formula for the regression model and remove the "yr", which is used to split the training and test data.
|
||||
formD_mrs <- formula(train_mrs, depVars = "cnt", varsToDrop = c("RowNum", "yr"))
|
||||
# Build a formula for the regression model and remove the "yr",
|
||||
# which is used to split the training and test data.
|
||||
formD_mrs <- formula(train_mrs, depVars = "cnt",
|
||||
varsToDrop = c("RowNum", "yr"))
|
||||
# Fit Decision Forest Regression model.
|
||||
dForestD_mrs <- rxDForest(formD_mrs, data = train_mrs, importance = TRUE, seed = 123)
|
||||
dForestD_mrs <- rxDForest(formD_mrs, data = train_mrs,
|
||||
importance = TRUE, seed = 123)
|
||||
|
||||
|
||||
# Step 5: Predict over new data
|
||||
|
||||
#---------------------------Step 5: Predict over new data---------------------------
|
||||
## Set A:
|
||||
# Predict the probability on the test dataset.
|
||||
rxPredict(dForestA_mrs, data = testA_mrs,
|
||||
|
@ -231,17 +286,22 @@ rxPredict(dForestD_mrs, data = test_mrs,
|
|||
residVarNames = "cnt_Resid_D",
|
||||
overwrite = TRUE, computeResiduals = TRUE)
|
||||
|
||||
#---------------------------Prepare outputs---------------------------
|
||||
|
||||
# Prepare outputs
|
||||
## Set A:
|
||||
# Calculate three statistical measures: Mean Absolute Error (MAE), Root Mean Squared Error (RMSE), and Relative Absolute Error (RAE).
|
||||
sumA <- rxSummary(~ cnt_Resid_A_abs+cnt_Resid_A_2+cnt_rel_A, data = testA_mrs, summaryStats = "Mean",
|
||||
# Calculate three statistical measures: Mean Absolute Error (MAE),
|
||||
# Root Mean Squared Error (RMSE), and Relative Absolute Error (RAE).
|
||||
sumA <- rxSummary( ~ cnt_Resid_A_abs + cnt_Resid_A_2 + cnt_rel_A,
|
||||
data = testA_mrs, summaryStats = "Mean",
|
||||
transforms = list(cnt_Resid_A_abs = abs(cnt_Resid_A),
|
||||
cnt_Resid_A_2 = cnt_Resid_A^2,
|
||||
cnt_rel_A = abs(cnt_Resid_A)/cnt)
|
||||
)$sDataFrame
|
||||
)$sDataFrame
|
||||
|
||||
## Set B, C & D:
|
||||
sum <- rxSummary(~ cnt_Resid_B_abs+cnt_Resid_B_2+cnt_rel_B+cnt_Resid_C_abs+cnt_Resid_C_2+cnt_rel_C+cnt_Resid_D_abs+cnt_Resid_D_2+cnt_rel_D,
|
||||
sum <- rxSummary( ~ cnt_Resid_B_abs + cnt_Resid_B_2 + cnt_rel_B +
|
||||
cnt_Resid_C_abs + cnt_Resid_C_2 + cnt_rel_C +
|
||||
cnt_Resid_D_abs+cnt_Resid_D_2+cnt_rel_D,
|
||||
data = test_mrs, summaryStats = "Mean",
|
||||
transforms = list(cnt_Resid_B_abs = abs(cnt_Resid_B),
|
||||
cnt_Resid_B_2 = cnt_Resid_B^2,
|
||||
|
@ -252,7 +312,7 @@ sum <- rxSummary(~ cnt_Resid_B_abs+cnt_Resid_B_2+cnt_rel_B+cnt_Resid_C_abs+cnt_R
|
|||
cnt_Resid_D_abs = abs(cnt_Resid_D),
|
||||
cnt_Resid_D_2 = cnt_Resid_D^2,
|
||||
cnt_rel_D = abs(cnt_Resid_D)/cnt)
|
||||
)$sDataFrame
|
||||
)$sDataFrame
|
||||
|
||||
# Add row names.
|
||||
features <- c("baseline: weather + holiday + weekday + weekend features for the predicted day",
|
||||
|
@ -263,7 +323,10 @@ features <- c("baseline: weather + holiday + weekday + weekend features for the
|
|||
# List all measures in a data frame.
|
||||
measures <- data.frame(Features = features,
|
||||
MAE = c(sumA[1, 2], sum[1, 2], sum[4, 2], sum[7, 2]),
|
||||
RMSE = c(sqrt(sumA[2, 2]), sqrt(sum[2, 2]), sqrt(sum[5, 2]), sqrt(sum[8, 2])),
|
||||
RMSE = c(sqrt(sumA[2, 2]),
|
||||
sqrt(sum[2, 2]),
|
||||
sqrt(sum[5, 2]),
|
||||
sqrt(sum[8, 2])),
|
||||
RAE = c(sumA[3, 2], sum[3, 2], sum[6, 2], sum[9, 2]))
|
||||
|
||||
# Review the measures.
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
# Install a package if it's not already installed.
|
||||
# This script shows how to import data into R that is referenced by a URL.
|
||||
|
||||
# Install the RCurl package if it's not already installed.
|
||||
(if (!require("RCurl", quietly = TRUE)) install.packages("RCurl"))
|
||||
|
||||
# Load packages.
|
||||
|
|
|
@ -3,16 +3,17 @@
|
|||
|
||||
# Check whether the "RevoScaleR" package is loaded in the current environment.
|
||||
if (!require("RevoScaleR")) {
|
||||
cat("RevoScaleR package does not seem to exist.
|
||||
\nThis means that the functions starting with 'rx' will not run.
|
||||
\nIf you have Microsoft R Server installed, please switch the R engine.
|
||||
\nFor example, in R Tools for Visual Studio:
|
||||
\nR Tools -> Options -> R Engine.
|
||||
\nIf Microsoft R Server is not installed, you can download it from:
|
||||
\nhttps://www.microsoft.com/en-us/server-cloud/products/r-server/
|
||||
\n")
|
||||
cat("RevoScaleR package does not seem to exist.
|
||||
\nThis means that the functions starting with 'rx' will not run.
|
||||
\nIf you have Microsoft R Server installed,
|
||||
\nplease switch the R engine.
|
||||
\nFor example, in R Tools for Visual Studio:
|
||||
\nR Tools -> Options -> R Engine.
|
||||
\nIf Microsoft R Server is not installed, you can download it from:
|
||||
\nhttps://www.microsoft.com/en-us/server-cloud/products/r-server/
|
||||
\n")
|
||||
|
||||
quit()
|
||||
quit()
|
||||
}
|
||||
|
||||
# A URL contains the raw data.
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
# ----------------------------------------------------------------------------
|
||||
# purpose: introduction to plotting using the ggplot2 package
|
||||
# audience: you are expected to have some prior experience with R
|
||||
# ----------------------------------------------------------------------------
|
||||
# This sample gives an introduction to plotting using the ggplot2 package.
|
||||
|
||||
# R has a number of built-in datasets
|
||||
# In this example you use the dataset called quakes
|
||||
# This data contains locations of earthquakes off Fiji
|
||||
# The ggplot2 package is tremendously popular because it allows you to create
|
||||
# beautiful plots by describing the plot structure
|
||||
|
||||
# R has a number of built-in datasets.
|
||||
# In this example you use the dataset called quakes.
|
||||
# This data contains locations of earthquakes off Fiji.
|
||||
|
||||
# Read the help page for more information
|
||||
?quakes
|
||||
|
@ -24,71 +24,55 @@ if (!require("mapproj"))
|
|||
install.packages("mapproj") # required for map projections
|
||||
library("mapproj")
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Starting to use the ggplot2 package
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
# The ggplot2 package is tremendously popular because it allows you to create
|
||||
# beautiful plots by describing the plot structure
|
||||
|
||||
|
||||
# Plot longitude and latitude of quakes
|
||||
# To create a plot, you have to specify the data, then map aesthetics to
|
||||
# columns in your data. In this example, you map the column long to the x-axis
|
||||
# and lat to the y-axis.
|
||||
# Then you add a layer with points (geom_point) and a layer to plot maps
|
||||
|
||||
# Then you add a layer with points (geom_point) and a layer to plot maps.
|
||||
p0 <- ggplot(quakes, aes(x = long, y = lat)) +
|
||||
geom_point() +
|
||||
coord_map()
|
||||
geom_point() +
|
||||
coord_map()
|
||||
p0
|
||||
|
||||
# You can use a number of different aesthetics, for example colour or size
|
||||
# of the points
|
||||
# of the points.
|
||||
|
||||
# Map the depth column to the colour aesthetic
|
||||
|
||||
p1 <- ggplot(quakes, aes(x = long, y = lat)) +
|
||||
geom_point(aes(colour = depth)) +
|
||||
coord_map()
|
||||
geom_point(aes(colour = depth)) +
|
||||
coord_map()
|
||||
p1
|
||||
|
||||
# Add size for magnitude. The bigger the magnitude, the larger the point
|
||||
|
||||
p2 <- ggplot(quakes, aes(x = long, y = lat)) +
|
||||
geom_point(aes(colour = depth, size = mag)) +
|
||||
coord_map()
|
||||
geom_point(aes(colour = depth, size = mag)) +
|
||||
coord_map()
|
||||
p2
|
||||
|
||||
# You can control the transparancy of a plot object with the alpha aesthetic
|
||||
# High values of alpha (close to 1) are opaque, while low values (close to 0)
|
||||
# are translucent
|
||||
# Add alpha level to hide overplotting, thus revealing detail
|
||||
|
||||
p3 <- ggplot(quakes, aes(x = long, y = lat)) +
|
||||
geom_point(aes(colour = depth, size = mag), alpha = 0.25) +
|
||||
coord_map()
|
||||
geom_point(aes(colour = depth, size = mag), alpha = 0.25) +
|
||||
coord_map()
|
||||
p3
|
||||
|
||||
# Change colour gradient by adding a gradient scale
|
||||
|
||||
p4 <- ggplot(quakes, aes(x = long, y = lat)) +
|
||||
geom_point(aes(colour = depth, size = mag), alpha = 0.25) +
|
||||
coord_map() +
|
||||
scale_colour_gradient(low = "blue", high = "red")
|
||||
geom_point(aes(colour = depth, size = mag), alpha = 0.25) +
|
||||
coord_map() +
|
||||
scale_colour_gradient(low = "blue", high = "red")
|
||||
p4
|
||||
|
||||
# Add a plot title
|
||||
|
||||
p5 <- ggplot(quakes, aes(x = long, y = lat)) +
|
||||
geom_point(aes(colour = depth, size = mag), alpha = 0.25) +
|
||||
scale_colour_gradient(low = "blue", high = "red") +
|
||||
ggtitle("Distribution of earthquakes near Fiji") +
|
||||
coord_map()
|
||||
|
||||
geom_point(aes(colour = depth, size = mag), alpha = 0.25) +
|
||||
scale_colour_gradient(low = "blue", high = "red") +
|
||||
ggtitle("Distribution of earthquakes near Fiji") +
|
||||
coord_map()
|
||||
p5
|
||||
|
||||
|
||||
# Now plot multiple plots on the same graphic
|
||||
# The package "grid" is built into R and allows you to take control of the
|
||||
# plotting area. A grob is the abbreviation for "graphical object", and the
|
||||
|
@ -100,38 +84,35 @@ theme_set(theme_grey(12) + theme(legend.key.size = unit(0.5, "lines")))
|
|||
library(grid)
|
||||
plot.new()
|
||||
grid.draw(cbind(
|
||||
ggplotGrob(p1),
|
||||
ggplotGrob(p2),
|
||||
ggplotGrob(p3),
|
||||
size = "last"
|
||||
))
|
||||
ggplotGrob(p1),
|
||||
ggplotGrob(p2),
|
||||
ggplotGrob(p3),
|
||||
size = "last"
|
||||
))
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
# The package "gtable" allows you to work with objects called grob tables.
|
||||
# A grob table captures all the information needed to layout grobs in a table
|
||||
# structure. It supports row and column spanning, offers some tools to
|
||||
# automatically figure out the correct dimensions, and makes it easy to align
|
||||
# and combine multiple tables.
|
||||
|
||||
if (!require("gtable")) install.packages("gtable")
|
||||
if (!require("gtable"))
|
||||
install.packages("gtable")
|
||||
library(gtable)
|
||||
|
||||
plonglat <- ggplot(quakes, aes(x = long, y = lat, size = mag, col = depth)) +
|
||||
geom_point(alpha = 0.5) +
|
||||
ggtitle("Top view")
|
||||
plonglat <- ggplot(quakes, aes(x = long, y = lat, size = mag, col = depth)) +
|
||||
geom_point(alpha = 0.5) +
|
||||
ggtitle("Top view")
|
||||
|
||||
plongdep <- ggplot(quakes, aes(x = long, y = -depth, size = mag, col = depth)) +
|
||||
geom_point(alpha = 0.5) +
|
||||
ggtitle("Side view")
|
||||
geom_point(alpha = 0.5) +
|
||||
ggtitle("Side view")
|
||||
|
||||
platdep <- ggplot(quakes, aes(x = depth, y = lat, size = mag, col = depth)) +
|
||||
geom_point(alpha = 0.5) +
|
||||
ggtitle("Side view")
|
||||
|
||||
# Next, define a gtable and add grobs to the table
|
||||
geom_point(alpha = 0.5) +
|
||||
ggtitle("Side view")
|
||||
|
||||
# Next, define a gtable and add grobs to the table.
|
||||
gt <- gtable(widths = unit(rep(1,2), "null"),
|
||||
heights = unit(rep(1,2), "null"))
|
||||
gt <- gtable_add_grob(gt,
|
||||
|
|
Загрузка…
Ссылка в новой задаче