зеркало из https://github.com/microsoft/RTVS-docs.git
Update plot titles; add more printout messages; load ggplot2 in linear reg
This commit is contained in:
Родитель
25b2a34bf0
Коммит
87156cdae8
|
@ -6,9 +6,9 @@
|
|||
# ----------------------------------------------------------------------------
|
||||
# load packages
|
||||
# ----------------------------------------------------------------------------
|
||||
(if (!require("MASS")) install.packages("MASS"))
|
||||
(if (!require("MASS", quietly = TRUE)) install.packages("MASS"))
|
||||
library("MASS") # to use the Boston dataset
|
||||
(if (!require("gbm")) install.packages("gbm"))
|
||||
(if (!require("gbm", quietly = TRUE)) install.packages("gbm"))
|
||||
library("gbm") # Gradient Boosting Machine package
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
|
|
|
@ -9,9 +9,9 @@
|
|||
# ----------------------------------------------------------------------------
|
||||
# load packages
|
||||
# ----------------------------------------------------------------------------
|
||||
(if (!require("glmnet")) install.packages("glmnet"))
|
||||
(if (!require("glmnet", quietly = TRUE)) install.packages("glmnet"))
|
||||
library("glmnet") # use this package to fit a glmnet model
|
||||
(if (!require("MASS")) install.packages("MASS"))
|
||||
(if (!require("MASS", quietly = TRUE)) install.packages("MASS"))
|
||||
library("MASS") # to use the Boston dataset
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
|
|
|
@ -15,10 +15,13 @@ auth_token <- ""
|
|||
# ----------------------------------------------------------------------------
|
||||
# load packages
|
||||
# ----------------------------------------------------------------------------
|
||||
(if (!require("AzureML")) install.packages("AzureML"))
|
||||
# install packages if they are not already installed
|
||||
(if (!require("AzureML", quietly = TRUE)) install.packages("AzureML"))
|
||||
library("AzureML") # load the package for deploying Azure ML web service
|
||||
(if (!require("MASS")) install.packages("MASS"))
|
||||
(if (!require("MASS", quietly = TRUE)) install.packages("MASS"))
|
||||
library("MASS") # to use the Boston dataset
|
||||
if (!require("ggplot2", quietly = TRUE)) install.packages("ggplot2")
|
||||
library("ggplot2") # used for plotting
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# fit a model and check model performance
|
||||
|
@ -26,7 +29,7 @@ library("MASS") # to use the Boston dataset
|
|||
# check the data
|
||||
head(Boston)
|
||||
ggplot(Boston, aes(x=medv)) +
|
||||
geom_histogram(binwidth=5) +
|
||||
geom_histogram(binwidth=2) +
|
||||
ggtitle("Histogram of Response Variable")
|
||||
|
||||
# fit a model using medv as response and others as predictors
|
||||
|
|
|
@ -25,10 +25,7 @@ if (!RRE)
|
|||
}
|
||||
|
||||
# install a package if it's not already installed
|
||||
if (!require("ggplot2", quietly = TRUE))
|
||||
install.packages("ggplot2")
|
||||
|
||||
# load libraries
|
||||
if (!require("ggplot2", quietly = TRUE)) install.packages("ggplot2")
|
||||
library("ggplot2") # used for plotting
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
|
|
|
@ -25,8 +25,7 @@ if (!RRE)
|
|||
}
|
||||
|
||||
# install a package if it's not already installed
|
||||
if (!require("ggplot2", quietly = TRUE))
|
||||
install.packages("ggplot2")
|
||||
if (!require("ggplot2", quietly = TRUE)) install.packages("ggplot2")
|
||||
|
||||
# load packages
|
||||
library("MASS") # to use the mvrnorm function
|
||||
|
@ -63,7 +62,7 @@ ggplot(group_all, aes(x = V1, y = V2)) +
|
|||
xlim(-5, 5) + ylim(-5, 5) +
|
||||
geom_hline(yintercept = 0) +
|
||||
geom_vline(xintercept = 0) +
|
||||
ggtitle("Simulated data in two overlapping groups")
|
||||
ggtitle("Simulated Data in Two Overlapping Groups")
|
||||
|
||||
# assign data
|
||||
mydata <- group_all[, 1:2]
|
||||
|
@ -88,7 +87,7 @@ ggplot(mydata_clusters, aes(x = V1, y = V2)) +
|
|||
xlim(-5, 5) + ylim(-5, 5) +
|
||||
geom_hline(yintercept = 0) +
|
||||
geom_vline(xintercept = 0) +
|
||||
ggtitle("Clusters found by kmeans()")
|
||||
ggtitle("Clusters Found by kmeans()")
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# cluster analysis with rxKmeans(); it works on MRS only
|
||||
|
@ -128,7 +127,7 @@ if (RRE){
|
|||
xlim(-5, 5) + ylim(-5, 5) +
|
||||
geom_hline(yintercept = 0) +
|
||||
geom_vline(xintercept = 0) +
|
||||
ggtitle("Clusters found by rxKmeans()")
|
||||
ggtitle("Clusters Found by rxKmeans()")
|
||||
|
||||
} else{
|
||||
print("rxKmeans was not run because the RevoScaleR package is not available")
|
||||
|
|
|
@ -25,8 +25,7 @@ if (RRE)
|
|||
}
|
||||
|
||||
# install a package if it's not already installed
|
||||
if (!require("ggplot2", quietly = TRUE))
|
||||
install.packages("ggplot2")
|
||||
if (!require("ggplot2", quietly = TRUE)) install.packages("ggplot2")
|
||||
|
||||
# load packages
|
||||
library("MASS") # to use the mvrnorm function
|
||||
|
@ -63,14 +62,14 @@ group_all <- rbind(group_a, group_b)
|
|||
nclusters <- 2
|
||||
|
||||
# plot sample data
|
||||
plot_data <- group_all[sample(nrow(group_all), 1000),]
|
||||
plot_data <- group_all[sample(2 * nsamples, min(1000, 2 * nsamples)),]
|
||||
ggplot(plot_data, aes(x = V1, y = V2)) +
|
||||
geom_point(aes(colour = group)) +
|
||||
geom_point(data = data.frame(V1 = c(-1, 1), V2 = c(-1, 1)), size = 5) +
|
||||
xlim(-5, 5) + ylim(-5, 5) +
|
||||
geom_hline(yintercept = 0) +
|
||||
geom_vline(xintercept = 0) +
|
||||
ggtitle("Simulated data in two overlapping groups")
|
||||
ggtitle("Simulated Data in Two Overlapping Groups")
|
||||
|
||||
# save data
|
||||
mydata = group_all[, 1:2]
|
||||
|
|
|
@ -51,3 +51,5 @@ k <- round(m / 2)
|
|||
A <- data.frame(A, fac = sample(LETTERS[1:g], m, replace = TRUE))
|
||||
train <- sample(1:m, k)
|
||||
system.time(L <- lda(fac ~ ., data = A, prior = rep(1, g) / g, subset = train))
|
||||
|
||||
message("Save the time and run the code on R, MRO and MRS to compare speed.")
|
|
@ -38,7 +38,12 @@ group_all <- rbind(group_a, group_b)
|
|||
nclusters <- 2
|
||||
|
||||
mydata = group_all[, 1:2]
|
||||
|
||||
message("It might take a while for this to finish if nsamples is large.")
|
||||
# K-Means Cluster Analysis
|
||||
system_time_r <- system.time(fit <- kmeans(mydata, nclusters,
|
||||
iter.max = 1000,
|
||||
algorithm = "Lloyd"))
|
||||
system_time_r
|
||||
|
||||
message("Save the time and run the code on R, MRO and MRS to compare speed.")
|
|
@ -25,8 +25,7 @@ if (!RRE)
|
|||
}
|
||||
|
||||
# install a package if it's not already installed
|
||||
if (!require("ggplot2", quietly = TRUE))
|
||||
install.packages("ggplot2")
|
||||
if (!require("ggplot2", quietly = TRUE)) install.packages("ggplot2")
|
||||
|
||||
# load libraries
|
||||
library("MASS") # to use the mvrnorm function
|
||||
|
@ -54,6 +53,9 @@ simulCluster <- function(nsamples, mean, dimension, group)
|
|||
z
|
||||
}
|
||||
|
||||
message("It might take a while for this to finish if any of the elements in ",
|
||||
"nsamples_list is large.")
|
||||
|
||||
for (nsamples in nsamples_list)
|
||||
{
|
||||
# simulate data and append
|
||||
|
@ -72,7 +74,8 @@ for (nsamples in nsamples_list)
|
|||
# kmeans with MRS
|
||||
|
||||
if (RRE){
|
||||
system_time_rre <- system.time(clust <- rxKmeans( ~ V1 + V2, data = mydata,
|
||||
system_time_rre <- system.time(clust <- rxKmeans( ~ V1 + V2,
|
||||
data = mydata,
|
||||
numClusters = nclusters,
|
||||
algorithm = "lloyd"))
|
||||
}
|
||||
|
@ -99,10 +102,12 @@ if (RRE){
|
|||
geom_point(aes(y = time_rre, colour = "rxKmeans")) +
|
||||
geom_line(aes(y = time_rre, colour = "rxKmeans")) +
|
||||
scale_x_continuous(breaks = seq(2, 8, by = 1)) +
|
||||
scale_colour_manual("Function", values = c(kmeans = "red", rxKmeans = "blue")) +
|
||||
scale_colour_manual("Function",
|
||||
values = c(kmeans = "red", rxKmeans = "blue")) +
|
||||
xlab("log10(number of samples)") +
|
||||
ylab("time in seconds") +
|
||||
ggtitle("If data fits in memory, kmeans() and rxKmeans() are equally performant")
|
||||
ggtitle(paste("If data fits in memory,",
|
||||
"kmeans() and rxKmeans() are equally performant"))
|
||||
} else {
|
||||
ggplot(data = mydata, aes(x = nsamples_log)) +
|
||||
geom_point(aes(y = time_r, colour = "kmeans")) +
|
||||
|
@ -111,5 +116,6 @@ if (RRE){
|
|||
scale_colour_manual("Function", values = c(kmeans = "red")) +
|
||||
xlab("log10(number of samples)") +
|
||||
ylab("time in seconds") +
|
||||
ggtitle("Time for kmeans. To add time for rxKmean, use the RRE engine")
|
||||
ggtitle(paste("Time for kmeans \n",
|
||||
"To add time for rxKmeans, use the R Server engine"))
|
||||
}
|
Загрузка…
Ссылка в новой задаче