Merge branch 'master' of github.com:seattleflu/incidence-mapper into no-model-duplicates

This commit is contained in:
Clinton Collins 2019-07-16 15:30:15 -07:00
Родитель 204a9faa88 8cd2773cb9
Коммит f6fb724ed9
21 изменённых файлов: 808 добавлений и 16 удалений

Просмотреть файл

@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand
export(addCensusData)
export(expandDB)
export(masterSpatialDB)
export(selectFromDB)
@ -11,6 +12,7 @@ import(jsonlite)
import(lubridate)
import(rgdal)
import(sf)
import(tidycensus)
importFrom(RCurl,getURL)
importFrom(lazyeval,interp)
importFrom(magrittr,"%>%")

75
dbViewR/R/addCensusData.R Normal file
Просмотреть файл

@ -0,0 +1,75 @@
#' addCensusData: function for fetching census data and merging it with input dataset
#'
#' @param db tibble with valid column names for INLA model
#' @param geography geography of your data, default value - 'tract', for other options check here
#' @param variables character string or vector of character strings of variable IDs.
#' @param year the year for which you are requesting data
#' @param state the state for which you are requesting data, default value - 'WA'
#' @param county the county for which you are requesting data, default value - 'King'
#' @param source source database, one of: 'acs' for five-year American Community Survey or 'decennial' for decennial Census
#' @param credentials_path path to your file with Census API key, you can get your own census api key here: https://api.census.gov/data/key_signup.html
#' @return db tibble with flu and census data
#'
#' @import tidycensus
#' @import dplyr
#' @importFrom magrittr %>%
#'
#' @export
#' @examples
#'
addCensusData <- function( db = dbViewR::selectFromDB(),
geography = "tract", variables, year,
state = "WA", county = "King",
source = "decennial",
credentials_path = '/home/rstudio/seattle_flu')
{
#get the census api key value from the file
keyFile <- file(file.path(credentials_path, "census_api_key.txt"), open = 'r')
keyValue <- readLines(keyFile, n = 1)
close(keyFile)
Sys.setenv(CENSUS_API_KEY = keyValue)
if(source == "decennial")
{
dataset <- "sf1"
}
else if(source =="acs")
{
dataset <- "acs5"
}
else {
print('unknown source of census data!')
return(db)
}
varNames <- tidycensus::load_variables(year, dataset, cache = TRUE)
varToQuery <- intersect(variables, varNames$name)
if (length(variables)<length(varToQuery))
print('some requested variables are not presented in census data')
if(source == "decennial")
{
censusData <- tidycensus::get_decennial(geography = geography, state = state, county = county,
variables = varToQuery, year = year)
for (var in varToQuery)
{
censusVar <- censusData %>% dplyr::filter(variable == var) %>% dplyr::select(GEOID, !!quo_name(paste(var, year, sep = ".")) := value)
db$observedData <- dplyr::left_join(db$observedData, censusVar, by = c("residence_census_tract"="GEOID"))
}
}
else if(source =="acs")
{
censusData <- tidycensus::get_acs(geography = geography, state = state, county = county,
variables = varToQuery, year = year)
for (var in varToQuery)
{
censusVar <- censusData %>% dplyr::filter(variable == var) %>% dplyr::select(GEOID, !!quo_name(paste(var, year, sep = ".")) := estimate)
db$observedData <- dplyr::left_join(db$observedData, censusVar, by = c("residence_census_tract"="GEOID"))
}
}
return(db)
}

56
dbViewR/R/loadBingData.R Normal file
Просмотреть файл

@ -0,0 +1,56 @@
#' addCensusData: function for loading bing search queries and categorizing which census tract it's in
#'
#' @param geography geography to bin the bing queries, default value - 'tract'
#' @param bing_queries_path path to your file with Census API key, you can get your own census api key here: https://api.census.gov/data/key_signup.html
#' @return db dataframe with number of bing queries per tract
#'
#' @import rgeos
#' @import sp
#' @import rgdal
#' @import dplyr
#'
#' @export
#' @examples
#'
loadBingData <- function( geography = "tract",
bing_queries_path = '/home/rstudio/seattle_flu')
{
bing_data <- read.csv(file.path(bing_queries_path, "Bing_queries.csv"), header=FALSE)
#edit which columns are which
bing_queries_latlong <- data.frame(bing_data)
coordinates(bing_queries_latlong)<- ~V3 + V2
#this url is broken
#url <-'https://github.com/seattleflu/simulated-data/blob/master/kingCountySpatialData/2016_CensusTracts_KingCountyWa.zip'
#for now download from original source
url <-'https://www.seattle.gov/Documents/Departments/OPCD/Demographics/GeographicFilesandMaps/KingCountyTractsShapefiles.zip'
file <- basename(url)
download.file(url, file)
unzip(file, exdir="KingCountyTractsShapefiles")
#read in shapefile
if (geography == "tract"){
kc_shp <- readOGR(dsn='KingCountyTractsShapefiles', layer = "kc_tract_10")
}
#reproject bing queries onto king county shape file coordinate system
proj4string(bing_queries_latlong) <- CRS("+proj=longlat")
bing_queries_latlong <- spTransform(bing_queries_latlong, proj4string(kc_shp))
#get the census tracts for each of the bing queries
bing_queries_censustracts <- over(bing_queries_latlong, kc_shp)
#now join with the input data
#for some reason there are 398 tracts in the king county shapefile and only 397 in mike's shapefile
bing_queries_pertract <- as.data.frame(table(bing_queries_censustracts$GEOID10))
#rename columns
bing_queries_pertract$Var1 <- as.character(bing_queries_pertract$Var1)
bing_queries_pertract <- rename(bing_queries_pertract, num_bing_queries = Freq)
bing_queries_pertract <- rename(bing_queries_pertract, GEOID = Var1)
return(bing_queries_pertract)
}

65
dbViewR/get_census_data.R Normal file
Просмотреть файл

@ -0,0 +1,65 @@
library("tidycensus")
library("tigris")
library("sf")
library("mapview")
#library(totalcensus) #downloads the entire survey data
#download the 2015 ACS 5-year survey data, which is about 50 GB.
#download_census("acs5year", 2015)
options(tigris_class = "sf")
options(tigris_use_cache = TRUE)
#census api guide https://www.census.gov/data/developers/guidance/api-user-guide.html
#get your own census api key here: https://api.census.gov/data/key_signup.html
#save the key to a txt file named census_api_key.txt
#enter your own user path here
user_path = "C:/Users/grhuynh"
#get the census api key value from the file
census_api_key_value<-read.table(file.path(user_path, "census_api_key.txt"), header=FALSE) #read in file
census_api_key_value<- as.character(census_api_key_value$V1)
#read in the value into the tidycensus library
census_api_key(census_api_key_value)
#read in the shape file for the desired geography, in this case tracts of king county
state_name = "WA"
county_name = "King"
county_tracts <-tracts(state_name, county_name)
#to see all variables
#v17 <- load_variables(2010, "sf1", cache = TRUE) #2010 decennial census
#v17_acs <- load_variables(2017, "acs5", cache = TRUE) # acs census
#view(v17_acs)
#get the total population, P001001 in decennial census
#if need multiple states alternative syntax here https://github.com/walkerke/tidycensus/issues/121
#if need multiple variable syntax here https://github.com/walkerke/tidycensus/issues/129
pop <- get_decennial(geography = "tract", state = "WA", county = county_name,
variables = "P001001", year = 2010, geometry = TRUE)
#also can get population data from acs
pop_acs <- get_acs(geography = "tract", state = state_name, county = county_name,
variables = "B01003_001", year = 2017, geometry = TRUE)
#plot population graph
pop %>%
ggplot(aes(fill = value)) +
geom_sf(color = NA) +
coord_sf(crs = 26911) +
scale_fill_viridis_c(option = "magma")
pop_acs %>%
ggplot(aes(fill = estimate)) +
geom_sf(color = NA) +
coord_sf(crs = 26911) +
scale_fill_viridis_c(option = "magma")

Просмотреть файл

@ -0,0 +1,36 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/addCensusData.R
\name{addCensusData}
\alias{addCensusData}
\title{addCensusData: function for fetching census data and merging it with input dataset}
\usage{
addCensusData(db = dbViewR::selectFromDB(), geography = "tract",
variables, year, state = "WA", county = "King",
source = "decennial", credentials_path = "/home/rstudio/seattle_flu")
}
\arguments{
\item{db}{tibble with valid column names for INLA model}
\item{geography}{geography of your data, default value - 'tract', for other options check here}
\item{variables}{character string or vector of character strings of variable IDs.}
\item{year}{the year for which you are requesting data}
\item{state}{the state for which you are requesting data, default value - 'WA'}
\item{county}{the county for which you are requesting data, default value - 'King'}
\item{source}{source database, one of: 'acs' for five-year American Community Survey or 'decennial' for decennial Census}
\item{credentials_path}{path to your file with Census API key, you can get your own census api key here: https://api.census.gov/data/key_signup.html}
}
\value{
db tibble with flu and census data
}
\description{
addCensusData: function for fetching census data and merging it with input dataset
}
\examples{
}

Просмотреть файл

@ -0,0 +1,23 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/loadBingData.R
\name{loadBingData}
\alias{loadBingData}
\title{addCensusData: function for loading bing search queries and categorizing which census tract it's in}
\usage{
loadBingData(geography = "tract",
bing_queries_path = "/home/rstudio/seattle_flu")
}
\arguments{
\item{geography}{geography to bin the bing queries, default value - 'tract'}
\item{bing_queries_path}{path to your file with Census API key, you can get your own census api key here: https://api.census.gov/data/key_signup.html}
}
\value{
db dataframe with number of bing queries per tract
}
\description{
addCensusData: function for loading bing search queries and categorizing which census tract it's in
}
\examples{
}

Просмотреть файл

@ -2,6 +2,7 @@
export(appendCatchmentModel)
export(constructAdjacencyNetwork)
export(effectsModel)
export(latentFieldModel)
export(modelTrainR)
export(smoothModel)

Просмотреть файл

@ -1 +1,117 @@
#' effectsModel: function to define regression models to study effects of covariates from dbViewR object
#GHH copy from latentFieldModel
#' latentFieldModel: function to define latent field models from dbViewR object
#'
#' @param db dbViewR object with valid column names for INLA model.
#' Smoothing only makes sense by age, location, and time. Factor variables cannot be smoothed!
#' @param shp sf object with residence_census_tract shapes (all higher levels assume iid and not local smoothing)
#' @param family non-standard family override (default = NULL).
#' @param neighborGraph non-standard neighbor graph (default = NULL)
#'
#' @return modelDefinition object for modelTrainR, list with fields
#' type = latentField
#' family : as input
#' formula : model definition for INLA
#' inputData : inla-prepared db$observed data
#' neighborGraph : as input or derived from shp during formula construction
#'
#' @import INLA
#'
#' @export
#' @examples
#' return h1n1pdm incidence model by time and location
#' modelDefinition <- smoothModel(db = dbViewR::selectFromDB(), shp = dbViewR::masterSpatialDB())
#'
effectsModel <- function(db , shp, family = NULL, neighborGraph = NULL){
#INLA data frame that may get augmented columns we don't need to see when we're done
inputData <- db$observedData
# identify intended family
if(is.null(family)){
if (all(inputData$n == inputData$positive)){
family = 'poisson'
} else if (any(inputData$n > inputData$positive)){
family = 'binomial'
} else if (any(inputData$n < inputData$positive)){
return('n < positive !!! invald db$observedData.')
}
}
# construct priors
hyper=list()
hyper$global <- list(prec = list( prior = "pc.prec", param = 1/10, alpha = 0.01))
hyper$local <- list(prec = list( prior = "pc.prec", param = 1/200, alpha = 0.01))
hyper$age <- list(prec = list( prior = "pc.prec", param = 1, alpha = 0.01))
hyper$time <- list(prec = list( prior = "pc.prec", param = 1/50, alpha = 0.01))
# unlike smoothing model, we only replicate latent fields across pathogens, but treat all other factors as fixed effects
# find pathogen types
if('pathogen' %in% names(db$observedData)){
levelSet <- levels(as.factor(inputData$pathogen))
numLevels <- length(levelSet)
validLatentFieldColumns <- c('pathogen')
} else {
return('error! must provide "pathogen" column.')
}
# set family across all levels
family <- rep(family,numLevels)
# build outcome matrix and replicate list for multiple likelihoods
outcome <- matrix(NA,nrow(inputData),numLevels)
replicateIdx <- matrix(NA,nrow(inputData),1)
for( k in levelSet){
idx <- inputData$pathogen %in% k
count <- which(levelSet %in% k)
outcome[idx, count] <- inputData$positive[idx]
replicateIdx[idx]<-count
}
# initialize formula for each level
if(numLevels>1){
outcomeStr <- paste('cbind(',paste(paste('outcome',1:numLevels,sep='.'),sep='',collapse=', '),')',sep='',collapse = '')
formula <- as.formula(paste(outcomeStr,'~','pathogen - 1 + catchment',sep=' '))
} else { # why does R do inconsistent stuff with column names!?!!
#formula <- as.formula('outcome ~ 1 + catchment')
formula <- as.formula('outcome ~ 1')
}
# factors as fixed effects, assuming no interaction terms
validFactorNames <- names(db$observedData)[ !( (names(db$observedData) %in% c('pathogen','n','positive')) |
grepl('row',names(db$observedData)) |
grepl('age',names(db$observedData)) |
grepl('residence_',names(db$observedData)) |
grepl('work_',names(db$observedData)) |
grepl('encounter',names(db$observedData)) )]
factorIdx <- names(db$observedData) %in% validFactorNames
for(COLUMN in names(db$observedData)[factorIdx]){
formula <- as.formula(paste(as.character(formula)[2],'~',paste(as.character(formula)[3],COLUMN,sep='+')))
}
df <- data.frame(outcome = outcome, inputData, replicateIdx)
if(any(grepl('residence', names(inputData)) | grepl('work', names(inputData)))){
spatial_domain<-shp$domain[1]
} else {
spatial_domain <- NULL
}
modelDefinition <- list(type='effects', family = family, formula = formula, lincomb = c(),
inputData = df, neighborGraph=neighborGraph, hyper=hyper,
observedData = db$observedData,
queryList = db$queryList,
spatial_domain = spatial_domain)
return(modelDefinition)
}

Просмотреть файл

@ -39,6 +39,12 @@ modelTrainR <- function(modelDefinition){
inla = model, modelDefinition = modelDefinition))
} else if (modelDefinition$type == 'effects'){
#same as smooth data for now?
modeledData <- appendSmoothData(model,modelDefinition)
# return output data
return(list(modeledData = modeledData, inla = model, modelDefinition = modelDefinition))
}

Просмотреть файл

@ -0,0 +1,36 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/effectsModel.R
\name{effectsModel}
\alias{effectsModel}
\title{effectsModel: function to define regression models to study effects of covariates from dbViewR object
latentFieldModel: function to define latent field models from dbViewR object}
\usage{
effectsModel(db, shp, family = NULL, neighborGraph = NULL)
}
\arguments{
\item{db}{dbViewR object with valid column names for INLA model.
Smoothing only makes sense by age, location, and time. Factor variables cannot be smoothed!}
\item{shp}{sf object with residence_census_tract shapes (all higher levels assume iid and not local smoothing)}
\item{family}{non-standard family override (default = NULL).}
\item{neighborGraph}{non-standard neighbor graph (default = NULL)}
}
\value{
modelDefinition object for modelTrainR, list with fields
type = latentField
family : as input
formula : model definition for INLA
inputData : inla-prepared db$observed data
neighborGraph : as input or derived from shp during formula construction
}
\description{
effectsModel: function to define regression models to study effects of covariates from dbViewR object
latentFieldModel: function to define latent field models from dbViewR object
}
\examples{
return h1n1pdm incidence model by time and location
modelDefinition <- smoothModel(db = dbViewR::selectFromDB(), shp = dbViewR::masterSpatialDB())
}

Просмотреть файл

@ -1,9 +1,11 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/smoothModel.R
% Please edit documentation in R/smoothModel - Copy.R, R/smoothModel.R
\name{smoothModel}
\alias{smoothModel}
\title{smoothModel: function to define variable smoothing models from dbViewR object}
\usage{
smoothModel(db, shp, family = NULL, neighborGraph = NULL)
smoothModel(db, shp, family = NULL, neighborGraph = NULL)
}
\arguments{
@ -14,9 +16,25 @@ Smoothing only makes sense by age, location, and time. Factor variables cannot
\item{family}{non-standard family override (default = NULL).}
\item{neighborGraph}{non-standard neighbor graph (default = NULL)}
\item{db}{dbViewR object with valid column names for INLA model.
Smoothing only makes sense by age, location, and time. Factor variables cannot be smoothed!}
\item{shp}{sf object with residence_census_tract shapes (all higher levels assume iid and not local smoothing)}
\item{family}{non-standard family override (default = NULL).}
\item{neighborGraph}{non-standard neighbor graph (default = NULL)}
}
\value{
modelDefinition object for modelTrainR, list with fields
type = smooth
family : as input
formula : model definition for INLA
inputData : inla-prepared db$observed data
neighborGraph : as input or derived from shp during formula construction
modelDefinition object for modelTrainR, list with fields
type = smooth
family : as input
@ -25,10 +43,15 @@ modelDefinition object for modelTrainR, list with fields
neighborGraph : as input or derived from shp during formula construction
}
\description{
smoothModel: function to define variable smoothing models from dbViewR object
smoothModel: function to define variable smoothing models from dbViewR object
}
\examples{
return h1n1pdm incidence model by time and location
modelDefinition <- smoothModel(db = dbViewR::selectFromDB(), shp = dbViewR::masterSpatialDB())
return h1n1pdm incidence model by time and location
modelDefinition <- smoothModel(db = dbViewR::selectFromDB(), shp = dbViewR::masterSpatialDB())
}

Просмотреть файл

@ -1,16 +1,5 @@
# Generated by roxygen2: do not edit by hand
export(getHumanReadableModelIdFromModel)
export(getHumanReadableModelIdFromQuery)
export(getModelIdFromModel)
export(getModelIdFromQuery)
export(getModelQueryObjectFromModel)
export(getModelQueryObjectFromQuery)
export(loadModelFileById)
export(queryLoadedModel)
export(queryModelById)
export(returnModel)
export(saveModel)
import(digest)
import(jsonlite)
import(logging)

Просмотреть файл

@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand
export(ggplotSmoothEffects)
export(ggplotSmoothMap)
export(ggplotSmoothSequential)
import(dplyr)

Просмотреть файл

@ -0,0 +1,42 @@
#' ggplotDiagnosticPlots: function for diagnostic plots for INLA model
#'
#' @param model Model object from incidenceMapR::modelTrainR
#' @param observed The observed values. If NULL defined by model family
#' @param CI Add credible intervals to the fitted values?
#' @param binwidth The size of the bins used for the histogram. If NULL ggplot guesses for you.
#' @param se Plot a ribbon showing the standard error of the smoother.
#' @param method What method should be used for the smoother. Defaults to loess unless data is large. Other options include 'gam', 'loess', 'lm'. See geom_smooth for details.
#' @return List of 3 ggplot objects: default plots, observed vs fitted and histogram, standardized residuals
#'
#' @import ggplot2
#' @import INLAutils
#'
#' @export
#' @examples
#' ggplotDiagnosticPlots(model)
#'
ggplotDiagnosticPlots <- function(model, observed = NULL, CI = FALSE, binwidth = NULL, se = TRUE, method = "auto"){
plots <- list()
# for default plots (marginals, )
plots[[1]] <- autoplot(model$inla)
if (is.null(observed))
{
if (model$modelDefinition$family == 'poisson')
observed <- model$modelDefinition$observedData$positive
else
return('observed is missing and can not be defined from model family!!! provide observed data.')
}
# for observed vs fitted and histogram plot
plots[[2]] <- ggplot_inla_residuals(model$inla, observed, CI, binwidth)
# for Standardized Residuals
plots[[3]] <- ggplot_inla_residuals2(model$inla, observed, CI, se, method)
print(plots[[3]])
return(invisible(plots))
}

Просмотреть файл

@ -16,12 +16,18 @@
#' plotDat$positive[plotDat$positive==0]<-NaN
#' ggplotSmoothMap(plotDat,shp)
#'
ggplotSmoothMap <- function(model, shp, title='', shape_level = 'residence_census_tract'){
ggplotSmoothMap <- function(model, shp, censusdb = NULL, title='', shape_level = 'residence_census_tract'){
plotDat <- right_join(model$modeledData,shp, by=shape_level)
plotDat$positive[plotDat$n==0]<-NaN
#convert to fraction
if(!is.null(censusdb)){
plotDat$positive <- plotDat$positive / censusdb$observedData$P005004.2010
plotDat$modeled_count_mode <- plotDat$modeled_count_mode / censusdb$observedData$P005004.2010
}
bbox<-sf::st_bbox(shp$geometry)
mapSettings <- ggplot() + #xlim(c(min(122.5, -bbox[1]),max(121.7,-bbox[3]))) + ylim(c(max(47.17,bbox[2]),min(47.76,bbox[4]))) +
@ -35,6 +41,7 @@ p<-mapSettings + geom_sf(data=shp,size=0.1,aes(fill=NaN))
colorLimits<-c(min(c(plotDat$positive,plotDat$modeled_count_mode),na.rm=TRUE),max(c(plotDat$positive,plotDat$modeled_count_mode),na.rm=TRUE))
colorBreaks<-round(seq(min(colorLimits),sqrt(max(colorLimits)), length.out = 6)^2)
colorBreaks<-seq(min(colorLimits),sqrt(max(colorLimits)), length.out = 6)^2
p1 <- p + geom_sf(data=plotDat,size=0, aes(fill=positive)) +
guides(fill=guide_legend(title="observed")) +
@ -47,6 +54,127 @@ p2 <- p + geom_sf(data=plotDat,size=0, aes(fill=modeled_count_mode)) +
grid.arrange(p1,p2,nrow=1, top=textGrob(title))
}
#' ggplotFixedEffects: function for plotting data, smoothed model, and fixed effects model next to each other
#'
#' @param plotDatEffects data.frame that joins dbViewR::modeledData
#' @return ggplot object
#'
#' @import ggplot2
#' @import gridExtra
#' @import viridis
#'
#' @export
#' @examples
#'
#'
ggplotFixedEffects <- function(model_effects, shp, censusdb = NULL, title='', shape_level = 'residence_census_tract'){
plotDatEffects <- right_join(model_effects$modeledData,shp, by=shape_level)
plotDatEffects$positive[plotDatEffects$n==0]<-NaN
#convert to fraction
if(!is.null(censusdb)){
plotDatEffects$positive <- plotDatEffects$positive / censusdb$observedData$P005004.2010
plotDatEffects$modeled_count_mode <- plotDatEffects$modeled_count_mode / censusdb$observedData$P005004.2010
}
bbox<-sf::st_bbox(shp$geometry)
mapSettings <- ggplot() + #xlim(c(min(122.5, -bbox[1]),max(121.7,-bbox[3]))) + ylim(c(max(47.17,bbox[2]),min(47.76,bbox[4]))) +
theme_bw() +
theme(axis.text=element_blank(),axis.ticks=element_blank(),panel.grid.major=element_line(colour="transparent"), panel.border = element_blank())
p<-mapSettings + geom_sf(data=shp,size=0.1,aes(fill=NaN))
# mapSettingsTight <- ggplot() + xlim(c(122.5,122.15)) + ylim(c(47.42,47.76)) + theme_bw() +
# theme(axis.text=element_blank(),axis.ticks=element_blank(),panel.grid.major=element_line(colour="transparent"), panel.border = element_blank())
# p<-mapSettingsTight + geom_sf(data=shp,size=0.1,aes(fill=NaN))
colorLimits<-c(min(c(plotDatEffects$positive,plotDatEffects$modeled_count_mode),na.rm=TRUE),max(c(plotDatEffects$positive,plotDatEffects$modeled_count_mode),na.rm=TRUE))
colorBreaks<-round(seq(min(colorLimits),sqrt(max(colorLimits)), length.out = 6)^2)
colorBreaks<-seq(min(colorLimits),sqrt(max(colorLimits)), length.out = 6)^2
p1 <- p + geom_sf(data=plotDatEffects,size=0, aes(fill=num_bing_queries)) +
guides(fill=guide_legend(title="num_bing_queries")) +
scale_fill_viridis(na.value="transparent",trans = "sqrt",breaks=colorBreaks,limits=colorLimits)
p2 <- p + geom_sf(data=plotDatEffects,size=0, aes(fill=modeled_count_mode)) +
guides(fill=guide_legend(title="Expected")) +
scale_fill_viridis(na.value="transparent",trans = "sqrt",breaks=colorBreaks,limits=colorLimits)
grid.arrange(p1,p2,nrow=1, top=textGrob(title))
}
#' ggplotSmoothEffects: function for plotting smoothed model and fixed effects model next to each other
#'
#' @param plotDat data.frame that joins dbViewR::modeledData
#' @return ggplot object
#'
#' @import ggplot2
#' @import gridExtra
#' @import viridis
#'
#' @export
#' @examples
#'
#'
ggplotSmoothEffects <- function(model_smooth, model_effects, shp, censusdb = NULL, title='', shape_level = 'residence_census_tract'){
plotDat <- right_join(model_smooth$modeledData,shp, by=shape_level)
plotDat$positive[plotDat$n==0]<-NaN
plotDatEffects <- right_join(model_effects$modeledData,shp, by=shape_level)
plotDatEffects$positive[plotDatEffects$n==0]<-NaN
#convert to fraction
if(!is.null(censusdb)){
plotDat$positive <- plotDat$positive / censusdb$observedData$P005004.2010
plotDat$modeled_count_mode <- plotDat$modeled_count_mode / censusdb$observedData$P005004.2010
plotDatEffects$positive <- plotDatEffects$positive / censusdb$observedData$P005004.2010
plotDatEffects$modeled_count_mode <- plotDatEffects$modeled_count_mode / censusdb$observedData$P005004.2010
}
bbox<-sf::st_bbox(shp$geometry)
mapSettings <- ggplot() + #xlim(c(min(122.5, -bbox[1]),max(121.7,-bbox[3]))) + ylim(c(max(47.17,bbox[2]),min(47.76,bbox[4]))) +
theme_bw() +
theme(axis.text=element_blank(),axis.ticks=element_blank(),panel.grid.major=element_line(colour="transparent"), panel.border = element_blank())
p<-mapSettings + geom_sf(data=shp,size=0.1,aes(fill=NaN))
# mapSettingsTight <- ggplot() + xlim(c(122.5,122.15)) + ylim(c(47.42,47.76)) + theme_bw() +
# theme(axis.text=element_blank(),axis.ticks=element_blank(),panel.grid.major=element_line(colour="transparent"), panel.border = element_blank())
# p<-mapSettingsTight + geom_sf(data=shp,size=0.1,aes(fill=NaN))
colorLimits<-c(min(c(plotDat$positive,plotDat$modeled_count_mode),na.rm=TRUE),max(c(plotDat$positive,plotDat$modeled_count_mode),na.rm=TRUE))
colorBreaks<-round(seq(min(colorLimits),sqrt(max(colorLimits)), length.out = 6)^2)
colorBreaks<-seq(min(colorLimits),sqrt(max(colorLimits)), length.out = 6)^2
p1 <- p + geom_sf(data=plotDat,size=0, aes(fill=positive)) +
guides(fill=guide_legend(title="observed")) +
scale_fill_viridis(na.value="transparent",trans = "sqrt",breaks=colorBreaks,limits=colorLimits)
p2 <- p + geom_sf(data=plotDat,size=0, aes(fill=modeled_count_mode)) +
guides(fill=guide_legend(title="expected")) +
scale_fill_viridis(na.value="transparent",trans = "sqrt",breaks=colorBreaks,limits=colorLimits)
p3 <- p + geom_sf(data=plotDatEffects,size=0, aes(fill=num_bing_queries)) +
guides(fill=guide_legend(title="num_bing_queries")) +
scale_fill_viridis(na.value="transparent",trans = "sqrt",breaks=colorBreaks,limits=colorLimits)
p4 <- p + geom_sf(data=plotDatEffects,size=0, aes(fill=modeled_count_mode)) +
guides(fill=guide_legend(title="Expected")) +
scale_fill_viridis(na.value="transparent",trans = "sqrt",breaks=colorBreaks,limits=colorLimits)
grid.arrange(p1,p2,p3,p4,nrow=2, top=textGrob(title))
}
#' ggplotSmoothSequential: function for plotting data and smoothed model next to each other as sequential variable (timeseries, age)
#'
#' NOT YET IMPLEMENTED!

Просмотреть файл

@ -0,0 +1,32 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ggplotDiagnosticPlots.R
\name{ggplotDiagnosticPlots}
\alias{ggplotDiagnosticPlots}
\title{ggplotDiagnosticPlots: function for diagnostic plots for INLA model}
\usage{
ggplotDiagnosticPlots(model, observed = NULL, CI = FALSE,
binwidth = NULL, se = TRUE, method = "auto")
}
\arguments{
\item{model}{Model object from incidenceMapR::modelTrainR}
\item{observed}{The observed values. If NULL defined by model family}
\item{CI}{Add credible intervals to the fitted values?}
\item{binwidth}{The size of the bins used for the histogram. If NULL ggplot guesses for you.}
\item{se}{Plot a ribbon showing the standard error of the smoother.}
\item{method}{What method should be used for the smoother. Defaults to loess unless data is large. Other options include 'gam', 'loess', 'lm'. See geom_smooth for details.}
}
\value{
List of 3 ggplot objects: default plots, observed vs fitted and histogram, standardized residuals
}
\description{
ggplotDiagnosticPlots: function for diagnostic plots for INLA model
}
\examples{
ggplotDiagnosticPlots(model)
}

Просмотреть файл

@ -0,0 +1,22 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ggplotWrappers.R
\name{ggplotFixedEffects}
\alias{ggplotFixedEffects}
\title{ggplotFixedEffects: function for plotting data, smoothed model, and fixed effects model next to each other}
\usage{
ggplotFixedEffects(model_effects, shp, censusdb = NULL, title = "",
shape_level = "residence_census_tract")
}
\arguments{
\item{plotDatEffects}{data.frame that joins dbViewR::modeledData}
}
\value{
ggplot object
}
\description{
ggplotFixedEffects: function for plotting data, smoothed model, and fixed effects model next to each other
}
\examples{
}

Просмотреть файл

@ -0,0 +1,25 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ggplotWrappers.R
\name{ggplotSmoothEffects}
\alias{ggplotSmoothEffects}
\title{ggplotSmoothEffects: function for plotting data, smoothed model, and fixed effects model next to each other}
\usage{
ggplotSmoothEffects(model_smooth, model_effects, shp, censusdb = NULL,
title = "", shape_level = "residence_census_tract")
}
\arguments{
\item{plotDat}{data.frame that joins dbViewR::modeledData}
}
\value{
ggplot object
}
\description{
NOT YET IMPLEMENTED!
}
\examples{
plotDat <- right_join(model$modeledData,shp, by=c('residence_census_tract'))
plotDat$positive[plotDat$positive==0]<-NaN
ggplotSmoothSequential(plotDat)
}

Просмотреть файл

@ -4,7 +4,7 @@
\alias{ggplotSmoothMap}
\title{ggplotSmoothMap: function for plotting data and smoothed model next to each other on map}
\usage{
ggplotSmoothMap(model, shp, title = "",
ggplotSmoothMap(model, shp, censusdb = NULL, title = "",
shape_level = "residence_census_tract")
}
\arguments{

Просмотреть файл

@ -0,0 +1,74 @@
# testModelTrainR
# script to test incidenceMapR package
library(dbViewR)
library(incidenceMapR)
library(modelTestR)
shp <- masterSpatialDB() # census-tract shapefiles
# ggplot build eventually will be replaced by function ggplotSmoothSequential
library(ggplot2)
plotSettings <- ggplot() + theme_bw() + theme(panel.border = element_blank()) + xlab('')
###################################
##### smoothing models ############
###################################
## simulated data
queryIn <- list(
SELECT =list(COLUMN=c('site_type','residence_census_tract')),
WHERE =list(COLUMN='site_type', IN = c('kiosk')),
GROUP_BY =list(COLUMN=c('site_type','residence_census_tract')),
SUMMARIZE=list(COLUMN='site_type', IN= c('kiosk'))
)
db <- expandDB( selectFromDB( queryIn ) )
#smooth model
modelDefinition <- smoothModel(db=db, shp=shp)
model <- modelTrainR(modelDefinition)
ggplotSmoothMap(model,shp)
ggplotDiagnosticPlots(model) #plot marginals and residuals
####################################################
##### fixed effects models: bing queries ############
####################################################
## simulated data
queryIn <- list(
SELECT =list(COLUMN=c('site_type','residence_census_tract')),
WHERE =list(COLUMN='site_type', IN = c('kiosk')),
GROUP_BY =list(COLUMN=c('site_type','residence_census_tract')),
SUMMARIZE=list(COLUMN='site_type', IN= c('kiosk'))
)
db <- expandDB( selectFromDB( queryIn ) )
#get the number of bing queries per tract
bing_queries_pertract <- loadBingData(geography = "tract",
bing_queries_path = 'C:/Users/grhuynh/FluMapModel')
#or add bing queries as random numbers from 0 to max positives
#searchdb$observedData$num_bing_queries <- runif(nrow(searchdb$observedData), min = 0, max = max(searchdb$observedData$positive)) #add random numbers between 0 and max #of positives as the bing weights
#or add bing queries as the same values as positive
#searchdb$observedData$num_bing_queries <- searchdb$observedData$positive
#join to the observed data
searchdb <- db
library(dplyr)
searchdb$observedData <- inner_join(searchdb$observedData, bing_queries_pertract, by = c("residence_census_tract" = "GEOID"))
#run the fixed effects model
searchdb$observedData$site_type <- NULL #this messes up the fixed effects model because it has only 1 level
modelDefinition <- effectsModel(db=searchdb, shp=shp)
model_fixedeffects <- modelTrainR(modelDefinition)
#plot just the fixed effects model
ggplotFixedEffects(model_fixedeffects,shp)
ggplotDiagnosticPlots(model_fixedeffects) #the residuals plot doesn't work
####################################################
##### compare smooth and fixed effects models ############
####################################################
#run all above code
ggplotSmoothEffects(model, model_fixedeffects,shp)

Просмотреть файл

@ -28,9 +28,50 @@ db <- expandDB( selectFromDB( queryIn ) )
modelDefinition <- smoothModel(db=db, shp=shp)
model <- modelTrainR(modelDefinition)
ggplotSmoothMap(model,shp)
#OR plot as fraction of census population in each tract
censusdb <- addCensusData(db = db, source = "decennial", variable = c("B01003_001", "B09001_001", "P005004"), year = 2010, credentials_path = 'C:/Users/grhuynh')
ggplotSmoothMap(model,shp, censusdb = censusdb)
## simulated data with bing query search data, run fixed effects model
queryIn <- list(
SELECT =list(COLUMN=c('site_type','residence_census_tract')),
WHERE =list(COLUMN='site_type', IN = c('kiosk')),
GROUP_BY =list(COLUMN=c('site_type','residence_census_tract')),
SUMMARIZE=list(COLUMN='site_type', IN= c('kiosk'))
)
db <- expandDB( selectFromDB( queryIn ) )
modelDefinition <- smoothModel(db=db, shp=shp)
model <- modelTrainR(modelDefinition)
#ADD bing query search data and run fixed effects model
searchdb <- db
searchdb$observedData$site_type <- NULL #this messes up the fixed effects model because it has only 1 level
#add bing queries as random numbers from 0 to max positives
#searchdb$observedData$query_weight <- runif(nrow(searchdb$observedData), min = 0, max = max(searchdb$observedData$positive)) #add random numbers between 0 and max #of positives as the bing weights
#add bing queries as the same values as positive
searchdb$observedData$query_weight <- searchdb$observedData$positive
#run the model
modelDefinition <- effectsModel(db=searchdb, shp=shp)
model_search <- modelTrainR(modelDefinition)
#compare values just for looking at the table
temp_search <- select(model_search$modeledData, residence_census_tract, query_weight, positive, modeled_count_mode)
temp_smooth <- select(model$modeledData, residence_census_tract, positive, modeled_count_mode)
joined_smoothsame <- left_join(temp_smooth, temp_search, by = "residence_census_tract")
ggplotSmoothEffects(model, model_search,shp)
# simulated data at_home catchment map
queryIn <- list(
@ -43,7 +84,6 @@ db <- expandDB( selectFromDB( queryIn ) )
modelDefinition <- smoothModel(db=db, shp=shp)
model <- modelTrainR(modelDefinition)
ggplotSmoothMap(model,shp)