Merge branch 'master' of github.com:seattleflu/incidence-mapper into no-model-duplicates
This commit is contained in:
Коммит
f6fb724ed9
|
@ -1,5 +1,6 @@
|
|||
# Generated by roxygen2: do not edit by hand
|
||||
|
||||
export(addCensusData)
|
||||
export(expandDB)
|
||||
export(masterSpatialDB)
|
||||
export(selectFromDB)
|
||||
|
@ -11,6 +12,7 @@ import(jsonlite)
|
|||
import(lubridate)
|
||||
import(rgdal)
|
||||
import(sf)
|
||||
import(tidycensus)
|
||||
importFrom(RCurl,getURL)
|
||||
importFrom(lazyeval,interp)
|
||||
importFrom(magrittr,"%>%")
|
||||
|
|
|
@ -0,0 +1,75 @@
|
|||
#' addCensusData: function for fetching census data and merging it with input dataset
|
||||
#'
|
||||
#' @param db tibble with valid column names for INLA model
|
||||
#' @param geography geography of your data, default value - 'tract', for other options check here
|
||||
#' @param variables character string or vector of character strings of variable IDs.
|
||||
#' @param year the year for which you are requesting data
|
||||
#' @param state the state for which you are requesting data, default value - 'WA'
|
||||
#' @param county the county for which you are requesting data, default value - 'King'
|
||||
#' @param source source database, one of: 'acs' for five-year American Community Survey or 'decennial' for decennial Census
|
||||
#' @param credentials_path path to your file with Census API key, you can get your own census api key here: https://api.census.gov/data/key_signup.html
|
||||
#' @return db tibble with flu and census data
|
||||
#'
|
||||
#' @import tidycensus
|
||||
#' @import dplyr
|
||||
#' @importFrom magrittr %>%
|
||||
#'
|
||||
#' @export
|
||||
#' @examples
|
||||
#'
|
||||
addCensusData <- function( db = dbViewR::selectFromDB(),
|
||||
geography = "tract", variables, year,
|
||||
state = "WA", county = "King",
|
||||
source = "decennial",
|
||||
credentials_path = '/home/rstudio/seattle_flu')
|
||||
|
||||
{
|
||||
#get the census api key value from the file
|
||||
keyFile <- file(file.path(credentials_path, "census_api_key.txt"), open = 'r')
|
||||
keyValue <- readLines(keyFile, n = 1)
|
||||
close(keyFile)
|
||||
|
||||
Sys.setenv(CENSUS_API_KEY = keyValue)
|
||||
|
||||
if(source == "decennial")
|
||||
{
|
||||
dataset <- "sf1"
|
||||
}
|
||||
else if(source =="acs")
|
||||
{
|
||||
dataset <- "acs5"
|
||||
}
|
||||
else {
|
||||
print('unknown source of census data!')
|
||||
return(db)
|
||||
}
|
||||
|
||||
varNames <- tidycensus::load_variables(year, dataset, cache = TRUE)
|
||||
varToQuery <- intersect(variables, varNames$name)
|
||||
if (length(variables)<length(varToQuery))
|
||||
print('some requested variables are not presented in census data')
|
||||
|
||||
|
||||
if(source == "decennial")
|
||||
{
|
||||
censusData <- tidycensus::get_decennial(geography = geography, state = state, county = county,
|
||||
variables = varToQuery, year = year)
|
||||
for (var in varToQuery)
|
||||
{
|
||||
censusVar <- censusData %>% dplyr::filter(variable == var) %>% dplyr::select(GEOID, !!quo_name(paste(var, year, sep = ".")) := value)
|
||||
db$observedData <- dplyr::left_join(db$observedData, censusVar, by = c("residence_census_tract"="GEOID"))
|
||||
}
|
||||
}
|
||||
else if(source =="acs")
|
||||
{
|
||||
censusData <- tidycensus::get_acs(geography = geography, state = state, county = county,
|
||||
variables = varToQuery, year = year)
|
||||
for (var in varToQuery)
|
||||
{
|
||||
censusVar <- censusData %>% dplyr::filter(variable == var) %>% dplyr::select(GEOID, !!quo_name(paste(var, year, sep = ".")) := estimate)
|
||||
db$observedData <- dplyr::left_join(db$observedData, censusVar, by = c("residence_census_tract"="GEOID"))
|
||||
}
|
||||
}
|
||||
|
||||
return(db)
|
||||
}
|
|
@ -0,0 +1,56 @@
|
|||
#' addCensusData: function for loading bing search queries and categorizing which census tract it's in
|
||||
#'
|
||||
#' @param geography geography to bin the bing queries, default value - 'tract'
|
||||
#' @param bing_queries_path path to your file with Census API key, you can get your own census api key here: https://api.census.gov/data/key_signup.html
|
||||
#' @return db dataframe with number of bing queries per tract
|
||||
#'
|
||||
#' @import rgeos
|
||||
#' @import sp
|
||||
#' @import rgdal
|
||||
#' @import dplyr
|
||||
#'
|
||||
#' @export
|
||||
#' @examples
|
||||
#'
|
||||
|
||||
loadBingData <- function( geography = "tract",
|
||||
bing_queries_path = '/home/rstudio/seattle_flu')
|
||||
|
||||
{
|
||||
|
||||
bing_data <- read.csv(file.path(bing_queries_path, "Bing_queries.csv"), header=FALSE)
|
||||
#edit which columns are which
|
||||
bing_queries_latlong <- data.frame(bing_data)
|
||||
coordinates(bing_queries_latlong)<- ~V3 + V2
|
||||
|
||||
#this url is broken
|
||||
#url <-'https://github.com/seattleflu/simulated-data/blob/master/kingCountySpatialData/2016_CensusTracts_KingCountyWa.zip'
|
||||
|
||||
#for now download from original source
|
||||
url <-'https://www.seattle.gov/Documents/Departments/OPCD/Demographics/GeographicFilesandMaps/KingCountyTractsShapefiles.zip'
|
||||
file <- basename(url)
|
||||
download.file(url, file)
|
||||
unzip(file, exdir="KingCountyTractsShapefiles")
|
||||
|
||||
|
||||
#read in shapefile
|
||||
if (geography == "tract"){
|
||||
kc_shp <- readOGR(dsn='KingCountyTractsShapefiles', layer = "kc_tract_10")
|
||||
}
|
||||
#reproject bing queries onto king county shape file coordinate system
|
||||
proj4string(bing_queries_latlong) <- CRS("+proj=longlat")
|
||||
bing_queries_latlong <- spTransform(bing_queries_latlong, proj4string(kc_shp))
|
||||
|
||||
#get the census tracts for each of the bing queries
|
||||
bing_queries_censustracts <- over(bing_queries_latlong, kc_shp)
|
||||
|
||||
#now join with the input data
|
||||
#for some reason there are 398 tracts in the king county shapefile and only 397 in mike's shapefile
|
||||
bing_queries_pertract <- as.data.frame(table(bing_queries_censustracts$GEOID10))
|
||||
#rename columns
|
||||
bing_queries_pertract$Var1 <- as.character(bing_queries_pertract$Var1)
|
||||
bing_queries_pertract <- rename(bing_queries_pertract, num_bing_queries = Freq)
|
||||
bing_queries_pertract <- rename(bing_queries_pertract, GEOID = Var1)
|
||||
|
||||
return(bing_queries_pertract)
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
|
||||
|
||||
|
||||
library("tidycensus")
|
||||
library("tigris")
|
||||
library("sf")
|
||||
library("mapview")
|
||||
#library(totalcensus) #downloads the entire survey data
|
||||
#download the 2015 ACS 5-year survey data, which is about 50 GB.
|
||||
#download_census("acs5year", 2015)
|
||||
|
||||
options(tigris_class = "sf")
|
||||
options(tigris_use_cache = TRUE)
|
||||
|
||||
#census api guide https://www.census.gov/data/developers/guidance/api-user-guide.html
|
||||
#get your own census api key here: https://api.census.gov/data/key_signup.html
|
||||
#save the key to a txt file named census_api_key.txt
|
||||
|
||||
#enter your own user path here
|
||||
user_path = "C:/Users/grhuynh"
|
||||
|
||||
#get the census api key value from the file
|
||||
census_api_key_value<-read.table(file.path(user_path, "census_api_key.txt"), header=FALSE) #read in file
|
||||
census_api_key_value<- as.character(census_api_key_value$V1)
|
||||
|
||||
#read in the value into the tidycensus library
|
||||
census_api_key(census_api_key_value)
|
||||
|
||||
|
||||
#read in the shape file for the desired geography, in this case tracts of king county
|
||||
state_name = "WA"
|
||||
county_name = "King"
|
||||
county_tracts <-tracts(state_name, county_name)
|
||||
|
||||
|
||||
#to see all variables
|
||||
#v17 <- load_variables(2010, "sf1", cache = TRUE) #2010 decennial census
|
||||
#v17_acs <- load_variables(2017, "acs5", cache = TRUE) # acs census
|
||||
#view(v17_acs)
|
||||
|
||||
|
||||
#get the total population, P001001 in decennial census
|
||||
#if need multiple states alternative syntax here https://github.com/walkerke/tidycensus/issues/121
|
||||
#if need multiple variable syntax here https://github.com/walkerke/tidycensus/issues/129
|
||||
pop <- get_decennial(geography = "tract", state = "WA", county = county_name,
|
||||
variables = "P001001", year = 2010, geometry = TRUE)
|
||||
|
||||
#also can get population data from acs
|
||||
pop_acs <- get_acs(geography = "tract", state = state_name, county = county_name,
|
||||
variables = "B01003_001", year = 2017, geometry = TRUE)
|
||||
|
||||
|
||||
#plot population graph
|
||||
pop %>%
|
||||
ggplot(aes(fill = value)) +
|
||||
geom_sf(color = NA) +
|
||||
coord_sf(crs = 26911) +
|
||||
scale_fill_viridis_c(option = "magma")
|
||||
|
||||
pop_acs %>%
|
||||
ggplot(aes(fill = estimate)) +
|
||||
geom_sf(color = NA) +
|
||||
coord_sf(crs = 26911) +
|
||||
scale_fill_viridis_c(option = "magma")
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/addCensusData.R
|
||||
\name{addCensusData}
|
||||
\alias{addCensusData}
|
||||
\title{addCensusData: function for fetching census data and merging it with input dataset}
|
||||
\usage{
|
||||
addCensusData(db = dbViewR::selectFromDB(), geography = "tract",
|
||||
variables, year, state = "WA", county = "King",
|
||||
source = "decennial", credentials_path = "/home/rstudio/seattle_flu")
|
||||
}
|
||||
\arguments{
|
||||
\item{db}{tibble with valid column names for INLA model}
|
||||
|
||||
\item{geography}{geography of your data, default value - 'tract', for other options check here}
|
||||
|
||||
\item{variables}{character string or vector of character strings of variable IDs.}
|
||||
|
||||
\item{year}{the year for which you are requesting data}
|
||||
|
||||
\item{state}{the state for which you are requesting data, default value - 'WA'}
|
||||
|
||||
\item{county}{the county for which you are requesting data, default value - 'King'}
|
||||
|
||||
\item{source}{source database, one of: 'acs' for five-year American Community Survey or 'decennial' for decennial Census}
|
||||
|
||||
\item{credentials_path}{path to your file with Census API key, you can get your own census api key here: https://api.census.gov/data/key_signup.html}
|
||||
}
|
||||
\value{
|
||||
db tibble with flu and census data
|
||||
}
|
||||
\description{
|
||||
addCensusData: function for fetching census data and merging it with input dataset
|
||||
}
|
||||
\examples{
|
||||
|
||||
}
|
|
@ -0,0 +1,23 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/loadBingData.R
|
||||
\name{loadBingData}
|
||||
\alias{loadBingData}
|
||||
\title{addCensusData: function for loading bing search queries and categorizing which census tract it's in}
|
||||
\usage{
|
||||
loadBingData(geography = "tract",
|
||||
bing_queries_path = "/home/rstudio/seattle_flu")
|
||||
}
|
||||
\arguments{
|
||||
\item{geography}{geography to bin the bing queries, default value - 'tract'}
|
||||
|
||||
\item{bing_queries_path}{path to your file with Census API key, you can get your own census api key here: https://api.census.gov/data/key_signup.html}
|
||||
}
|
||||
\value{
|
||||
db dataframe with number of bing queries per tract
|
||||
}
|
||||
\description{
|
||||
addCensusData: function for loading bing search queries and categorizing which census tract it's in
|
||||
}
|
||||
\examples{
|
||||
|
||||
}
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
export(appendCatchmentModel)
|
||||
export(constructAdjacencyNetwork)
|
||||
export(effectsModel)
|
||||
export(latentFieldModel)
|
||||
export(modelTrainR)
|
||||
export(smoothModel)
|
||||
|
|
|
@ -1 +1,117 @@
|
|||
#' effectsModel: function to define regression models to study effects of covariates from dbViewR object
|
||||
|
||||
#GHH copy from latentFieldModel
|
||||
#' latentFieldModel: function to define latent field models from dbViewR object
|
||||
#'
|
||||
#' @param db dbViewR object with valid column names for INLA model.
|
||||
#' Smoothing only makes sense by age, location, and time. Factor variables cannot be smoothed!
|
||||
#' @param shp sf object with residence_census_tract shapes (all higher levels assume iid and not local smoothing)
|
||||
#' @param family non-standard family override (default = NULL).
|
||||
#' @param neighborGraph non-standard neighbor graph (default = NULL)
|
||||
#'
|
||||
#' @return modelDefinition object for modelTrainR, list with fields
|
||||
#' type = latentField
|
||||
#' family : as input
|
||||
#' formula : model definition for INLA
|
||||
#' inputData : inla-prepared db$observed data
|
||||
#' neighborGraph : as input or derived from shp during formula construction
|
||||
#'
|
||||
#' @import INLA
|
||||
#'
|
||||
#' @export
|
||||
#' @examples
|
||||
#' return h1n1pdm incidence model by time and location
|
||||
#' modelDefinition <- smoothModel(db = dbViewR::selectFromDB(), shp = dbViewR::masterSpatialDB())
|
||||
#'
|
||||
effectsModel <- function(db , shp, family = NULL, neighborGraph = NULL){
|
||||
|
||||
#INLA data frame that may get augmented columns we don't need to see when we're done
|
||||
inputData <- db$observedData
|
||||
|
||||
# identify intended family
|
||||
if(is.null(family)){
|
||||
if (all(inputData$n == inputData$positive)){
|
||||
family = 'poisson'
|
||||
} else if (any(inputData$n > inputData$positive)){
|
||||
family = 'binomial'
|
||||
} else if (any(inputData$n < inputData$positive)){
|
||||
return('n < positive !!! invald db$observedData.')
|
||||
}
|
||||
}
|
||||
|
||||
# construct priors
|
||||
hyper=list()
|
||||
hyper$global <- list(prec = list( prior = "pc.prec", param = 1/10, alpha = 0.01))
|
||||
hyper$local <- list(prec = list( prior = "pc.prec", param = 1/200, alpha = 0.01))
|
||||
hyper$age <- list(prec = list( prior = "pc.prec", param = 1, alpha = 0.01))
|
||||
hyper$time <- list(prec = list( prior = "pc.prec", param = 1/50, alpha = 0.01))
|
||||
|
||||
|
||||
# unlike smoothing model, we only replicate latent fields across pathogens, but treat all other factors as fixed effects
|
||||
|
||||
# find pathogen types
|
||||
if('pathogen' %in% names(db$observedData)){
|
||||
levelSet <- levels(as.factor(inputData$pathogen))
|
||||
numLevels <- length(levelSet)
|
||||
|
||||
validLatentFieldColumns <- c('pathogen')
|
||||
|
||||
} else {
|
||||
return('error! must provide "pathogen" column.')
|
||||
}
|
||||
|
||||
# set family across all levels
|
||||
family <- rep(family,numLevels)
|
||||
|
||||
# build outcome matrix and replicate list for multiple likelihoods
|
||||
outcome <- matrix(NA,nrow(inputData),numLevels)
|
||||
replicateIdx <- matrix(NA,nrow(inputData),1)
|
||||
|
||||
for( k in levelSet){
|
||||
idx <- inputData$pathogen %in% k
|
||||
count <- which(levelSet %in% k)
|
||||
outcome[idx, count] <- inputData$positive[idx]
|
||||
replicateIdx[idx]<-count
|
||||
}
|
||||
|
||||
# initialize formula for each level
|
||||
if(numLevels>1){
|
||||
outcomeStr <- paste('cbind(',paste(paste('outcome',1:numLevels,sep='.'),sep='',collapse=', '),')',sep='',collapse = '')
|
||||
formula <- as.formula(paste(outcomeStr,'~','pathogen - 1 + catchment',sep=' '))
|
||||
} else { # why does R do inconsistent stuff with column names!?!!
|
||||
#formula <- as.formula('outcome ~ 1 + catchment')
|
||||
formula <- as.formula('outcome ~ 1')
|
||||
}
|
||||
|
||||
# factors as fixed effects, assuming no interaction terms
|
||||
validFactorNames <- names(db$observedData)[ !( (names(db$observedData) %in% c('pathogen','n','positive')) |
|
||||
grepl('row',names(db$observedData)) |
|
||||
grepl('age',names(db$observedData)) |
|
||||
grepl('residence_',names(db$observedData)) |
|
||||
grepl('work_',names(db$observedData)) |
|
||||
grepl('encounter',names(db$observedData)) )]
|
||||
|
||||
factorIdx <- names(db$observedData) %in% validFactorNames
|
||||
for(COLUMN in names(db$observedData)[factorIdx]){
|
||||
formula <- as.formula(paste(as.character(formula)[2],'~',paste(as.character(formula)[3],COLUMN,sep='+')))
|
||||
}
|
||||
|
||||
|
||||
df <- data.frame(outcome = outcome, inputData, replicateIdx)
|
||||
|
||||
if(any(grepl('residence', names(inputData)) | grepl('work', names(inputData)))){
|
||||
spatial_domain<-shp$domain[1]
|
||||
} else {
|
||||
spatial_domain <- NULL
|
||||
}
|
||||
|
||||
modelDefinition <- list(type='effects', family = family, formula = formula, lincomb = c(),
|
||||
inputData = df, neighborGraph=neighborGraph, hyper=hyper,
|
||||
observedData = db$observedData,
|
||||
queryList = db$queryList,
|
||||
spatial_domain = spatial_domain)
|
||||
|
||||
return(modelDefinition)
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -39,6 +39,12 @@ modelTrainR <- function(modelDefinition){
|
|||
inla = model, modelDefinition = modelDefinition))
|
||||
|
||||
} else if (modelDefinition$type == 'effects'){
|
||||
#same as smooth data for now?
|
||||
|
||||
modeledData <- appendSmoothData(model,modelDefinition)
|
||||
|
||||
# return output data
|
||||
return(list(modeledData = modeledData, inla = model, modelDefinition = modelDefinition))
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/effectsModel.R
|
||||
\name{effectsModel}
|
||||
\alias{effectsModel}
|
||||
\title{effectsModel: function to define regression models to study effects of covariates from dbViewR object
|
||||
latentFieldModel: function to define latent field models from dbViewR object}
|
||||
\usage{
|
||||
effectsModel(db, shp, family = NULL, neighborGraph = NULL)
|
||||
}
|
||||
\arguments{
|
||||
\item{db}{dbViewR object with valid column names for INLA model.
|
||||
Smoothing only makes sense by age, location, and time. Factor variables cannot be smoothed!}
|
||||
|
||||
\item{shp}{sf object with residence_census_tract shapes (all higher levels assume iid and not local smoothing)}
|
||||
|
||||
\item{family}{non-standard family override (default = NULL).}
|
||||
|
||||
\item{neighborGraph}{non-standard neighbor graph (default = NULL)}
|
||||
}
|
||||
\value{
|
||||
modelDefinition object for modelTrainR, list with fields
|
||||
type = latentField
|
||||
family : as input
|
||||
formula : model definition for INLA
|
||||
inputData : inla-prepared db$observed data
|
||||
neighborGraph : as input or derived from shp during formula construction
|
||||
}
|
||||
\description{
|
||||
effectsModel: function to define regression models to study effects of covariates from dbViewR object
|
||||
latentFieldModel: function to define latent field models from dbViewR object
|
||||
}
|
||||
\examples{
|
||||
return h1n1pdm incidence model by time and location
|
||||
modelDefinition <- smoothModel(db = dbViewR::selectFromDB(), shp = dbViewR::masterSpatialDB())
|
||||
|
||||
}
|
|
@ -1,9 +1,11 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/smoothModel.R
|
||||
% Please edit documentation in R/smoothModel - Copy.R, R/smoothModel.R
|
||||
\name{smoothModel}
|
||||
\alias{smoothModel}
|
||||
\title{smoothModel: function to define variable smoothing models from dbViewR object}
|
||||
\usage{
|
||||
smoothModel(db, shp, family = NULL, neighborGraph = NULL)
|
||||
|
||||
smoothModel(db, shp, family = NULL, neighborGraph = NULL)
|
||||
}
|
||||
\arguments{
|
||||
|
@ -14,9 +16,25 @@ Smoothing only makes sense by age, location, and time. Factor variables cannot
|
|||
|
||||
\item{family}{non-standard family override (default = NULL).}
|
||||
|
||||
\item{neighborGraph}{non-standard neighbor graph (default = NULL)}
|
||||
|
||||
\item{db}{dbViewR object with valid column names for INLA model.
|
||||
Smoothing only makes sense by age, location, and time. Factor variables cannot be smoothed!}
|
||||
|
||||
\item{shp}{sf object with residence_census_tract shapes (all higher levels assume iid and not local smoothing)}
|
||||
|
||||
\item{family}{non-standard family override (default = NULL).}
|
||||
|
||||
\item{neighborGraph}{non-standard neighbor graph (default = NULL)}
|
||||
}
|
||||
\value{
|
||||
modelDefinition object for modelTrainR, list with fields
|
||||
type = smooth
|
||||
family : as input
|
||||
formula : model definition for INLA
|
||||
inputData : inla-prepared db$observed data
|
||||
neighborGraph : as input or derived from shp during formula construction
|
||||
|
||||
modelDefinition object for modelTrainR, list with fields
|
||||
type = smooth
|
||||
family : as input
|
||||
|
@ -25,10 +43,15 @@ modelDefinition object for modelTrainR, list with fields
|
|||
neighborGraph : as input or derived from shp during formula construction
|
||||
}
|
||||
\description{
|
||||
smoothModel: function to define variable smoothing models from dbViewR object
|
||||
|
||||
smoothModel: function to define variable smoothing models from dbViewR object
|
||||
}
|
||||
\examples{
|
||||
return h1n1pdm incidence model by time and location
|
||||
modelDefinition <- smoothModel(db = dbViewR::selectFromDB(), shp = dbViewR::masterSpatialDB())
|
||||
|
||||
return h1n1pdm incidence model by time and location
|
||||
modelDefinition <- smoothModel(db = dbViewR::selectFromDB(), shp = dbViewR::masterSpatialDB())
|
||||
|
||||
}
|
||||
|
|
|
@ -1,16 +1,5 @@
|
|||
# Generated by roxygen2: do not edit by hand
|
||||
|
||||
export(getHumanReadableModelIdFromModel)
|
||||
export(getHumanReadableModelIdFromQuery)
|
||||
export(getModelIdFromModel)
|
||||
export(getModelIdFromQuery)
|
||||
export(getModelQueryObjectFromModel)
|
||||
export(getModelQueryObjectFromQuery)
|
||||
export(loadModelFileById)
|
||||
export(queryLoadedModel)
|
||||
export(queryModelById)
|
||||
export(returnModel)
|
||||
export(saveModel)
|
||||
import(digest)
|
||||
import(jsonlite)
|
||||
import(logging)
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
# Generated by roxygen2: do not edit by hand
|
||||
|
||||
export(ggplotSmoothEffects)
|
||||
export(ggplotSmoothMap)
|
||||
export(ggplotSmoothSequential)
|
||||
import(dplyr)
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
#' ggplotDiagnosticPlots: function for diagnostic plots for INLA model
|
||||
#'
|
||||
#' @param model Model object from incidenceMapR::modelTrainR
|
||||
#' @param observed The observed values. If NULL defined by model family
|
||||
#' @param CI Add credible intervals to the fitted values?
|
||||
#' @param binwidth The size of the bins used for the histogram. If NULL ggplot guesses for you.
|
||||
#' @param se Plot a ribbon showing the standard error of the smoother.
|
||||
#' @param method What method should be used for the smoother. Defaults to loess unless data is large. Other options include 'gam', 'loess', 'lm'. See geom_smooth for details.
|
||||
#' @return List of 3 ggplot objects: default plots, observed vs fitted and histogram, standardized residuals
|
||||
#'
|
||||
#' @import ggplot2
|
||||
#' @import INLAutils
|
||||
#'
|
||||
#' @export
|
||||
#' @examples
|
||||
#' ggplotDiagnosticPlots(model)
|
||||
#'
|
||||
ggplotDiagnosticPlots <- function(model, observed = NULL, CI = FALSE, binwidth = NULL, se = TRUE, method = "auto"){
|
||||
|
||||
|
||||
plots <- list()
|
||||
|
||||
# for default plots (marginals, )
|
||||
plots[[1]] <- autoplot(model$inla)
|
||||
|
||||
if (is.null(observed))
|
||||
{
|
||||
if (model$modelDefinition$family == 'poisson')
|
||||
observed <- model$modelDefinition$observedData$positive
|
||||
else
|
||||
return('observed is missing and can not be defined from model family!!! provide observed data.')
|
||||
}
|
||||
|
||||
# for observed vs fitted and histogram plot
|
||||
plots[[2]] <- ggplot_inla_residuals(model$inla, observed, CI, binwidth)
|
||||
# for Standardized Residuals
|
||||
plots[[3]] <- ggplot_inla_residuals2(model$inla, observed, CI, se, method)
|
||||
print(plots[[3]])
|
||||
|
||||
return(invisible(plots))
|
||||
|
||||
}
|
|
@ -16,12 +16,18 @@
|
|||
#' plotDat$positive[plotDat$positive==0]<-NaN
|
||||
#' ggplotSmoothMap(plotDat,shp)
|
||||
#'
|
||||
ggplotSmoothMap <- function(model, shp, title='', shape_level = 'residence_census_tract'){
|
||||
ggplotSmoothMap <- function(model, shp, censusdb = NULL, title='', shape_level = 'residence_census_tract'){
|
||||
|
||||
|
||||
plotDat <- right_join(model$modeledData,shp, by=shape_level)
|
||||
plotDat$positive[plotDat$n==0]<-NaN
|
||||
|
||||
#convert to fraction
|
||||
if(!is.null(censusdb)){
|
||||
plotDat$positive <- plotDat$positive / censusdb$observedData$P005004.2010
|
||||
plotDat$modeled_count_mode <- plotDat$modeled_count_mode / censusdb$observedData$P005004.2010
|
||||
}
|
||||
|
||||
bbox<-sf::st_bbox(shp$geometry)
|
||||
|
||||
mapSettings <- ggplot() + #xlim(c(min(122.5, -bbox[1]),max(121.7,-bbox[3]))) + ylim(c(max(47.17,bbox[2]),min(47.76,bbox[4]))) +
|
||||
|
@ -35,6 +41,7 @@ p<-mapSettings + geom_sf(data=shp,size=0.1,aes(fill=NaN))
|
|||
|
||||
colorLimits<-c(min(c(plotDat$positive,plotDat$modeled_count_mode),na.rm=TRUE),max(c(plotDat$positive,plotDat$modeled_count_mode),na.rm=TRUE))
|
||||
colorBreaks<-round(seq(min(colorLimits),sqrt(max(colorLimits)), length.out = 6)^2)
|
||||
colorBreaks<-seq(min(colorLimits),sqrt(max(colorLimits)), length.out = 6)^2
|
||||
|
||||
p1 <- p + geom_sf(data=plotDat,size=0, aes(fill=positive)) +
|
||||
guides(fill=guide_legend(title="observed")) +
|
||||
|
@ -47,6 +54,127 @@ p2 <- p + geom_sf(data=plotDat,size=0, aes(fill=modeled_count_mode)) +
|
|||
grid.arrange(p1,p2,nrow=1, top=textGrob(title))
|
||||
}
|
||||
|
||||
|
||||
#' ggplotFixedEffects: function for plotting data, smoothed model, and fixed effects model next to each other
|
||||
#'
|
||||
#' @param plotDatEffects data.frame that joins dbViewR::modeledData
|
||||
#' @return ggplot object
|
||||
#'
|
||||
#' @import ggplot2
|
||||
#' @import gridExtra
|
||||
#' @import viridis
|
||||
#'
|
||||
#' @export
|
||||
#' @examples
|
||||
#'
|
||||
#'
|
||||
ggplotFixedEffects <- function(model_effects, shp, censusdb = NULL, title='', shape_level = 'residence_census_tract'){
|
||||
|
||||
plotDatEffects <- right_join(model_effects$modeledData,shp, by=shape_level)
|
||||
plotDatEffects$positive[plotDatEffects$n==0]<-NaN
|
||||
|
||||
|
||||
#convert to fraction
|
||||
if(!is.null(censusdb)){
|
||||
plotDatEffects$positive <- plotDatEffects$positive / censusdb$observedData$P005004.2010
|
||||
plotDatEffects$modeled_count_mode <- plotDatEffects$modeled_count_mode / censusdb$observedData$P005004.2010
|
||||
}
|
||||
|
||||
bbox<-sf::st_bbox(shp$geometry)
|
||||
|
||||
mapSettings <- ggplot() + #xlim(c(min(122.5, -bbox[1]),max(121.7,-bbox[3]))) + ylim(c(max(47.17,bbox[2]),min(47.76,bbox[4]))) +
|
||||
theme_bw() +
|
||||
theme(axis.text=element_blank(),axis.ticks=element_blank(),panel.grid.major=element_line(colour="transparent"), panel.border = element_blank())
|
||||
p<-mapSettings + geom_sf(data=shp,size=0.1,aes(fill=NaN))
|
||||
|
||||
# mapSettingsTight <- ggplot() + xlim(c(122.5,122.15)) + ylim(c(47.42,47.76)) + theme_bw() +
|
||||
# theme(axis.text=element_blank(),axis.ticks=element_blank(),panel.grid.major=element_line(colour="transparent"), panel.border = element_blank())
|
||||
# p<-mapSettingsTight + geom_sf(data=shp,size=0.1,aes(fill=NaN))
|
||||
|
||||
colorLimits<-c(min(c(plotDatEffects$positive,plotDatEffects$modeled_count_mode),na.rm=TRUE),max(c(plotDatEffects$positive,plotDatEffects$modeled_count_mode),na.rm=TRUE))
|
||||
colorBreaks<-round(seq(min(colorLimits),sqrt(max(colorLimits)), length.out = 6)^2)
|
||||
colorBreaks<-seq(min(colorLimits),sqrt(max(colorLimits)), length.out = 6)^2
|
||||
|
||||
p1 <- p + geom_sf(data=plotDatEffects,size=0, aes(fill=num_bing_queries)) +
|
||||
guides(fill=guide_legend(title="num_bing_queries")) +
|
||||
scale_fill_viridis(na.value="transparent",trans = "sqrt",breaks=colorBreaks,limits=colorLimits)
|
||||
|
||||
p2 <- p + geom_sf(data=plotDatEffects,size=0, aes(fill=modeled_count_mode)) +
|
||||
guides(fill=guide_legend(title="Expected")) +
|
||||
scale_fill_viridis(na.value="transparent",trans = "sqrt",breaks=colorBreaks,limits=colorLimits)
|
||||
|
||||
grid.arrange(p1,p2,nrow=1, top=textGrob(title))
|
||||
|
||||
}
|
||||
|
||||
|
||||
#' ggplotSmoothEffects: function for plotting smoothed model and fixed effects model next to each other
|
||||
#'
|
||||
#' @param plotDat data.frame that joins dbViewR::modeledData
|
||||
#' @return ggplot object
|
||||
#'
|
||||
#' @import ggplot2
|
||||
#' @import gridExtra
|
||||
#' @import viridis
|
||||
#'
|
||||
#' @export
|
||||
#' @examples
|
||||
#'
|
||||
#'
|
||||
ggplotSmoothEffects <- function(model_smooth, model_effects, shp, censusdb = NULL, title='', shape_level = 'residence_census_tract'){
|
||||
|
||||
plotDat <- right_join(model_smooth$modeledData,shp, by=shape_level)
|
||||
plotDat$positive[plotDat$n==0]<-NaN
|
||||
|
||||
plotDatEffects <- right_join(model_effects$modeledData,shp, by=shape_level)
|
||||
plotDatEffects$positive[plotDatEffects$n==0]<-NaN
|
||||
|
||||
|
||||
#convert to fraction
|
||||
if(!is.null(censusdb)){
|
||||
plotDat$positive <- plotDat$positive / censusdb$observedData$P005004.2010
|
||||
plotDat$modeled_count_mode <- plotDat$modeled_count_mode / censusdb$observedData$P005004.2010
|
||||
|
||||
plotDatEffects$positive <- plotDatEffects$positive / censusdb$observedData$P005004.2010
|
||||
plotDatEffects$modeled_count_mode <- plotDatEffects$modeled_count_mode / censusdb$observedData$P005004.2010
|
||||
}
|
||||
|
||||
bbox<-sf::st_bbox(shp$geometry)
|
||||
|
||||
mapSettings <- ggplot() + #xlim(c(min(122.5, -bbox[1]),max(121.7,-bbox[3]))) + ylim(c(max(47.17,bbox[2]),min(47.76,bbox[4]))) +
|
||||
theme_bw() +
|
||||
theme(axis.text=element_blank(),axis.ticks=element_blank(),panel.grid.major=element_line(colour="transparent"), panel.border = element_blank())
|
||||
p<-mapSettings + geom_sf(data=shp,size=0.1,aes(fill=NaN))
|
||||
|
||||
# mapSettingsTight <- ggplot() + xlim(c(122.5,122.15)) + ylim(c(47.42,47.76)) + theme_bw() +
|
||||
# theme(axis.text=element_blank(),axis.ticks=element_blank(),panel.grid.major=element_line(colour="transparent"), panel.border = element_blank())
|
||||
# p<-mapSettingsTight + geom_sf(data=shp,size=0.1,aes(fill=NaN))
|
||||
|
||||
colorLimits<-c(min(c(plotDat$positive,plotDat$modeled_count_mode),na.rm=TRUE),max(c(plotDat$positive,plotDat$modeled_count_mode),na.rm=TRUE))
|
||||
colorBreaks<-round(seq(min(colorLimits),sqrt(max(colorLimits)), length.out = 6)^2)
|
||||
colorBreaks<-seq(min(colorLimits),sqrt(max(colorLimits)), length.out = 6)^2
|
||||
|
||||
p1 <- p + geom_sf(data=plotDat,size=0, aes(fill=positive)) +
|
||||
guides(fill=guide_legend(title="observed")) +
|
||||
scale_fill_viridis(na.value="transparent",trans = "sqrt",breaks=colorBreaks,limits=colorLimits)
|
||||
|
||||
p2 <- p + geom_sf(data=plotDat,size=0, aes(fill=modeled_count_mode)) +
|
||||
guides(fill=guide_legend(title="expected")) +
|
||||
scale_fill_viridis(na.value="transparent",trans = "sqrt",breaks=colorBreaks,limits=colorLimits)
|
||||
|
||||
p3 <- p + geom_sf(data=plotDatEffects,size=0, aes(fill=num_bing_queries)) +
|
||||
guides(fill=guide_legend(title="num_bing_queries")) +
|
||||
scale_fill_viridis(na.value="transparent",trans = "sqrt",breaks=colorBreaks,limits=colorLimits)
|
||||
|
||||
p4 <- p + geom_sf(data=plotDatEffects,size=0, aes(fill=modeled_count_mode)) +
|
||||
guides(fill=guide_legend(title="Expected")) +
|
||||
scale_fill_viridis(na.value="transparent",trans = "sqrt",breaks=colorBreaks,limits=colorLimits)
|
||||
|
||||
grid.arrange(p1,p2,p3,p4,nrow=2, top=textGrob(title))
|
||||
|
||||
}
|
||||
|
||||
|
||||
#' ggplotSmoothSequential: function for plotting data and smoothed model next to each other as sequential variable (timeseries, age)
|
||||
#'
|
||||
#' NOT YET IMPLEMENTED!
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/ggplotDiagnosticPlots.R
|
||||
\name{ggplotDiagnosticPlots}
|
||||
\alias{ggplotDiagnosticPlots}
|
||||
\title{ggplotDiagnosticPlots: function for diagnostic plots for INLA model}
|
||||
\usage{
|
||||
ggplotDiagnosticPlots(model, observed = NULL, CI = FALSE,
|
||||
binwidth = NULL, se = TRUE, method = "auto")
|
||||
}
|
||||
\arguments{
|
||||
\item{model}{Model object from incidenceMapR::modelTrainR}
|
||||
|
||||
\item{observed}{The observed values. If NULL defined by model family}
|
||||
|
||||
\item{CI}{Add credible intervals to the fitted values?}
|
||||
|
||||
\item{binwidth}{The size of the bins used for the histogram. If NULL ggplot guesses for you.}
|
||||
|
||||
\item{se}{Plot a ribbon showing the standard error of the smoother.}
|
||||
|
||||
\item{method}{What method should be used for the smoother. Defaults to loess unless data is large. Other options include 'gam', 'loess', 'lm'. See geom_smooth for details.}
|
||||
}
|
||||
\value{
|
||||
List of 3 ggplot objects: default plots, observed vs fitted and histogram, standardized residuals
|
||||
}
|
||||
\description{
|
||||
ggplotDiagnosticPlots: function for diagnostic plots for INLA model
|
||||
}
|
||||
\examples{
|
||||
ggplotDiagnosticPlots(model)
|
||||
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/ggplotWrappers.R
|
||||
\name{ggplotFixedEffects}
|
||||
\alias{ggplotFixedEffects}
|
||||
\title{ggplotFixedEffects: function for plotting data, smoothed model, and fixed effects model next to each other}
|
||||
\usage{
|
||||
ggplotFixedEffects(model_effects, shp, censusdb = NULL, title = "",
|
||||
shape_level = "residence_census_tract")
|
||||
}
|
||||
\arguments{
|
||||
\item{plotDatEffects}{data.frame that joins dbViewR::modeledData}
|
||||
}
|
||||
\value{
|
||||
ggplot object
|
||||
}
|
||||
\description{
|
||||
ggplotFixedEffects: function for plotting data, smoothed model, and fixed effects model next to each other
|
||||
}
|
||||
\examples{
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/ggplotWrappers.R
|
||||
\name{ggplotSmoothEffects}
|
||||
\alias{ggplotSmoothEffects}
|
||||
\title{ggplotSmoothEffects: function for plotting data, smoothed model, and fixed effects model next to each other}
|
||||
\usage{
|
||||
ggplotSmoothEffects(model_smooth, model_effects, shp, censusdb = NULL,
|
||||
title = "", shape_level = "residence_census_tract")
|
||||
}
|
||||
\arguments{
|
||||
\item{plotDat}{data.frame that joins dbViewR::modeledData}
|
||||
}
|
||||
\value{
|
||||
ggplot object
|
||||
}
|
||||
\description{
|
||||
NOT YET IMPLEMENTED!
|
||||
}
|
||||
\examples{
|
||||
plotDat <- right_join(model$modeledData,shp, by=c('residence_census_tract'))
|
||||
plotDat$positive[plotDat$positive==0]<-NaN
|
||||
ggplotSmoothSequential(plotDat)
|
||||
|
||||
|
||||
}
|
|
@ -4,7 +4,7 @@
|
|||
\alias{ggplotSmoothMap}
|
||||
\title{ggplotSmoothMap: function for plotting data and smoothed model next to each other on map}
|
||||
\usage{
|
||||
ggplotSmoothMap(model, shp, title = "",
|
||||
ggplotSmoothMap(model, shp, censusdb = NULL, title = "",
|
||||
shape_level = "residence_census_tract")
|
||||
}
|
||||
\arguments{
|
||||
|
|
|
@ -0,0 +1,74 @@
|
|||
# testModelTrainR
|
||||
# script to test incidenceMapR package
|
||||
|
||||
library(dbViewR)
|
||||
library(incidenceMapR)
|
||||
library(modelTestR)
|
||||
|
||||
shp <- masterSpatialDB() # census-tract shapefiles
|
||||
|
||||
# ggplot build eventually will be replaced by function ggplotSmoothSequential
|
||||
library(ggplot2)
|
||||
plotSettings <- ggplot() + theme_bw() + theme(panel.border = element_blank()) + xlab('')
|
||||
|
||||
###################################
|
||||
##### smoothing models ############
|
||||
###################################
|
||||
|
||||
## simulated data
|
||||
queryIn <- list(
|
||||
SELECT =list(COLUMN=c('site_type','residence_census_tract')),
|
||||
WHERE =list(COLUMN='site_type', IN = c('kiosk')),
|
||||
GROUP_BY =list(COLUMN=c('site_type','residence_census_tract')),
|
||||
SUMMARIZE=list(COLUMN='site_type', IN= c('kiosk'))
|
||||
)
|
||||
db <- expandDB( selectFromDB( queryIn ) )
|
||||
|
||||
#smooth model
|
||||
modelDefinition <- smoothModel(db=db, shp=shp)
|
||||
model <- modelTrainR(modelDefinition)
|
||||
ggplotSmoothMap(model,shp)
|
||||
ggplotDiagnosticPlots(model) #plot marginals and residuals
|
||||
|
||||
|
||||
####################################################
|
||||
##### fixed effects models: bing queries ############
|
||||
####################################################
|
||||
## simulated data
|
||||
queryIn <- list(
|
||||
SELECT =list(COLUMN=c('site_type','residence_census_tract')),
|
||||
WHERE =list(COLUMN='site_type', IN = c('kiosk')),
|
||||
GROUP_BY =list(COLUMN=c('site_type','residence_census_tract')),
|
||||
SUMMARIZE=list(COLUMN='site_type', IN= c('kiosk'))
|
||||
)
|
||||
db <- expandDB( selectFromDB( queryIn ) )
|
||||
|
||||
#get the number of bing queries per tract
|
||||
bing_queries_pertract <- loadBingData(geography = "tract",
|
||||
bing_queries_path = 'C:/Users/grhuynh/FluMapModel')
|
||||
#or add bing queries as random numbers from 0 to max positives
|
||||
#searchdb$observedData$num_bing_queries <- runif(nrow(searchdb$observedData), min = 0, max = max(searchdb$observedData$positive)) #add random numbers between 0 and max #of positives as the bing weights
|
||||
|
||||
#or add bing queries as the same values as positive
|
||||
#searchdb$observedData$num_bing_queries <- searchdb$observedData$positive
|
||||
|
||||
|
||||
#join to the observed data
|
||||
searchdb <- db
|
||||
library(dplyr)
|
||||
searchdb$observedData <- inner_join(searchdb$observedData, bing_queries_pertract, by = c("residence_census_tract" = "GEOID"))
|
||||
|
||||
#run the fixed effects model
|
||||
searchdb$observedData$site_type <- NULL #this messes up the fixed effects model because it has only 1 level
|
||||
modelDefinition <- effectsModel(db=searchdb, shp=shp)
|
||||
model_fixedeffects <- modelTrainR(modelDefinition)
|
||||
|
||||
#plot just the fixed effects model
|
||||
ggplotFixedEffects(model_fixedeffects,shp)
|
||||
ggplotDiagnosticPlots(model_fixedeffects) #the residuals plot doesn't work
|
||||
|
||||
####################################################
|
||||
##### compare smooth and fixed effects models ############
|
||||
####################################################
|
||||
#run all above code
|
||||
ggplotSmoothEffects(model, model_fixedeffects,shp)
|
|
@ -28,9 +28,50 @@ db <- expandDB( selectFromDB( queryIn ) )
|
|||
|
||||
modelDefinition <- smoothModel(db=db, shp=shp)
|
||||
model <- modelTrainR(modelDefinition)
|
||||
|
||||
ggplotSmoothMap(model,shp)
|
||||
|
||||
#OR plot as fraction of census population in each tract
|
||||
censusdb <- addCensusData(db = db, source = "decennial", variable = c("B01003_001", "B09001_001", "P005004"), year = 2010, credentials_path = 'C:/Users/grhuynh')
|
||||
ggplotSmoothMap(model,shp, censusdb = censusdb)
|
||||
|
||||
|
||||
|
||||
|
||||
## simulated data with bing query search data, run fixed effects model
|
||||
queryIn <- list(
|
||||
SELECT =list(COLUMN=c('site_type','residence_census_tract')),
|
||||
WHERE =list(COLUMN='site_type', IN = c('kiosk')),
|
||||
GROUP_BY =list(COLUMN=c('site_type','residence_census_tract')),
|
||||
SUMMARIZE=list(COLUMN='site_type', IN= c('kiosk'))
|
||||
)
|
||||
db <- expandDB( selectFromDB( queryIn ) )
|
||||
modelDefinition <- smoothModel(db=db, shp=shp)
|
||||
model <- modelTrainR(modelDefinition)
|
||||
|
||||
|
||||
#ADD bing query search data and run fixed effects model
|
||||
searchdb <- db
|
||||
searchdb$observedData$site_type <- NULL #this messes up the fixed effects model because it has only 1 level
|
||||
|
||||
#add bing queries as random numbers from 0 to max positives
|
||||
#searchdb$observedData$query_weight <- runif(nrow(searchdb$observedData), min = 0, max = max(searchdb$observedData$positive)) #add random numbers between 0 and max #of positives as the bing weights
|
||||
|
||||
#add bing queries as the same values as positive
|
||||
searchdb$observedData$query_weight <- searchdb$observedData$positive
|
||||
|
||||
#run the model
|
||||
modelDefinition <- effectsModel(db=searchdb, shp=shp)
|
||||
model_search <- modelTrainR(modelDefinition)
|
||||
|
||||
#compare values just for looking at the table
|
||||
temp_search <- select(model_search$modeledData, residence_census_tract, query_weight, positive, modeled_count_mode)
|
||||
temp_smooth <- select(model$modeledData, residence_census_tract, positive, modeled_count_mode)
|
||||
joined_smoothsame <- left_join(temp_smooth, temp_search, by = "residence_census_tract")
|
||||
|
||||
ggplotSmoothEffects(model, model_search,shp)
|
||||
|
||||
|
||||
|
||||
|
||||
# simulated data at_home catchment map
|
||||
queryIn <- list(
|
||||
|
@ -43,7 +84,6 @@ db <- expandDB( selectFromDB( queryIn ) )
|
|||
|
||||
modelDefinition <- smoothModel(db=db, shp=shp)
|
||||
model <- modelTrainR(modelDefinition)
|
||||
|
||||
ggplotSmoothMap(model,shp)
|
||||
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче