Merge branch 'master' of github.com:seattleflu/incidence-mapper into no-model-duplicates

2019-07-16 15:30:15 -07:00 · 2019-07-16 15:30:15 -07:00 · f6fb724ed9
--- a/dbViewR/NAMESPACE
+++ b/dbViewR/NAMESPACE
@ -1,5 +1,6 @@
 # Generated by roxygen2: do not edit by hand

+export(addCensusData)
 export(expandDB)
 export(masterSpatialDB)
 export(selectFromDB)
@ -11,6 +12,7 @@ import(jsonlite)
 import(lubridate)
 import(rgdal)
 import(sf)
+import(tidycensus)
 importFrom(RCurl,getURL)
 importFrom(lazyeval,interp)
 importFrom(magrittr,"%>%")
--- a/dbViewR/R/addCensusData.R
+++ b/dbViewR/R/addCensusData.R
@ -0,0 +1,75 @@
+#' addCensusData: function for fetching census data and merging it with input dataset
+#'
+#' @param db tibble with valid column names for INLA model
+#' @param geography geography of your data, default value - 'tract', for other options check here  
+#' @param variables character string or vector of character strings of variable IDs.
+#' @param year the year for which you are requesting data
+#' @param state the state for which you are requesting data, default value - 'WA'
+#' @param county the county for which you are requesting data, default value - 'King'
+#' @param source source database, one of: 'acs' for five-year American Community Survey or 'decennial' for decennial Census
+#' @param credentials_path path to your file with Census API key, you can get your own census api key here: https://api.census.gov/data/key_signup.html  
+#' @return db tibble with flu and census data
+#'
+#' @import tidycensus
+#' @import dplyr
+#' @importFrom magrittr %>%
+#'
+#' @export
+#' @examples
+#' 
+addCensusData <- function( db = dbViewR::selectFromDB(), 
+                           geography = "tract", variables, year,
+                           state = "WA", county = "King",
+                           source = "decennial",
+                           credentials_path = '/home/rstudio/seattle_flu')
+  
+{
+  #get the census api key value from the file
+  keyFile <- file(file.path(credentials_path, "census_api_key.txt"), open = 'r') 
+  keyValue <- readLines(keyFile, n = 1)
+  close(keyFile)
+  
+  Sys.setenv(CENSUS_API_KEY = keyValue)
+  
+  if(source == "decennial")
+     {
+       dataset <- "sf1"
+  }
+  else if(source =="acs")
+  {
+    dataset <- "acs5"
+  }
+  else {
+    print('unknown source of census data!')
+    return(db)
+  }
+
+  varNames <- tidycensus::load_variables(year, dataset, cache = TRUE) 
+  varToQuery <- intersect(variables, varNames$name)
+  if (length(variables)<length(varToQuery))
+    print('some requested variables are not presented in census data')
+  
+  
+  if(source == "decennial")
+  {
+    censusData <- tidycensus::get_decennial(geography = geography, state = state, county = county, 
+                         variables = varToQuery, year = year)
+    for (var in varToQuery)
+    {
+      censusVar <- censusData %>% dplyr::filter(variable == var) %>% dplyr::select(GEOID, !!quo_name(paste(var, year, sep = ".")) := value)
+      db$observedData <- dplyr::left_join(db$observedData, censusVar, by = c("residence_census_tract"="GEOID"))
+    }
+  }
+  else if(source =="acs")
+  {
+    censusData <- tidycensus::get_acs(geography = geography, state = state, county = county, 
+                         variables = varToQuery, year = year)
+    for (var in varToQuery)
+    {
+      censusVar <- censusData %>% dplyr::filter(variable == var) %>% dplyr::select(GEOID, !!quo_name(paste(var, year, sep = ".")) := estimate)
+      db$observedData <- dplyr::left_join(db$observedData, censusVar, by = c("residence_census_tract"="GEOID"))
+    }
+  }
+  
+ return(db)
+}
--- a/dbViewR/R/loadBingData.R
+++ b/dbViewR/R/loadBingData.R
@ -0,0 +1,56 @@
+#' addCensusData: function for loading bing search queries and categorizing which census tract it's in
+#'
+#' @param geography geography to bin the bing queries, default value - 'tract'  
+#' @param bing_queries_path path to your file with Census API key, you can get your own census api key here: https://api.census.gov/data/key_signup.html  
+#' @return db dataframe with number of bing queries per tract
+#'
+#' @import rgeos
+#' @import sp
+#' @import rgdal
+#' @import dplyr
+#'
+#' @export
+#' @examples
+#' 
+
+loadBingData <- function( geography = "tract", 
+                           bing_queries_path = '/home/rstudio/seattle_flu')
+  
+{
+  
+  bing_data <- read.csv(file.path(bing_queries_path, "Bing_queries.csv"), header=FALSE)
+  #edit which columns are which
+  bing_queries_latlong <- data.frame(bing_data)
+  coordinates(bing_queries_latlong)<- ~V3 + V2
+ 
+   #this url is broken
+  #url <-'https://github.com/seattleflu/simulated-data/blob/master/kingCountySpatialData/2016_CensusTracts_KingCountyWa.zip'
+  
+  #for now download from original source 
+  url <-'https://www.seattle.gov/Documents/Departments/OPCD/Demographics/GeographicFilesandMaps/KingCountyTractsShapefiles.zip'
+  file <- basename(url)
+  download.file(url, file)
+  unzip(file, exdir="KingCountyTractsShapefiles")
+  
+  
+  #read in shapefile
+  if (geography == "tract"){
+    kc_shp <- readOGR(dsn='KingCountyTractsShapefiles', layer = "kc_tract_10")
+  }
+  #reproject bing queries onto king county shape file coordinate system 
+  proj4string(bing_queries_latlong) <- CRS("+proj=longlat")
+  bing_queries_latlong <- spTransform(bing_queries_latlong, proj4string(kc_shp))
+  
+  #get the census tracts for each of the bing queries
+  bing_queries_censustracts <- over(bing_queries_latlong, kc_shp)
+  
+  #now join with the input data
+  #for some reason there are 398 tracts in the king county shapefile and only 397 in mike's shapefile
+  bing_queries_pertract <- as.data.frame(table(bing_queries_censustracts$GEOID10))
+  #rename columns
+  bing_queries_pertract$Var1 <- as.character(bing_queries_pertract$Var1)
+  bing_queries_pertract <- rename(bing_queries_pertract, num_bing_queries = Freq)
+  bing_queries_pertract <- rename(bing_queries_pertract, GEOID = Var1)
+  
+ return(bing_queries_pertract)
+}
--- a/dbViewR/get_census_data.R
+++ b/dbViewR/get_census_data.R
@ -0,0 +1,65 @@
+
+
+
+library("tidycensus")
+library("tigris")
+library("sf")
+library("mapview")
+#library(totalcensus) #downloads the entire survey data
+#download the 2015 ACS 5-year survey data, which is about 50 GB.
+#download_census("acs5year", 2015)
+
+options(tigris_class = "sf")
+options(tigris_use_cache = TRUE)
+
+#census api guide https://www.census.gov/data/developers/guidance/api-user-guide.html 
+#get your own census api key here: https://api.census.gov/data/key_signup.html  
+#save the key to a txt file named census_api_key.txt
+
+#enter your own user path here
+user_path = "C:/Users/grhuynh"
+
+#get the census api key value from the file
+census_api_key_value<-read.table(file.path(user_path, "census_api_key.txt"), header=FALSE) #read in file
+census_api_key_value<- as.character(census_api_key_value$V1)
+
+#read in the value into the tidycensus library
+census_api_key(census_api_key_value)
+
+
+#read in the shape file for the desired geography, in this case tracts of king county
+state_name = "WA"
+county_name = "King"
+county_tracts <-tracts(state_name, county_name)
+
+
+#to see all variables
+#v17 <- load_variables(2010, "sf1", cache = TRUE) #2010 decennial census
+#v17_acs <- load_variables(2017, "acs5", cache = TRUE) # acs census 
+#view(v17_acs)
+
+
+#get the total population, P001001 in decennial census
+#if need multiple states alternative syntax here https://github.com/walkerke/tidycensus/issues/121
+#if need multiple variable syntax here https://github.com/walkerke/tidycensus/issues/129 
+pop <- get_decennial(geography = "tract", state = "WA", county = county_name, 
+                     variables = "P001001", year = 2010, geometry = TRUE)
+
+#also can get population data from acs
+pop_acs <- get_acs(geography = "tract", state = state_name, county = county_name, 
+                   variables = "B01003_001", year = 2017, geometry = TRUE)
+
+
+#plot population graph
+pop %>%
+  ggplot(aes(fill = value)) + 
+  geom_sf(color = NA) + 
+  coord_sf(crs = 26911) + 
+  scale_fill_viridis_c(option = "magma") 
+
+pop_acs %>%
+  ggplot(aes(fill = estimate)) + 
+  geom_sf(color = NA) + 
+  coord_sf(crs = 26911) + 
+  scale_fill_viridis_c(option = "magma") 
+
--- a/dbViewR/man/addCensusData.Rd
+++ b/dbViewR/man/addCensusData.Rd
@ -0,0 +1,36 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/addCensusData.R
+\name{addCensusData}
+\alias{addCensusData}
+\title{addCensusData: function for fetching census data and merging it with input dataset}
+\usage{
+addCensusData(db = dbViewR::selectFromDB(), geography = "tract",
+  variables, year, state = "WA", county = "King",
+  source = "decennial", credentials_path = "/home/rstudio/seattle_flu")
+}
+\arguments{
+\item{db}{tibble with valid column names for INLA model}
+
+\item{geography}{geography of your data, default value - 'tract', for other options check here}
+
+\item{variables}{character string or vector of character strings of variable IDs.}
+
+\item{year}{the year for which you are requesting data}
+
+\item{state}{the state for which you are requesting data, default value - 'WA'}
+
+\item{county}{the county for which you are requesting data, default value - 'King'}
+
+\item{source}{source database, one of: 'acs' for five-year American Community Survey or 'decennial' for decennial Census}
+
+\item{credentials_path}{path to your file with Census API key, you can get your own census api key here: https://api.census.gov/data/key_signup.html}
+}
+\value{
+db tibble with flu and census data
+}
+\description{
+addCensusData: function for fetching census data and merging it with input dataset
+}
+\examples{
+
+}
--- a/dbViewR/man/loadBingData.Rd
+++ b/dbViewR/man/loadBingData.Rd
@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/loadBingData.R
+\name{loadBingData}
+\alias{loadBingData}
+\title{addCensusData: function for loading bing search queries and categorizing which census tract it's in}
+\usage{
+loadBingData(geography = "tract",
+  bing_queries_path = "/home/rstudio/seattle_flu")
+}
+\arguments{
+\item{geography}{geography to bin the bing queries, default value - 'tract'}
+
+\item{bing_queries_path}{path to your file with Census API key, you can get your own census api key here: https://api.census.gov/data/key_signup.html}
+}
+\value{
+db dataframe with number of bing queries per tract
+}
+\description{
+addCensusData: function for loading bing search queries and categorizing which census tract it's in
+}
+\examples{
+
+}
--- a/incidenceMapR/NAMESPACE
+++ b/incidenceMapR/NAMESPACE
@ -2,6 +2,7 @@

 export(appendCatchmentModel)
 export(constructAdjacencyNetwork)
+export(effectsModel)
 export(latentFieldModel)
 export(modelTrainR)
 export(smoothModel)
--- a/incidenceMapR/R/effectsModel.R
+++ b/incidenceMapR/R/effectsModel.R
@ -1 +1,117 @@
 #' effectsModel: function to define regression models to study effects of covariates from dbViewR object
+
+#GHH copy from latentFieldModel
+#' latentFieldModel: function to define latent field models from dbViewR object
+#'
+#' @param db dbViewR object with valid column names for INLA model.
+#'   Smoothing only makes sense by age, location, and time.  Factor variables cannot be smoothed!
+#' @param shp sf object with residence_census_tract shapes (all higher levels assume iid and not local smoothing)
+#' @param family non-standard family override (default = NULL). 
+#' @param neighborGraph non-standard neighbor graph (default = NULL)
+#' 
+#' @return modelDefinition object for modelTrainR, list with fields
+#'     type = latentField
+#'     family : as input
+#'     formula : model definition for INLA
+#'     inputData : inla-prepared db$observed data
+#'     neighborGraph : as input or derived from shp during formula construction
+#'     
+#' @import INLA
+#'
+#' @export
+#' @examples
+#' return h1n1pdm incidence model by time and location
+#'    modelDefinition <- smoothModel(db = dbViewR::selectFromDB(), shp = dbViewR::masterSpatialDB())
+#'
+effectsModel <- function(db , shp, family = NULL, neighborGraph = NULL){
+  
+  #INLA data frame that may get augmented columns we don't need to see when we're done
+  inputData <- db$observedData
+  
+  # identify intended family
+  if(is.null(family)){
+    if (all(inputData$n == inputData$positive)){
+      family = 'poisson'
+    } else if (any(inputData$n > inputData$positive)){
+      family = 'binomial'
+    } else if (any(inputData$n < inputData$positive)){
+      return('n < positive !!!  invald db$observedData.')
+    }
+  }
+  
+  # construct priors
+  hyper=list()
+  hyper$global <- list(prec = list( prior = "pc.prec", param = 1/10, alpha = 0.01))
+  hyper$local <- list(prec = list( prior = "pc.prec", param = 1/200, alpha = 0.01))
+  hyper$age <- list(prec = list( prior = "pc.prec", param = 1, alpha = 0.01))
+  hyper$time <- list(prec = list( prior = "pc.prec", param = 1/50, alpha = 0.01))
+  
+  
+  # unlike smoothing model, we only replicate latent fields across pathogens, but treat all other factors as fixed effects
+  
+  # find pathogen types
+  if('pathogen' %in% names(db$observedData)){
+    levelSet       <- levels(as.factor(inputData$pathogen))
+    numLevels      <- length(levelSet)
+    
+    validLatentFieldColumns <- c('pathogen')
+    
+  } else {
+    return('error!  must provide "pathogen" column.')
+  }
+  
+  # set family across all levels
+  family <- rep(family,numLevels)
+  
+  # build outcome matrix and replicate list for multiple likelihoods
+  outcome      <- matrix(NA,nrow(inputData),numLevels)
+  replicateIdx <- matrix(NA,nrow(inputData),1)
+  
+  for( k in levelSet){
+    idx <- inputData$pathogen %in% k
+    count <- which(levelSet %in% k)
+    outcome[idx, count] <- inputData$positive[idx]
+    replicateIdx[idx]<-count
+  }
+  
+  # initialize formula for each level
+  if(numLevels>1){
+    outcomeStr <- paste('cbind(',paste(paste('outcome',1:numLevels,sep='.'),sep='',collapse=', '),')',sep='',collapse = '')
+    formula <- as.formula(paste(outcomeStr,'~','pathogen - 1 + catchment',sep=' '))
+  } else { # why does R do inconsistent stuff with column names!?!!
+    #formula <- as.formula('outcome ~ 1 + catchment')
+    formula <- as.formula('outcome ~ 1')
+  }
+  
+  # factors as fixed effects, assuming no interaction terms
+  validFactorNames <- names(db$observedData)[ !( (names(db$observedData) %in% c('pathogen','n','positive')) | 
+                                                   grepl('row',names(db$observedData)) |
+                                                   grepl('age',names(db$observedData)) | 
+                                                   grepl('residence_',names(db$observedData)) | 
+                                                   grepl('work_',names(db$observedData)) |
+                                                   grepl('encounter',names(db$observedData))  )]
+  
+  factorIdx <- names(db$observedData) %in% validFactorNames
+  for(COLUMN in names(db$observedData)[factorIdx]){
+    formula <- as.formula(paste(as.character(formula)[2],'~',paste(as.character(formula)[3],COLUMN,sep='+')))
+  }
+  
+  
+  df <- data.frame(outcome = outcome, inputData, replicateIdx)
+  
+  if(any(grepl('residence', names(inputData)) | grepl('work', names(inputData)))){
+    spatial_domain<-shp$domain[1]
+  } else {
+    spatial_domain <- NULL
+  }
+  
+  modelDefinition <- list(type='effects', family = family, formula = formula, lincomb = c(),
+                          inputData = df, neighborGraph=neighborGraph, hyper=hyper, 
+                          observedData = db$observedData,
+                          queryList = db$queryList,
+                          spatial_domain = spatial_domain)
+  
+  return(modelDefinition)
+}
+
+
--- a/incidenceMapR/R/modelTrainR.R
+++ b/incidenceMapR/R/modelTrainR.R
@ -39,6 +39,12 @@ modelTrainR <- function(modelDefinition){
                inla = model, modelDefinition = modelDefinition))
    
  } else if (modelDefinition$type == 'effects'){
+    #same as smooth data for now?
+    
+    modeledData <- appendSmoothData(model,modelDefinition)
+    
+    # return output data
+    return(list(modeledData = modeledData, inla = model, modelDefinition = modelDefinition))
    
  }
  
--- a/incidenceMapR/man/effectsModel.Rd
+++ b/incidenceMapR/man/effectsModel.Rd
@ -0,0 +1,36 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/effectsModel.R
+\name{effectsModel}
+\alias{effectsModel}
+\title{effectsModel: function to define regression models to study effects of covariates from dbViewR object
+latentFieldModel: function to define latent field models from dbViewR object}
+\usage{
+effectsModel(db, shp, family = NULL, neighborGraph = NULL)
+}
+\arguments{
+\item{db}{dbViewR object with valid column names for INLA model.
+Smoothing only makes sense by age, location, and time.  Factor variables cannot be smoothed!}
+
+\item{shp}{sf object with residence_census_tract shapes (all higher levels assume iid and not local smoothing)}
+
+\item{family}{non-standard family override (default = NULL).}
+
+\item{neighborGraph}{non-standard neighbor graph (default = NULL)}
+}
+\value{
+modelDefinition object for modelTrainR, list with fields
+    type = latentField
+    family : as input
+    formula : model definition for INLA
+    inputData : inla-prepared db$observed data
+    neighborGraph : as input or derived from shp during formula construction
+}
+\description{
+effectsModel: function to define regression models to study effects of covariates from dbViewR object
+latentFieldModel: function to define latent field models from dbViewR object
+}
+\examples{
+return h1n1pdm incidence model by time and location
+   modelDefinition <- smoothModel(db = dbViewR::selectFromDB(), shp = dbViewR::masterSpatialDB())
+
+}
--- a/incidenceMapR/man/smoothModel.Rd
+++ b/incidenceMapR/man/smoothModel.Rd
@ -1,9 +1,11 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/smoothModel.R
+% Please edit documentation in R/smoothModel - Copy.R, R/smoothModel.R
 \name{smoothModel}
 \alias{smoothModel}
 \title{smoothModel: function to define variable smoothing models from dbViewR object}
 \usage{
+smoothModel(db, shp, family = NULL, neighborGraph = NULL)
+
 smoothModel(db, shp, family = NULL, neighborGraph = NULL)
 }
 \arguments{
@ -14,9 +16,25 @@ Smoothing only makes sense by age, location, and time.  Factor variables cannot

 \item{family}{non-standard family override (default = NULL).}

+\item{neighborGraph}{non-standard neighbor graph (default = NULL)}
+
+\item{db}{dbViewR object with valid column names for INLA model.
+Smoothing only makes sense by age, location, and time.  Factor variables cannot be smoothed!}
+
+\item{shp}{sf object with residence_census_tract shapes (all higher levels assume iid and not local smoothing)}
+
+\item{family}{non-standard family override (default = NULL).}
+
 \item{neighborGraph}{non-standard neighbor graph (default = NULL)}
 }
 \value{
+modelDefinition object for modelTrainR, list with fields
+    type = smooth
+    family : as input
+    formula : model definition for INLA
+    inputData : inla-prepared db$observed data
+    neighborGraph : as input or derived from shp during formula construction
+
 modelDefinition object for modelTrainR, list with fields
    type = smooth
    family : as input
@ -25,10 +43,15 @@ modelDefinition object for modelTrainR, list with fields
    neighborGraph : as input or derived from shp during formula construction
 }
 \description{
+smoothModel: function to define variable smoothing models from dbViewR object
+
 smoothModel: function to define variable smoothing models from dbViewR object
 }
 \examples{
 return h1n1pdm incidence model by time and location
   modelDefinition <- smoothModel(db = dbViewR::selectFromDB(), shp = dbViewR::masterSpatialDB())

+return h1n1pdm incidence model by time and location
+   modelDefinition <- smoothModel(db = dbViewR::selectFromDB(), shp = dbViewR::masterSpatialDB())
+
 }
--- a/modelServR/NAMESPACE
+++ b/modelServR/NAMESPACE
@ -1,16 +1,5 @@
 # Generated by roxygen2: do not edit by hand

-export(getHumanReadableModelIdFromModel)
-export(getHumanReadableModelIdFromQuery)
-export(getModelIdFromModel)
-export(getModelIdFromQuery)
-export(getModelQueryObjectFromModel)
-export(getModelQueryObjectFromQuery)
-export(loadModelFileById)
-export(queryLoadedModel)
-export(queryModelById)
-export(returnModel)
-export(saveModel)
 import(digest)
 import(jsonlite)
 import(logging)
--- a/modelTestR/NAMESPACE
+++ b/modelTestR/NAMESPACE
@ -1,5 +1,6 @@
 # Generated by roxygen2: do not edit by hand

+export(ggplotSmoothEffects)
 export(ggplotSmoothMap)
 export(ggplotSmoothSequential)
 import(dplyr)
--- a/modelTestR/R/ggplotDiagnosticPlots.R
+++ b/modelTestR/R/ggplotDiagnosticPlots.R
@ -0,0 +1,42 @@
+#' ggplotDiagnosticPlots: function for diagnostic plots for INLA model
+#'
+#' @param model Model object from incidenceMapR::modelTrainR
+#' @param observed The observed values. If NULL defined by model family
+#' @param CI Add credible intervals to the fitted values?
+#' @param binwidth The size of the bins used for the histogram. If NULL ggplot guesses for you.
+#' @param se Plot a ribbon showing the standard error of the smoother.
+#' @param method What method should be used for the smoother. Defaults to loess unless data is large. Other options include 'gam', 'loess', 'lm'. See geom_smooth for details.
+#' @return List of 3 ggplot objects: default plots, observed vs fitted and histogram, standardized residuals
+#'
+#' @import ggplot2
+#' @import INLAutils
+#' 
+#' @export
+#' @examples
+#' ggplotDiagnosticPlots(model)
+#'
+ggplotDiagnosticPlots <- function(model, observed = NULL, CI = FALSE, binwidth = NULL, se = TRUE, method = "auto"){
+  
+
+  plots <- list()
+  
+  # for default plots (marginals, )
+  plots[[1]] <- autoplot(model$inla)
+  
+  if (is.null(observed))
+  {
+      if (model$modelDefinition$family == 'poisson') 
+        observed <- model$modelDefinition$observedData$positive
+      else
+        return('observed is missing and can not be defined from model family!!! provide observed data.')
+  }
+  
+  # for observed vs fitted and histogram plot
+  plots[[2]] <- ggplot_inla_residuals(model$inla, observed, CI, binwidth)  
+  # for Standardized Residuals
+  plots[[3]] <- ggplot_inla_residuals2(model$inla, observed, CI, se, method) 
+  print(plots[[3]])
+  
+ return(invisible(plots))
+  
+}
--- a/modelTestR/R/ggplotWrappers.R
+++ b/modelTestR/R/ggplotWrappers.R
@ -16,12 +16,18 @@
 #' plotDat$positive[plotDat$positive==0]<-NaN
 #' ggplotSmoothMap(plotDat,shp)
 #'
-ggplotSmoothMap <- function(model, shp, title='', shape_level = 'residence_census_tract'){
+ggplotSmoothMap <- function(model, shp, censusdb = NULL, title='', shape_level = 'residence_census_tract'){


 plotDat <- right_join(model$modeledData,shp, by=shape_level)
 plotDat$positive[plotDat$n==0]<-NaN

+#convert to fraction 
+if(!is.null(censusdb)){
+  plotDat$positive <- plotDat$positive / censusdb$observedData$P005004.2010
+  plotDat$modeled_count_mode <- plotDat$modeled_count_mode / censusdb$observedData$P005004.2010
+}
+
 bbox<-sf::st_bbox(shp$geometry)

 mapSettings <- ggplot() + #xlim(c(min(122.5, -bbox[1]),max(121.7,-bbox[3]))) + ylim(c(max(47.17,bbox[2]),min(47.76,bbox[4]))) +
@ -35,6 +41,7 @@ p<-mapSettings + geom_sf(data=shp,size=0.1,aes(fill=NaN))

 colorLimits<-c(min(c(plotDat$positive,plotDat$modeled_count_mode),na.rm=TRUE),max(c(plotDat$positive,plotDat$modeled_count_mode),na.rm=TRUE))
 colorBreaks<-round(seq(min(colorLimits),sqrt(max(colorLimits)), length.out = 6)^2)
+colorBreaks<-seq(min(colorLimits),sqrt(max(colorLimits)), length.out = 6)^2

 p1 <- p + geom_sf(data=plotDat,size=0, aes(fill=positive))  +
  guides(fill=guide_legend(title="observed")) +
@ -47,6 +54,127 @@ p2 <- p + geom_sf(data=plotDat,size=0, aes(fill=modeled_count_mode))  +
 grid.arrange(p1,p2,nrow=1, top=textGrob(title))
 }

+
+#' ggplotFixedEffects: function for plotting data, smoothed model, and fixed effects model next to each other
+#'
+#' @param plotDatEffects data.frame that joins dbViewR::modeledData
+#' @return ggplot object
+#'
+#' @import ggplot2
+#' @import gridExtra
+#' @import viridis
+#'
+#' @export
+#' @examples
+#'
+#'
+ggplotFixedEffects <- function(model_effects, shp, censusdb = NULL, title='', shape_level = 'residence_census_tract'){
+  
+  plotDatEffects <- right_join(model_effects$modeledData,shp, by=shape_level)
+  plotDatEffects$positive[plotDatEffects$n==0]<-NaN
+  
+  
+  #convert to fraction 
+  if(!is.null(censusdb)){
+    plotDatEffects$positive <- plotDatEffects$positive / censusdb$observedData$P005004.2010
+    plotDatEffects$modeled_count_mode <- plotDatEffects$modeled_count_mode / censusdb$observedData$P005004.2010
+  }
+  
+  bbox<-sf::st_bbox(shp$geometry)
+  
+  mapSettings <- ggplot() + #xlim(c(min(122.5, -bbox[1]),max(121.7,-bbox[3]))) + ylim(c(max(47.17,bbox[2]),min(47.76,bbox[4]))) +
+    theme_bw() +
+    theme(axis.text=element_blank(),axis.ticks=element_blank(),panel.grid.major=element_line(colour="transparent"), panel.border = element_blank())
+  p<-mapSettings + geom_sf(data=shp,size=0.1,aes(fill=NaN))
+  
+  # mapSettingsTight <- ggplot() + xlim(c(122.5,122.15)) + ylim(c(47.42,47.76)) +  theme_bw() +
+  #   theme(axis.text=element_blank(),axis.ticks=element_blank(),panel.grid.major=element_line(colour="transparent"), panel.border = element_blank())
+  # p<-mapSettingsTight + geom_sf(data=shp,size=0.1,aes(fill=NaN))
+  
+  colorLimits<-c(min(c(plotDatEffects$positive,plotDatEffects$modeled_count_mode),na.rm=TRUE),max(c(plotDatEffects$positive,plotDatEffects$modeled_count_mode),na.rm=TRUE))
+  colorBreaks<-round(seq(min(colorLimits),sqrt(max(colorLimits)), length.out = 6)^2)
+  colorBreaks<-seq(min(colorLimits),sqrt(max(colorLimits)), length.out = 6)^2
+  
+  p1 <- p + geom_sf(data=plotDatEffects,size=0, aes(fill=num_bing_queries))  +
+    guides(fill=guide_legend(title="num_bing_queries")) +
+    scale_fill_viridis(na.value="transparent",trans = "sqrt",breaks=colorBreaks,limits=colorLimits)
+  
+  p2 <- p + geom_sf(data=plotDatEffects,size=0, aes(fill=modeled_count_mode))  +
+    guides(fill=guide_legend(title="Expected")) +
+    scale_fill_viridis(na.value="transparent",trans = "sqrt",breaks=colorBreaks,limits=colorLimits)
+  
+  grid.arrange(p1,p2,nrow=1, top=textGrob(title))
+  
+}
+
+
+#' ggplotSmoothEffects: function for plotting smoothed model and fixed effects model next to each other
+#'
+#' @param plotDat data.frame that joins dbViewR::modeledData
+#' @return ggplot object
+#'
+#' @import ggplot2
+#' @import gridExtra
+#' @import viridis
+#'
+#' @export
+#' @examples
+#'
+#'
+ggplotSmoothEffects <- function(model_smooth, model_effects, shp, censusdb = NULL, title='', shape_level = 'residence_census_tract'){
+  
+  plotDat <- right_join(model_smooth$modeledData,shp, by=shape_level)
+  plotDat$positive[plotDat$n==0]<-NaN
+  
+  plotDatEffects <- right_join(model_effects$modeledData,shp, by=shape_level)
+  plotDatEffects$positive[plotDatEffects$n==0]<-NaN
+  
+  
+  #convert to fraction 
+  if(!is.null(censusdb)){
+    plotDat$positive <- plotDat$positive / censusdb$observedData$P005004.2010
+    plotDat$modeled_count_mode <- plotDat$modeled_count_mode / censusdb$observedData$P005004.2010
+  
+    plotDatEffects$positive <- plotDatEffects$positive / censusdb$observedData$P005004.2010
+    plotDatEffects$modeled_count_mode <- plotDatEffects$modeled_count_mode / censusdb$observedData$P005004.2010
+  }
+  
+  bbox<-sf::st_bbox(shp$geometry)
+  
+  mapSettings <- ggplot() + #xlim(c(min(122.5, -bbox[1]),max(121.7,-bbox[3]))) + ylim(c(max(47.17,bbox[2]),min(47.76,bbox[4]))) +
+    theme_bw() +
+    theme(axis.text=element_blank(),axis.ticks=element_blank(),panel.grid.major=element_line(colour="transparent"), panel.border = element_blank())
+  p<-mapSettings + geom_sf(data=shp,size=0.1,aes(fill=NaN))
+  
+  # mapSettingsTight <- ggplot() + xlim(c(122.5,122.15)) + ylim(c(47.42,47.76)) +  theme_bw() +
+  #   theme(axis.text=element_blank(),axis.ticks=element_blank(),panel.grid.major=element_line(colour="transparent"), panel.border = element_blank())
+  # p<-mapSettingsTight + geom_sf(data=shp,size=0.1,aes(fill=NaN))
+  
+  colorLimits<-c(min(c(plotDat$positive,plotDat$modeled_count_mode),na.rm=TRUE),max(c(plotDat$positive,plotDat$modeled_count_mode),na.rm=TRUE))
+  colorBreaks<-round(seq(min(colorLimits),sqrt(max(colorLimits)), length.out = 6)^2)
+  colorBreaks<-seq(min(colorLimits),sqrt(max(colorLimits)), length.out = 6)^2
+  
+  p1 <- p + geom_sf(data=plotDat,size=0, aes(fill=positive))  +
+    guides(fill=guide_legend(title="observed")) +
+    scale_fill_viridis(na.value="transparent",trans = "sqrt",breaks=colorBreaks,limits=colorLimits)
+  
+  p2 <- p + geom_sf(data=plotDat,size=0, aes(fill=modeled_count_mode))  +
+    guides(fill=guide_legend(title="expected")) +
+    scale_fill_viridis(na.value="transparent",trans = "sqrt",breaks=colorBreaks,limits=colorLimits)
+  
+  p3 <- p + geom_sf(data=plotDatEffects,size=0, aes(fill=num_bing_queries))  +
+    guides(fill=guide_legend(title="num_bing_queries")) +
+    scale_fill_viridis(na.value="transparent",trans = "sqrt",breaks=colorBreaks,limits=colorLimits)
+  
+  p4 <- p + geom_sf(data=plotDatEffects,size=0, aes(fill=modeled_count_mode))  +
+    guides(fill=guide_legend(title="Expected")) +
+    scale_fill_viridis(na.value="transparent",trans = "sqrt",breaks=colorBreaks,limits=colorLimits)
+  
+  grid.arrange(p1,p2,p3,p4,nrow=2, top=textGrob(title))
+  
+}
+
+
 #' ggplotSmoothSequential: function for plotting data and smoothed model next to each other as sequential variable (timeseries, age)
 #'
 #' NOT YET IMPLEMENTED!
--- a/modelTestR/man/ggplotDiagnosticPlots.Rd
+++ b/modelTestR/man/ggplotDiagnosticPlots.Rd
@ -0,0 +1,32 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ggplotDiagnosticPlots.R
+\name{ggplotDiagnosticPlots}
+\alias{ggplotDiagnosticPlots}
+\title{ggplotDiagnosticPlots: function for diagnostic plots for INLA model}
+\usage{
+ggplotDiagnosticPlots(model, observed = NULL, CI = FALSE,
+  binwidth = NULL, se = TRUE, method = "auto")
+}
+\arguments{
+\item{model}{Model object from incidenceMapR::modelTrainR}
+
+\item{observed}{The observed values. If NULL defined by model family}
+
+\item{CI}{Add credible intervals to the fitted values?}
+
+\item{binwidth}{The size of the bins used for the histogram. If NULL ggplot guesses for you.}
+
+\item{se}{Plot a ribbon showing the standard error of the smoother.}
+
+\item{method}{What method should be used for the smoother. Defaults to loess unless data is large. Other options include 'gam', 'loess', 'lm'. See geom_smooth for details.}
+}
+\value{
+List of 3 ggplot objects: default plots, observed vs fitted and histogram, standardized residuals
+}
+\description{
+ggplotDiagnosticPlots: function for diagnostic plots for INLA model
+}
+\examples{
+ggplotDiagnosticPlots(model)
+
+}
--- a/modelTestR/man/ggplotFixedEffects.Rd
+++ b/modelTestR/man/ggplotFixedEffects.Rd
@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ggplotWrappers.R
+\name{ggplotFixedEffects}
+\alias{ggplotFixedEffects}
+\title{ggplotFixedEffects: function for plotting data, smoothed model, and fixed effects model next to each other}
+\usage{
+ggplotFixedEffects(model_effects, shp, censusdb = NULL, title = "",
+  shape_level = "residence_census_tract")
+}
+\arguments{
+\item{plotDatEffects}{data.frame that joins dbViewR::modeledData}
+}
+\value{
+ggplot object
+}
+\description{
+ggplotFixedEffects: function for plotting data, smoothed model, and fixed effects model next to each other
+}
+\examples{
+
+
+}
--- a/modelTestR/man/ggplotSmoothEffects.Rd
+++ b/modelTestR/man/ggplotSmoothEffects.Rd
@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ggplotWrappers.R
+\name{ggplotSmoothEffects}
+\alias{ggplotSmoothEffects}
+\title{ggplotSmoothEffects: function for plotting data, smoothed model, and fixed effects model next to each other}
+\usage{
+ggplotSmoothEffects(model_smooth, model_effects, shp, censusdb = NULL,
+  title = "", shape_level = "residence_census_tract")
+}
+\arguments{
+\item{plotDat}{data.frame that joins dbViewR::modeledData}
+}
+\value{
+ggplot object
+}
+\description{
+NOT YET IMPLEMENTED!
+}
+\examples{
+plotDat <- right_join(model$modeledData,shp, by=c('residence_census_tract'))
+plotDat$positive[plotDat$positive==0]<-NaN
+ggplotSmoothSequential(plotDat)
+
+
+}
--- a/modelTestR/man/ggplotSmoothMap.Rd
+++ b/modelTestR/man/ggplotSmoothMap.Rd
@ -4,7 +4,7 @@
 \alias{ggplotSmoothMap}
 \title{ggplotSmoothMap: function for plotting data and smoothed model next to each other on map}
 \usage{
-ggplotSmoothMap(model, shp, title = "",
+ggplotSmoothMap(model, shp, censusdb = NULL, title = "",
  shape_level = "residence_census_tract")
 }
 \arguments{
--- a/modelTestR/testEffectsModel.R
+++ b/modelTestR/testEffectsModel.R
@ -0,0 +1,74 @@
+# testModelTrainR
+# script to test incidenceMapR package
+
+library(dbViewR)
+library(incidenceMapR)
+library(modelTestR)
+
+shp <- masterSpatialDB()  # census-tract shapefiles
+
+# ggplot build eventually will be replaced by function ggplotSmoothSequential
+library(ggplot2)
+plotSettings <- ggplot() + theme_bw() +  theme(panel.border = element_blank()) + xlab('')
+
+###################################
+##### smoothing models ############
+###################################
+
+## simulated data
+queryIn <- list(
+  SELECT   =list(COLUMN=c('site_type','residence_census_tract')),
+  WHERE    =list(COLUMN='site_type', IN = c('kiosk')),
+  GROUP_BY =list(COLUMN=c('site_type','residence_census_tract')),
+  SUMMARIZE=list(COLUMN='site_type', IN= c('kiosk'))
+)
+db <- expandDB( selectFromDB(  queryIn ) )
+
+#smooth model
+modelDefinition <- smoothModel(db=db, shp=shp)
+model <- modelTrainR(modelDefinition)
+ggplotSmoothMap(model,shp)
+ggplotDiagnosticPlots(model) #plot marginals and residuals
+
+
+####################################################
+##### fixed effects models: bing queries ############
+####################################################
+## simulated data
+queryIn <- list(
+  SELECT   =list(COLUMN=c('site_type','residence_census_tract')),
+  WHERE    =list(COLUMN='site_type', IN = c('kiosk')),
+  GROUP_BY =list(COLUMN=c('site_type','residence_census_tract')),
+  SUMMARIZE=list(COLUMN='site_type', IN= c('kiosk'))
+)
+db <- expandDB( selectFromDB(  queryIn ) )
+
+#get the number of bing queries per tract
+bing_queries_pertract <- loadBingData(geography = "tract", 
+                                      bing_queries_path = 'C:/Users/grhuynh/FluMapModel')
+#or add bing queries as random numbers from 0 to max positives
+#searchdb$observedData$num_bing_queries <- runif(nrow(searchdb$observedData), min = 0, max = max(searchdb$observedData$positive)) #add random numbers between 0 and max #of positives as the bing weights 
+
+#or add bing queries as the same values as positive 
+#searchdb$observedData$num_bing_queries <- searchdb$observedData$positive 
+
+
+#join to the observed data
+searchdb <- db
+library(dplyr)
+searchdb$observedData <- inner_join(searchdb$observedData, bing_queries_pertract, by = c("residence_census_tract" = "GEOID"))
+
+#run the fixed effects model
+searchdb$observedData$site_type <- NULL #this messes up the fixed effects model because it has only 1 level
+modelDefinition <- effectsModel(db=searchdb, shp=shp)
+model_fixedeffects <- modelTrainR(modelDefinition)
+
+#plot just the fixed effects model
+ggplotFixedEffects(model_fixedeffects,shp)
+ggplotDiagnosticPlots(model_fixedeffects) #the residuals plot doesn't work
+
+####################################################
+##### compare smooth and fixed effects models ############
+####################################################
+#run all above code
+ggplotSmoothEffects(model, model_fixedeffects,shp)
--- a/modelTestR/testSmoothModel.R
+++ b/modelTestR/testSmoothModel.R
@ -28,9 +28,50 @@ db <- expandDB( selectFromDB(  queryIn ) )

 modelDefinition <- smoothModel(db=db, shp=shp)
 model <- modelTrainR(modelDefinition)
-
 ggplotSmoothMap(model,shp)

+#OR plot as fraction of census population in each tract
+censusdb <- addCensusData(db = db, source = "decennial", variable = c("B01003_001", "B09001_001", "P005004"), year = 2010, credentials_path = 'C:/Users/grhuynh')
+ggplotSmoothMap(model,shp, censusdb = censusdb)
+
+
+
+
+## simulated data with bing query search data, run fixed effects model
+queryIn <- list(
+  SELECT   =list(COLUMN=c('site_type','residence_census_tract')),
+  WHERE    =list(COLUMN='site_type', IN = c('kiosk')),
+  GROUP_BY =list(COLUMN=c('site_type','residence_census_tract')),
+  SUMMARIZE=list(COLUMN='site_type', IN= c('kiosk'))
+)
+db <- expandDB( selectFromDB(  queryIn ) )
+modelDefinition <- smoothModel(db=db, shp=shp)
+model <- modelTrainR(modelDefinition)
+
+
+#ADD bing query search data and run fixed effects model
+searchdb <- db
+searchdb$observedData$site_type <- NULL #this messes up the fixed effects model because it has only 1 level
+
+#add bing queries as random numbers from 0 to max positives
+#searchdb$observedData$query_weight <- runif(nrow(searchdb$observedData), min = 0, max = max(searchdb$observedData$positive)) #add random numbers between 0 and max #of positives as the bing weights 
+
+#add bing queries as the same values as positive 
+searchdb$observedData$query_weight <- searchdb$observedData$positive 
+
+#run the model
+modelDefinition <- effectsModel(db=searchdb, shp=shp)
+model_search <- modelTrainR(modelDefinition)
+
+#compare values just for looking at the table
+temp_search <- select(model_search$modeledData, residence_census_tract, query_weight, positive, modeled_count_mode)
+temp_smooth <- select(model$modeledData, residence_census_tract, positive, modeled_count_mode)
+joined_smoothsame <- left_join(temp_smooth, temp_search, by = "residence_census_tract")
+
+ggplotSmoothEffects(model, model_search,shp)
+
+
+

 # simulated data at_home catchment map
 queryIn <- list(
@ -43,7 +84,6 @@ db <- expandDB( selectFromDB(  queryIn ) )

 modelDefinition <- smoothModel(db=db, shp=shp)
 model <- modelTrainR(modelDefinition)
-
 ggplotSmoothMap(model,shp)