FluMapModel/modelVisualizeR/demoSmoothModel.R

156 строки
5.2 KiB
R

# testModelTrainR
# script to test incidenceMapR package
library(dbViewR)
library(incidenceMapR)
library(modelVisualizeR)
library(dplyr)
shp <- masterSpatialDB() # census-tract shapefiles
# ggplot build eventually will be replaced by function ggplotSmoothSequential
library(ggplot2)
plotSettings <- ggplot() + theme_bw() + theme(panel.border = element_blank()) + xlab('')
###################################
##### smoothing models ############
###################################
# simulated data kiosk catchment map
queryIn <- list(
SELECT =list(COLUMN=c('site_type','residence_census_tract')),
WHERE =list(COLUMN='site_type', IN = c('kiosk')),
GROUP_BY =list(COLUMN=c('site_type','residence_census_tract')),
SUMMARIZE=list(COLUMN='site_type', IN= c('kiosk'))
)
db <- expandDB( selectFromDB( queryIn ) )
modelDefinition <- smoothModel(db=db, shp=shp)
model <- modelTrainR(modelDefinition)
ggplotSmoothMap(model,shp)
#OR plot as fraction of census population in each tract
censusdb <- addCensusData(db = db, source = "decennial", variable = c("B01003_001", "B09001_001", "P005004"), year = 2010, credentials_path = 'C:/Users/grhuynh')
ggplotSmoothMap(model,shp, censusdb = censusdb)
## simulated data with bing query search data, run fixed effects model
queryIn <- list(
SELECT =list(COLUMN=c('site_type','residence_census_tract')),
WHERE =list(COLUMN='site_type', IN = c('kiosk')),
GROUP_BY =list(COLUMN=c('site_type','residence_census_tract')),
SUMMARIZE=list(COLUMN='site_type', IN= c('kiosk'))
)
db <- expandDB( selectFromDB( queryIn ) )
modelDefinition <- smoothModel(db=db, shp=shp)
model <- modelTrainR(modelDefinition)
#ADD bing query search data and run fixed effects model
searchdb <- db
searchdb$observedData$site_type <- NULL #this messes up the fixed effects model because it has only 1 level
#add bing queries as random numbers from 0 to max positives
#searchdb$observedData$query_weight <- runif(nrow(searchdb$observedData), min = 0, max = max(searchdb$observedData$positive)) #add random numbers between 0 and max #of positives as the bing weights
#add bing queries as the same values as positive
searchdb$observedData$query_weight <- searchdb$observedData$positive
#run the model
modelDefinition <- effectsModel(db=searchdb, shp=shp)
model_search <- modelTrainR(modelDefinition)
#compare values just for looking at the table
temp_search <- select(model_search$modeledData, residence_census_tract, query_weight, positive, modeled_count_mode)
temp_smooth <- select(model$modeledData, residence_census_tract, positive, modeled_count_mode)
joined_smoothsame <- left_join(temp_smooth, temp_search, by = "residence_census_tract")
ggplotSmoothEffects(model, model_search,shp)
# simulated data at_home catchment map
queryIn <- list(
SELECT =list(COLUMN=c('site_type','residence_census_tract')),
WHERE =list(COLUMN='site_type', IN = c('at_home')),
GROUP_BY =list(COLUMN=c('site_type','residence_census_tract')),
SUMMARIZE=list(COLUMN='site_type', IN= c('at_home'))
)
db <- expandDB( selectFromDB( queryIn ) )
modelDefinition <- smoothModel(db=db, shp=shp)
model <- modelTrainR(modelDefinition)
ggplotSmoothMap(model,shp)
# test: real childrensHospital data
queryIn <- list(
SELECT =list(COLUMN=c('site_type','residence_cra_name')),
WHERE =list(COLUMN='site_type', IN = c('childrensHospital')),
GROUP_BY =list(COLUMN=c('site_type','residence_cra_name')),
SUMMARIZE=list(COLUMN='site_type', IN= c('all'))
)
db <- expandDB( selectFromDB( queryIn, source='production', na.rm=TRUE ) )
shp<-masterSpatialDB(shape_level = 'cra_name', source = 'seattle_geojson')
modelDefinition <- smoothModel(db=db, shp=shp)
model <- modelTrainR(modelDefinition)
ggplotSmoothMap(model,shp,'childrensHospital', shape_level = 'residence_cra_name')
######################
########### age ######
######################
# simulated data h1n1pdm age fraction
queryIn <- list(
SELECT =list(COLUMN=c('pathogen','age')),
MUTATE =list(COLUMN='age', AS='age_bin'),
GROUP_BY =list(COLUMN=c('age_bin')),
SUMMARIZE=list(COLUMN='pathogen', IN= 'h1n1pdm')
)
db<-selectFromDB( queryIn )
db <- expandDB( db )
modelDefinition <- smoothModel(db=db, shp=shp)
model <- modelTrainR(modelDefinition)
plotDat <- model$modeledData
p1 <- plotSettings + geom_point(data=plotDat,aes(x=age_bin,y=positive/n))
p1 <- p1 + geom_line(data=plotDat,aes(x=age_bin,y=modeled_fraction_mode)) +
geom_ribbon(data=plotDat,aes(x=age_bin,ymin=modeled_fraction_0_025quant,ymax=modeled_fraction_0_975quant),alpha=0.3)
p1 + ggtitle('h1n1pdm fraction')
# simulated data rsva age fraction
queryIn <- list(
SELECT =list(COLUMN=c('pathogen','age')),
MUTATE =list(COLUMN='age', AS='age_bin'),
GROUP_BY =list(COLUMN=c('age_bin')),
SUMMARIZE=list(COLUMN='pathogen', IN= 'rsva')
)
db <- expandDB( selectFromDB( queryIn ) )
modelDefinition <- smoothModel(db=db, shp=shp)
model <- modelTrainR(modelDefinition)
plotDat <- model$modeledData
p1 <- plotSettings + geom_point(data=plotDat,aes(x=age_bin,y=positive/n))
p1 <- p1 + geom_line(data=plotDat,aes(x=age_bin,y=modeled_fraction_mode)) +
geom_ribbon(data=plotDat,aes(x=age_bin,ymin=modeled_fraction_0_025quant,ymax=modeled_fraction_0_975quant),alpha=0.3)
p1 + ggtitle('rsva fraction')