Merge pull request #55 from seattleflu/connect-to-real-data
Connect to real data
This commit is contained in:
Коммит
0dd76ac7da
|
@ -8,23 +8,74 @@ library(modelTestR)
|
|||
library(dplyr)
|
||||
library(ggplot2)
|
||||
|
||||
SRC <- 'production'
|
||||
# SRC <- 'simulated_data'
|
||||
|
||||
## factors and levels to be modeled ##
|
||||
|
||||
#####################
|
||||
### REAL DATA #######
|
||||
#####################
|
||||
|
||||
db <- selectFromDB(queryIn= list(SELECT =c("*")), source = 'production')
|
||||
db <- selectFromDB(queryIn= list(SELECT =c("*")), source = SRC)
|
||||
|
||||
pathogens <- c('all', unique(db$observedData$pathogen))
|
||||
factors <- c('site_type','sex','flu_shot')
|
||||
|
||||
geoLevels <- list( seattle_geojson = c('residence_puma','residence_neighborhood_district_name','residence_cra_name','residence_census_tract'),
|
||||
geoLevels <- list( seattle_geojson = c('residence_neighborhood_district_name','residence_cra_name'),
|
||||
wa_geojson = c('residence_puma'), # census tract impossible due to memory limits
|
||||
king_county_geojson = c('residence_puma','residence_census_tract')
|
||||
king_county_geojson = c('residence_census_tract')
|
||||
)
|
||||
|
||||
#####################################
|
||||
###### timeseries models ############
|
||||
#####################################
|
||||
|
||||
# number of subjects with pathogen and factor at residence location
|
||||
for (SOURCE in names(geoLevels)){
|
||||
for (PATHOGEN in pathogens){
|
||||
for (GEO in geoLevels[[SOURCE]]){
|
||||
|
||||
queryIn <- list(
|
||||
SELECT =list(COLUMN=c('pathogen', factors, GEO,'encountered_week')),
|
||||
WHERE =list(COLUMN='pathogen', IN=PATHOGEN),
|
||||
GROUP_BY =list(COLUMN=c(factors,GEO,"encountered_week")),
|
||||
SUMMARIZE=list(COLUMN='pathogen', IN= PATHOGEN)
|
||||
)
|
||||
|
||||
shp <- masterSpatialDB(shape_level = gsub('residence_','',GEO), source = SOURCE)
|
||||
|
||||
db <- expandDB( selectFromDB( queryIn, source=SRC, na.rm=TRUE ), shp=shp )
|
||||
|
||||
# training occassionaly segfaults on but it does not appear to be deterministic...
|
||||
tryCatch(
|
||||
{
|
||||
|
||||
db <- appendCatchmentModel(db,shp=shp, source=SRC, na.rm=TRUE )
|
||||
|
||||
modelDefinition <- latentFieldModel(db=db, shp=shp)
|
||||
model <- modelTrainR(modelDefinition)
|
||||
|
||||
print(summary(model$inla))
|
||||
|
||||
saveModel(model)
|
||||
|
||||
dir.create('/home/rstudio/seattle_flu/plots/', showWarnings = FALSE)
|
||||
fname <- paste('/home/rstudio/seattle_flu/plots/',paste(PATHOGEN,SOURCE,paste(factors,collapse='-'),GEO,'encountered_week',sep='-'),'.png',sep='')
|
||||
png(filename = fname,width = 6, height = 5, units = "in", res = 300)
|
||||
print(ggplot(model$latentField) +
|
||||
geom_line(aes_string(x='encountered_week',y="modeled_intensity_mode", color=GEO,group =GEO)) +
|
||||
# geom_ribbon(aes_string(x='encountered_week',ymin="modeled_intensity_lower_95_CI", ymax="modeled_intensity_upper_95_CI", fill=GEO,group =GEO),alpha=0.1) +
|
||||
guides(color=FALSE) )
|
||||
dev.off()
|
||||
|
||||
}, error=function(e){cat("ERROR :",conditionMessage(e), "\n")}
|
||||
)
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
##############################
|
||||
## time-independent maps #####
|
||||
##############################
|
||||
|
@ -44,7 +95,7 @@ for (SOURCE in names(geoLevels)){
|
|||
|
||||
shp <- masterSpatialDB(shape_level = gsub('residence_','',GEO), source = SOURCE)
|
||||
|
||||
db <- expandDB( selectFromDB( queryIn, source='production', na.rm=TRUE ), shp=shp )
|
||||
db <- expandDB( selectFromDB( queryIn, source=SRC, na.rm=TRUE ), shp=shp )
|
||||
|
||||
modelDefinition <- smoothModel(db=db, shp=shp)
|
||||
|
||||
|
@ -74,28 +125,27 @@ for (SOURCE in names(geoLevels)){
|
|||
}
|
||||
}
|
||||
|
||||
# age-distributions by pathogen and factor
|
||||
|
||||
|
||||
###########################################
|
||||
# age-distributions by pathogen and factor#
|
||||
###########################################
|
||||
# eventually this should be multinomial models, but indepdendent binomial for now
|
||||
|
||||
### BROKEN BECAUSE NEED TO PROPOGATE AGE_RANGE from DATABASE THROUGH SYSTEM
|
||||
|
||||
for (PATHOGEN in pathogens){
|
||||
for (FACTOR in factors){
|
||||
|
||||
queryIn <- list(
|
||||
SELECT =list(COLUMN=c('pathogen',FACTOR,'age_range_fine_lower')),
|
||||
GROUP_BY =list(COLUMN=c('pathogen',FACTOR,'age_range_fine_lower')),
|
||||
SUMMARIZE=list(COLUMN='pathogen', IN= PATHOGEN)
|
||||
WHERE =list(COLUMN='pathogen', IN= PATHOGEN),
|
||||
GROUP_BY =list(COLUMN=c(FACTOR,'age_range_fine_lower')),
|
||||
SUMMARIZE=list(COLUMN='pathogen', IN= 'all')
|
||||
)
|
||||
# queryIn <- list(
|
||||
# SELECT =list(COLUMN=c(FACTOR,'age_range_fine_lower')),
|
||||
# GROUP_BY =list(COLUMN=c(FACTOR,'age_range_fine_lower')),
|
||||
# SUMMARIZE=list(COLUMN=FACTOR, IN= 'all')
|
||||
# )
|
||||
db<- selectFromDB( queryIn, source='production', na.rm=TRUE )
|
||||
|
||||
|
||||
db<- selectFromDB( queryIn, source=SRC, na.rm=TRUE )
|
||||
|
||||
# get all ages denominator. I'm not sure how to implement this as single query..
|
||||
tmp<-db$observedData %>% group_by_(.dots=c(FACTOR,'age_range_fine_lower')) %>% summarize(n=sum(n))
|
||||
tmp <- selectFromDB( queryIn[c('SELECT','GROUP_BY','SUMMARIZE')], source=SRC, na.rm=TRUE )$observedData %>%
|
||||
group_by_(.dots=c(FACTOR,'age_range_fine_lower')) %>% summarize(n=sum(n))
|
||||
db$observedData <- db$observedData %>% select(-n) %>% left_join(tmp)
|
||||
|
||||
db <- expandDB(db)
|
||||
|
@ -106,61 +156,25 @@ for (PATHOGEN in pathogens){
|
|||
|
||||
saveModel(model)
|
||||
|
||||
p1<-ggplot(model$modeledData) + geom_line(aes(x=age_range_fine_lower ,y=modeled_count_mode, group=pathogen)) +
|
||||
geom_point(aes(x=age_range_fine_lower, y=positive, group=pathogen)) +
|
||||
geom_ribbon(aes(x=age_range_fine_lower ,ymin=modeled_count_0_025quant, ymax=modeled_count_0_975quant , group=pathogen)) +
|
||||
facet_wrap(FACTOR) +
|
||||
ylim(c(0,2*max(model$modeledData$modeled_count_mode)))
|
||||
if (model$modelDefinition$family[1]=='poisson'){
|
||||
p1<-ggplot(model$modeledData) + geom_line(aes(x=age_range_fine_lower ,y=modeled_count_mode, group=pathogen)) +
|
||||
geom_point(aes(x=age_range_fine_lower, y=positive, group=pathogen)) +
|
||||
geom_ribbon(aes(x=age_range_fine_lower ,ymin=modeled_count_lower_95_CI, ymax=modeled_count_upper_95_CI , group=pathogen)) +
|
||||
facet_wrap(FACTOR) +
|
||||
ylim(c(0,2*max(model$modeledData$modeled_count_mode)))
|
||||
} else if (model$modelDefinition$family[1]=='binomial'){
|
||||
p1<-ggplot(model$modeledData) + geom_line(aes(x=age_range_fine_lower ,y=modeled_fraction_mode, group=pathogen)) +
|
||||
geom_point(aes(x=age_range_fine_lower, y=positive/n, group=pathogen)) +
|
||||
geom_ribbon(aes(x=age_range_fine_lower ,ymin=modeled_fraction_lower_95_CI, ymax=modeled_fraction_upper_95_CI , group=pathogen)) +
|
||||
facet_wrap(FACTOR) +
|
||||
ylim(c(0,1))
|
||||
}
|
||||
|
||||
fname <- paste('/home/rstudio/seattle_flu/data/plots/',paste(PATHOGEN,FACTOR,'age_range_fine',sep='-'),'.png',sep='')
|
||||
dir.create('/home/rstudio/seattle_flu/plots/', showWarnings = FALSE)
|
||||
fname <- paste('/home/rstudio/seattle_flu/plots/',paste(PATHOGEN,FACTOR,'age_range_fine',sep='-'),'.png',sep='')
|
||||
png(filename = fname,width = 6, height = 5, units = "in", res = 300)
|
||||
print(p1)
|
||||
dev.off()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#####################################
|
||||
###### timeseries models ############
|
||||
#####################################
|
||||
|
||||
# number of subjects with pathogen and factor at residence location
|
||||
for (SOURCE in names(geoLevels)){
|
||||
for (PATHOGEN in pathogens){
|
||||
for (GEO in geoLevels[[SOURCE]]){
|
||||
|
||||
queryIn <- list(
|
||||
SELECT =list(COLUMN=c('pathogen', factors, GEO,'encountered_week')),
|
||||
WHERE =list(COLUMN='pathogen', IN=PATHOGEN),
|
||||
GROUP_BY =list(COLUMN=c('pathogen',factors,GEO,"encountered_week")),
|
||||
SUMMARIZE=list(COLUMN='pathogen', IN= PATHOGEN)
|
||||
)
|
||||
|
||||
shp <- masterSpatialDB(shape_level = gsub('residence_','',GEO), source = SOURCE)
|
||||
|
||||
db <- expandDB( selectFromDB( queryIn, source='production', na.rm=TRUE ), shp=shp )
|
||||
|
||||
# training occassionaly segfaults on but it does not appear to be deterministic...
|
||||
tryCatch(
|
||||
{
|
||||
model <- modelTrainR(modelDefinition)
|
||||
|
||||
db <- appendCatchmentModel(db,shp=shp, source='production', na.rm=TRUE )
|
||||
|
||||
modelDefinition <- latentFieldModel(db=db, shp=shp)
|
||||
model <- modelTrainR(modelDefinition)
|
||||
|
||||
print(summary(model$inla))
|
||||
|
||||
saveModel(model)
|
||||
fname <- paste('/home/rstudio/seattle_flu/data/plots/',paste(PATHOGEN,SOURCE,paste(factors,collapse='-'),GEO,'encountered_week',sep='-'),'.png',sep='')
|
||||
png(filename = fname,width = 6, height = 5, units = "in", res = 300)
|
||||
print(ggplot(model$latentField) + geom_line(aes_string(x='encountered_week',y="modeled_intensity_mode", color=GEO,group =GEO)) )
|
||||
dev.off()
|
||||
|
||||
}, error=function(e){cat("ERROR :",conditionMessage(e), "\n")}
|
||||
)
|
||||
|
||||
}
|
||||
}
|
||||
}
|
|
@ -39,6 +39,11 @@ expandDB <- function( db = dbViewR::selectFromDB(),
|
|||
|
||||
# age
|
||||
validColumnData$age = seq(0,90,by=1)
|
||||
validColumnData$age_bin_fine_lower = c(0,2,seq(5,85,by=5))
|
||||
validColumnData$age_bin_fine_upper = c(2,seq(5,90,by=5))
|
||||
validColumnData$age_bin_coarse_lower = c(0,2,5,18,65,90)
|
||||
validColumnData$age_bin_coarse_upper = c(2,5,18,65,90)
|
||||
|
||||
|
||||
# age bin
|
||||
if(any(grepl('age',names(db$observedData)))) {
|
||||
|
@ -67,8 +72,7 @@ expandDB <- function( db = dbViewR::selectFromDB(),
|
|||
validColumnData$work_city = validColumnData$work_city[validColumnData$work_city!='NA']
|
||||
|
||||
# factors (these don't get interpolated by the models, so we only want the valid levels for the dataset at hand)
|
||||
factorNames <- names(db$observedData)[ !( (names(db$observedData) %in% c('n','positive')) |
|
||||
grepl('age',names(db$observedData)) |
|
||||
factorNames <- names(db$observedData)[ !( (names(db$observedData) %in% c('age','n','positive')) |
|
||||
grepl('residence_',names(db$observedData)) |
|
||||
grepl('work_',names(db$observedData)) |
|
||||
grepl('encounter',names(db$observedData)) )]
|
||||
|
@ -110,9 +114,18 @@ expandDB <- function( db = dbViewR::selectFromDB(),
|
|||
db$observedData$time_row <- validColumnData$time_row[match(db$observedData$encountered_week,validColumnData$encountered_week)]
|
||||
}
|
||||
|
||||
if(any(grepl('age_bin',names(db$observedData)))){
|
||||
db$observedData$age_row <- validColumnData$age_row[match(db$observedData$age_bin,validColumnData$age_bin)]
|
||||
}
|
||||
# if(any(grepl('age_bin',names(db$observedData)))){
|
||||
# db$observedData$age_row <- validColumnData$age_row[match(db$observedData$age_bin,validColumnData$age_bin)]
|
||||
# }
|
||||
if(any(grepl('age_range_fine_lower',names(db$observedData)))){
|
||||
db$observedData$age_row <- validColumnData$age_range_fine_lower[match(db$observedData$age_range_fine_lower,validColumnData$age_range_fine_lower)]
|
||||
} else if (any(grepl('age_range_fine_upper',names(db$observedData)))){
|
||||
db$observedData$age_row <- validColumnData$age_range_fine_upper[match(db$observedData$age_range_fine_upper,validColumnData$age_range_fine_upper)]
|
||||
} else if (any(grepl('age_range_coarse_lower',names(db$observedData)))){
|
||||
db$observedData$age_row <- validColumnData$age_range_coarse_lower[match(db$observedData$age_range_coarse_lower,validColumnData$age_range_coarse_lower)]
|
||||
} else if (any(grepl('age_range_coarse_upper',names(db$observedData)))){
|
||||
db$observedData$age_row <- validColumnData$age_range_coarse_upper[match(db$observedData$age_range_coarse_upper,validColumnData$age_range_coarse_upper)]
|
||||
}
|
||||
|
||||
return(db)
|
||||
}
|
||||
|
|
|
@ -134,32 +134,34 @@ selectFromDB <- function( queryIn = jsonlite::toJSON(
|
|||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# age bin mutations
|
||||
# this is nasty! I need to transform to age bins for modeling, but I don't want to carry the MUTATE query around!
|
||||
if(any(grepl('age',names(db)))){
|
||||
db$age_bin <- floor(pmin(db$age,90))
|
||||
queryList$GROUP_BY$COLUMN[queryList$GROUP_BY$COLUMN %in% 'age'] <- 'age_bin'
|
||||
}
|
||||
|
||||
|
||||
if('GROUP_BY' %in% names(queryList)){
|
||||
db<- db %>% dplyr::group_by_(.dots=queryList$GROUP_BY$COLUMN)
|
||||
}
|
||||
|
||||
if('SUMMARIZE' %in% names(queryList)){
|
||||
|
||||
if (queryList$SUMMARIZE$IN != 'all'){
|
||||
summary_criteria <- lazyeval::interp(~sum(y %in% x), .values=list(y = as.name(queryList$SUMMARIZE$COLUMN), x = queryList$SUMMARIZE$IN))
|
||||
} else {
|
||||
summary_criteria <- lazyeval::interp(~n()) # must always output n and positive for downstream interpretation!
|
||||
if('GROUP_BY' %in% names(queryList)){
|
||||
db<- db %>% dplyr::group_by_(.dots=queryList$GROUP_BY$COLUMN)
|
||||
}
|
||||
|
||||
db <- db %>% dplyr::summarise_(n = lazyeval::interp(~n()), positive = summary_criteria)
|
||||
}
|
||||
|
||||
|
||||
if('SUMMARIZE' %in% names(queryList)){
|
||||
|
||||
if (queryList$SUMMARIZE$IN != 'all'){
|
||||
summary_criteria <- lazyeval::interp(~sum(y %in% x), .values=list(y = as.name(queryList$SUMMARIZE$COLUMN), x = queryList$SUMMARIZE$IN))
|
||||
} else {
|
||||
summary_criteria <- lazyeval::interp(~n()) # must always output n and positive for downstream interpretation!
|
||||
}
|
||||
|
||||
db <- db %>% dplyr::summarise_(n = lazyeval::interp(~n()), positive = summary_criteria)
|
||||
}
|
||||
|
||||
# "pathogen" column is required for incidenceMapR model definitions
|
||||
if(!('pathogn' %in% queryList$GROUP_BY$COLUMN)){
|
||||
if( 'pathogen' %in% queryList$WHERE$COLUMN){
|
||||
db$pathogen <- queryList$WHERE$IN['pathogen' %in% queryList$WHERE$COLUMN]
|
||||
} else {
|
||||
db$pathogen<-'all'
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
# type harmonization
|
||||
for( COLUMN in names(db)[names(db) %in% c('residence_census_tract','residence_cra_name','residence_puma','residence_neighborhood_district_name','residence_city',
|
||||
'work_census_tract','work_cra_name','work_puma','work_neighborhood_district_name','work_city')]){
|
||||
|
@ -167,10 +169,10 @@ selectFromDB <- function( queryIn = jsonlite::toJSON(
|
|||
}
|
||||
|
||||
# drop rows with NA since incidenceMapR (INLA) will ignore them anyway
|
||||
if(na.rm){
|
||||
db <- db %>% replace(.=='NA', NA) %>% tidyr::drop_na()
|
||||
}
|
||||
|
||||
if(na.rm){
|
||||
db <- db %>% replace(.=='NA', NA) %>% tidyr::drop_na()
|
||||
}
|
||||
|
||||
summarizedData <- list(observedData = db,queryList = c(queryList))
|
||||
|
||||
return(summarizedData)
|
||||
|
|
|
@ -21,10 +21,10 @@ library(dplyr)
|
|||
## return subset
|
||||
queryJSON <- jsonlite::toJSON(
|
||||
list(
|
||||
SELECT =list(COLUMN=c('individual','pathogen','encountered_date','site_type','sex','flu_shot','age','has_fever','has_cough','has_myalgia')),
|
||||
SELECT =list(COLUMN=c('individual','pathogen','encountered_week','site_type','sex','flu_shot','age','has_fever','has_cough','has_myalgia')),
|
||||
WHERE =list(COLUMN='pathogen', IN = c('h1n1pdm', 'h3n2')),
|
||||
WHERE =list(COLUMN='encountered_date', BETWEEN = c(2019,2019.2)),
|
||||
WHERE =list(COLUMN='sampling_location', IN='hospital')
|
||||
WHERE =list(COLUMN='encountered_week', IN= c('2018-W52','2019-W01')),
|
||||
WHERE =list(COLUMN='site_type', IN='hospital')
|
||||
)
|
||||
)
|
||||
db <- selectFromDB( queryJSON )
|
||||
|
@ -37,8 +37,8 @@ library(dplyr)
|
|||
|
||||
## return h1n1pdm summary by time and location
|
||||
queryIn <- list(
|
||||
SELECT =list(COLUMN=c('pathogen','encountered_date','residence_puma','residence_census_tract')),
|
||||
GROUP_BY =list(COLUMN=c('encountered_date','residence_puma','residence_census_tract')),
|
||||
SELECT =list(COLUMN=c('pathogen','encountered_week','residence_puma','residence_census_tract')),
|
||||
GROUP_BY =list(COLUMN=c('encountered_week','residence_puma','residence_census_tract')),
|
||||
SUMMARIZE=list(COLUMN='pathogen', IN= c('h1n1pdm'))
|
||||
)
|
||||
db <- selectFromDB( queryIn )
|
||||
|
@ -128,8 +128,8 @@ names(db$observedData)
|
|||
## return subset
|
||||
queryJSON <- jsonlite::toJSON(
|
||||
list(
|
||||
SELECT =list(COLUMN=c('individual','encountered_date','site_type','sex','flu_shot','age')),
|
||||
WHERE =list(COLUMN='encountered_date', BETWEEN = c('2019-01-01','2019-02-28')),
|
||||
SELECT =list(COLUMN=c('individual','encountered_week','site_type','sex','flu_shot','age')),
|
||||
WHERE =list(COLUMN='encountered_week', IN = c('2019-W04','2019-W05','2019-W06')),
|
||||
WHERE =list(COLUMN='site_type', IN='childrensHospital')
|
||||
)
|
||||
)
|
||||
|
|
|
@ -1,259 +0,0 @@
|
|||
### Example Requests
|
||||
|
||||
Incidence request
|
||||
|
||||
##### Curl
|
||||
```
|
||||
curl -X POST \
|
||||
http://40.112.165.255/flu \
|
||||
-H 'cache-control: no-cache' \
|
||||
-H 'content-type: application/json' \
|
||||
-H 'postman-token: aa46a1fe-9a61-225e-2416-94b321baa238' \
|
||||
-d '{
|
||||
"output":["incidence"],
|
||||
"attributes":["Census_Tract","vax_status"],
|
||||
"basis":["year"],
|
||||
"values":["incidence_median","incidence_sd"]
|
||||
}'
|
||||
```
|
||||
|
||||
##### Node Request
|
||||
```javascript
|
||||
var request = require("request");
|
||||
|
||||
var options = { method: 'POST',
|
||||
url: 'http://40.112.165.255/flu',
|
||||
headers:
|
||||
{ 'postman-token': 'ed0b5dd6-9eae-30de-a8eb-d6869dc0bf76',
|
||||
'cache-control': 'no-cache',
|
||||
'content-type': 'application/json' },
|
||||
body:
|
||||
{ output: [ 'incidence' ],
|
||||
attributes: [ 'Census_Tract', 'vax_status' ],
|
||||
basis: [ 'year' ],
|
||||
values: [ 'incidence_median', 'incidence_sd' ] },
|
||||
json: true };
|
||||
|
||||
request(options, function (error, response, body) {
|
||||
if (error) throw new Error(error);
|
||||
|
||||
console.log(body);
|
||||
});
|
||||
```
|
||||
|
||||
##### Node Native
|
||||
```javascript
|
||||
var options = {
|
||||
"method": "POST",
|
||||
"hostname": "40.112.165.255",
|
||||
"port": null,
|
||||
"path": "/flu",
|
||||
"headers": {
|
||||
"content-type": "application/json",
|
||||
"cache-control": "no-cache",
|
||||
"postman-token": "619097ad-0469-3593-8f6c-938f1a8c6d7c"
|
||||
}
|
||||
};
|
||||
|
||||
var req = http.request(options, function (res) {
|
||||
var chunks = [];
|
||||
|
||||
res.on("data", function (chunk) {
|
||||
chunks.push(chunk);
|
||||
});
|
||||
|
||||
res.on("end", function () {
|
||||
var body = Buffer.concat(chunks);
|
||||
console.log(body.toString());
|
||||
});
|
||||
});
|
||||
|
||||
req.write(JSON.stringify({ output: [ 'incidence' ],
|
||||
attributes: [ 'Census_Tract', 'vax_status' ],
|
||||
basis: [ 'year' ],
|
||||
values: [ 'incidence_median', 'incidence_sd' ] }));
|
||||
req.end();
|
||||
```
|
||||
##### javscript
|
||||
```javascript
|
||||
var data = JSON.stringify({
|
||||
"output": [
|
||||
"incidence"
|
||||
],
|
||||
"attributes": [
|
||||
"Census_Tract",
|
||||
"vax_status"
|
||||
],
|
||||
"basis": [
|
||||
"year"
|
||||
],
|
||||
"values": [
|
||||
"incidence_median",
|
||||
"incidence_sd"
|
||||
]
|
||||
});
|
||||
|
||||
var xhr = new XMLHttpRequest();
|
||||
xhr.withCredentials = true;
|
||||
|
||||
xhr.addEventListener("readystatechange", function () {
|
||||
if (this.readyState === 4) {
|
||||
console.log(this.responseText);
|
||||
}
|
||||
});
|
||||
|
||||
xhr.open("POST", "http://40.112.165.255/flu");
|
||||
xhr.setRequestHeader("content-type", "application/json");
|
||||
xhr.setRequestHeader("cache-control", "no-cache");
|
||||
xhr.setRequestHeader("postman-token", "43420113-8008-3c53-9cc3-e6c8de9d8d55");
|
||||
|
||||
xhr.send(data);
|
||||
```
|
||||
##### Python
|
||||
```python
|
||||
import requests
|
||||
|
||||
url = "http://40.112.165.255/flu"
|
||||
|
||||
payload = "{\n\t\"output\":[\"incidence\"],\n\t\"attributes\":[\"Census_Tract\",\"vax_status\"],\n\t\"basis\":[\"year\"],\n\t\"values\":[\"incidence_median\",\"incidence_sd\"]\n}"
|
||||
headers = {
|
||||
'content-type': "application/json",
|
||||
'cache-control': "no-cache",
|
||||
'postman-token': "d3083842-39d5-c6c5-9332-59f9eadcb234"
|
||||
}
|
||||
|
||||
response = requests.request("POST", url, data=payload, headers=headers)
|
||||
|
||||
print(response.text)
|
||||
```
|
||||
|
||||
### Observed Request
|
||||
|
||||
##### Curl
|
||||
```
|
||||
curl -X POST \
|
||||
http://40.112.165.255/flu \
|
||||
-H 'cache-control: no-cache' \
|
||||
-H 'content-type: application/json' \
|
||||
-H 'postman-token: 42b2d09b-fb4a-8135-8993-f7f8e6fa308c' \
|
||||
-d '{
|
||||
|
||||
"output": ["observed"],
|
||||
"attributes": ["Census_Tract", "vax_status"],
|
||||
"basis": ["year"],
|
||||
"values": ["flu_count", "nsamples"]
|
||||
|
||||
}
|
||||
```
|
||||
##### Node Request
|
||||
```javascript
|
||||
var request = require("request");
|
||||
|
||||
var options = { method: 'POST',
|
||||
url: 'http://40.112.165.255/flu',
|
||||
headers:
|
||||
{ 'postman-token': '7b6caf7d-a3c6-88cc-1306-249f88d152b3',
|
||||
'cache-control': 'no-cache',
|
||||
'content-type': 'application/json' },
|
||||
body:
|
||||
{ output: [ 'observed' ],
|
||||
attributes: [ 'Census_Tract', 'vax_status' ],
|
||||
basis: [ 'year' ],
|
||||
values: [ 'flu_count', 'nsamples' ] },
|
||||
json: true };
|
||||
|
||||
request(options, function (error, response, body) {
|
||||
if (error) throw new Error(error);
|
||||
|
||||
console.log(body);
|
||||
});
|
||||
|
||||
```
|
||||
##### Node Native
|
||||
```javascript
|
||||
var http = require("http");
|
||||
|
||||
var options = {
|
||||
"method": "POST",
|
||||
"hostname": "40.112.165.255",
|
||||
"port": null,
|
||||
"path": "/flu",
|
||||
"headers": {
|
||||
"content-type": "application/json",
|
||||
"cache-control": "no-cache",
|
||||
"postman-token": "18f97827-f7f3-c58c-26f4-5e9a0c04ace2"
|
||||
}
|
||||
};
|
||||
|
||||
var req = http.request(options, function (res) {
|
||||
var chunks = [];
|
||||
|
||||
res.on("data", function (chunk) {
|
||||
chunks.push(chunk);
|
||||
});
|
||||
|
||||
res.on("end", function () {
|
||||
var body = Buffer.concat(chunks);
|
||||
console.log(body.toString());
|
||||
});
|
||||
});
|
||||
|
||||
req.write(JSON.stringify({ output: [ 'observed' ],
|
||||
attributes: [ 'Census_Tract', 'vax_status' ],
|
||||
basis: [ 'year' ],
|
||||
values: [ 'flu_count', 'nsamples' ] }));
|
||||
req.end();
|
||||
```
|
||||
|
||||
##### Javascript
|
||||
```javascript
|
||||
var data = JSON.stringify({
|
||||
"output": [
|
||||
"observed"
|
||||
],
|
||||
"attributes": [
|
||||
"Census_Tract",
|
||||
"vax_status"
|
||||
],
|
||||
"basis": [
|
||||
"year"
|
||||
],
|
||||
"values": [
|
||||
"flu_count",
|
||||
"nsamples"
|
||||
]
|
||||
});
|
||||
|
||||
var xhr = new XMLHttpRequest();
|
||||
xhr.withCredentials = true;
|
||||
|
||||
xhr.addEventListener("readystatechange", function () {
|
||||
if (this.readyState === 4) {
|
||||
console.log(this.responseText);
|
||||
}
|
||||
});
|
||||
|
||||
xhr.open("POST", "http://40.112.165.255/flu");
|
||||
xhr.setRequestHeader("content-type", "application/json");
|
||||
xhr.setRequestHeader("cache-control", "no-cache");
|
||||
xhr.setRequestHeader("postman-token", "4d81fb6f-f40e-b108-7a42-da98ab8ddd40");
|
||||
|
||||
xhr.send(data);
|
||||
```
|
||||
##### Python
|
||||
```python
|
||||
import requests
|
||||
|
||||
url = "http://40.112.165.255/flu"
|
||||
|
||||
payload = "{\n\n \"output\": [\"observed\"],\n \"attributes\": [\"Census_Tract\", \"vax_status\"],\n \"basis\": [\"year\"],\n \"values\": [\"flu_count\", \"nsamples\"]\n\n}\n\n "
|
||||
headers = {
|
||||
'content-type': "application/json",
|
||||
'cache-control': "no-cache",
|
||||
'postman-token': "86c99e05-61c5-204a-3e58-53db81e23856"
|
||||
}
|
||||
|
||||
response = requests.request("POST", url, data=payload, headers=headers)
|
||||
|
||||
print(response.text)
|
||||
```
|
|
@ -1,2 +1,3 @@
|
|||
.Rproj.user
|
||||
.httr-oauth
|
||||
plots
|
||||
|
|
|
@ -52,10 +52,9 @@ appendCatchmentModel <- function(db,shp = NULL, source='simulated_data', na.rm=T
|
|||
catchmentModel <- modelTrainR(catchmentModelDefinition)
|
||||
|
||||
# append catchment as intercept covariate
|
||||
db$observedData <- db$observedData %>% left_join(catchmentModel$modeledData %>% select(site_type, geo, modeled_count_0_5quant))
|
||||
names(db$observedData)[names(db$observedData) %in% 'modeled_count_0_5quant'] <- 'catchment'
|
||||
db$observedData <- db$observedData %>% left_join(catchmentModel$modeledData %>% select(site_type, geo, modeled_count_median))
|
||||
names(db$observedData)[names(db$observedData) %in% 'modeled_count_median'] <- 'catchment'
|
||||
db$observedData$catchment <- log(db$observedData$catchment) - mean(log(db$observedData$catchment))
|
||||
# db$observedData$catchment <- db$observedData$catchment/sd(db$observedData$catchment)
|
||||
|
||||
return(db)
|
||||
}
|
||||
|
|
|
@ -28,6 +28,9 @@ appendSmoothData <- function(model,modelDefinition){
|
|||
# snake_case
|
||||
names(modeledData) <- gsub('\\.','_',names(modeledData))
|
||||
|
||||
# clean quantile names
|
||||
names(modeledData)[grepl('0_',names(modeledData))]<-paste(outputColName,c('lower_95_CI','median','upper_95_CI'),sep='_')
|
||||
|
||||
# pretty order
|
||||
columns <- modelDefinition$queryList$GROUP_BY$COLUMN[modelDefinition$queryList$GROUP_BY$COLUMN %in% names(modeledData)]
|
||||
modeledData <- modeledData %>% arrange_(.dots=columns)
|
||||
|
@ -62,6 +65,47 @@ appendLatentFieldData <- function(model,modelDefinition){
|
|||
# snake_case
|
||||
names(latentField) <- gsub('\\.','_',names(latentField))
|
||||
|
||||
# filter out unwanted fields
|
||||
latentField <- latentField
|
||||
latentField <- latentField[!(names(latentField) %in% c('modeled_intensity_ID','modeled_intensity_kld'))]
|
||||
|
||||
# clean quantile names
|
||||
names(latentField)[grepl('0_',names(latentField))]<-paste('modeled_intensity',c('lower_95_CI','median','upper_95_CI'),sep='_')
|
||||
|
||||
# apply link function
|
||||
if (modelDefinition$family[1] == 'poisson'){
|
||||
for( COLUMN in names(latentField)[grepl('modeled',names(latentField))]){
|
||||
if (grepl('sd',COLUMN)){
|
||||
latentField[[COLUMN]] <- exp(latentField$modeled_intensity_median + latentField$modeled_intensity_sd^2/2) * sqrt(exp(latentField$modeled_intensity_sd^2)-1)
|
||||
} else if (grepl('mean',COLUMN)){
|
||||
latentField[[COLUMN]] <- exp(latentField$modeled_intensity_median + latentField$modeled_intensity_sd^2/2)
|
||||
} else {
|
||||
latentField[[COLUMN]] <- exp(latentField[[COLUMN]])
|
||||
}
|
||||
}
|
||||
|
||||
} else if (modelDefinition$family[1] == 'binomial'){
|
||||
for( COLUMN in names(latentField)[grepl('modeled',names(latentField))]){
|
||||
if (grepl('sd',COLUMN)){
|
||||
# TODO: transform marginals
|
||||
tmp <- exp(latentField$modeled_intensity_mean + rnorm(1e5,sd=latentField$modeled_intensity_sd))
|
||||
tmp <- tmp/(1+tmp)
|
||||
latentField$modeled_intensity_sd<-sd(tmp)
|
||||
|
||||
} else if (grepl('mean',COLUMN)){
|
||||
# TODO: transform marginals
|
||||
tmp <- exp(latentField$modeled_intensity_mean + rnorm(1e5,sd=latentField$modeled_intensity_sd))
|
||||
tmp <- tmp/(1+tmp)
|
||||
latentField$modeled_intensity_mean<-mean(tmp)
|
||||
|
||||
} else {
|
||||
latentField[[COLUMN]] <- exp(latentField[[COLUMN]])/(1+latentField[[COLUMN]])
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
# pretty order
|
||||
columns <- modelDefinition$queryList$GROUP_BY$COLUMN[modelDefinition$queryList$GROUP_BY$COLUMN %in% names(latentField)]
|
||||
latentField <- latentField %>% arrange_(.dots=columns)
|
||||
|
|
|
@ -39,7 +39,7 @@ latentFieldModel <- function(db , shp, family = NULL, neighborGraph = NULL){
|
|||
# construct priors
|
||||
hyper=list()
|
||||
hyper$global <- list(prec = list( prior = "pc.prec", param = 1/10, alpha = 0.01))
|
||||
hyper$local <- list(prec = list( prior = "pc.prec", param = 1/100, alpha = 0.01))
|
||||
hyper$local <- list(prec = list( prior = "pc.prec", param = 1/200, alpha = 0.01))
|
||||
hyper$age <- list(prec = list( prior = "pc.prec", param = 1, alpha = 0.01))
|
||||
hyper$time <- list(prec = list( prior = "pc.prec", param = 1/50, alpha = 0.01))
|
||||
|
||||
|
@ -268,7 +268,7 @@ latentFieldModel <- function(db , shp, family = NULL, neighborGraph = NULL){
|
|||
|
||||
|
||||
# get original values for linear combination categories
|
||||
lc.colIdx <- (names(inputData) %in% db$queryList$GROUP_BY$COLUMN) & !(names(inputData) %in% validFactorNames)
|
||||
lc.colIdx <- (names(inputData) %in% c('pathogen',db$queryList$GROUP_BY$COLUMN)) & !(names(inputData) %in% validFactorNames)
|
||||
lc.data <- inputData[lc.rowIdx,lc.colIdx]
|
||||
|
||||
df <- data.frame(outcome = outcome, inputData, replicateIdx)
|
||||
|
|
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
|
@ -1,7 +1,4 @@
|
|||
#' getHumanReadableModelIdFromModel: return human readable verion of model from query
|
||||
library(logging)
|
||||
|
||||
|
||||
#'
|
||||
#' @param model INLA model object that will generatie id from
|
||||
#'
|
||||
|
|
|
@ -1,81 +0,0 @@
|
|||
# testModelTrainR
|
||||
# script to test incidenceMapR package
|
||||
|
||||
library(dbViewR)
|
||||
library(incidenceMapR)
|
||||
library(modelTestR)
|
||||
library(modelServR)
|
||||
library(dplyr)
|
||||
library(magrittr)
|
||||
|
||||
shp <- masterSpatialDB() # census-tract shapefiles
|
||||
neighborGraph <- constructAdjacencyNetwork(shp)
|
||||
|
||||
# ggplot build eventually will be replaced by function ggplotSmoothSequential
|
||||
library(ggplot2)
|
||||
plotSettings <- ggplot() + theme_bw() + theme(panel.border = element_blank()) + xlab('')
|
||||
|
||||
|
||||
###################################
|
||||
#### timeseries latent field ######
|
||||
###################################
|
||||
|
||||
geoLevels <- c('residence_puma5ce','residence_neighborhood_district_name','residence_cra_name','residence_neighborhood_district_name','residence_census_tract')
|
||||
|
||||
# geoLevels<-c('residence_puma5ce')
|
||||
# geoLevels<-c('residence_neighborhood_district_name')
|
||||
# geoLevels<-c('residence_cra_name')
|
||||
# geoLevels <- c('residence_census_tract')
|
||||
|
||||
|
||||
# get pathogen list
|
||||
queryIn <- list(
|
||||
SELECT =list(COLUMN=c('pathogen')),
|
||||
GROUP_BY =list(COLUMN=c('pathogen')),
|
||||
SUMMARIZE=list(COLUMN='pathogen', IN= 'all')
|
||||
)
|
||||
pathogens <- unique(selectFromDB( queryIn )$observedData$pathogen)
|
||||
|
||||
for(geo in geoLevels){
|
||||
|
||||
for( k in 1:length(pathogens)){
|
||||
# query pathogen, time, levels you want to play with
|
||||
queryIn <- list(
|
||||
SELECT =list(COLUMN=c('encountered_week','pathogen','sampling_location','flu_shot',geo)),
|
||||
WHERE =list(COLUMN=c('pathogen'), IN=pathogens[k]),
|
||||
GROUP_BY =list(COLUMN=c('pathogen','encountered_week','sampling_location','flu_shot',geo)),
|
||||
SUMMARIZE=list(COLUMN='pathogen', IN= pathogens[k])
|
||||
)
|
||||
db <- expandDB( db<-selectFromDB( queryIn ))
|
||||
|
||||
db <- appendCatchmentModel(db,shp=shp)
|
||||
|
||||
# build latent field model
|
||||
modelDefinition <- latentFieldModel(db=db, shp=shp)
|
||||
model <- modelTrainR(modelDefinition)
|
||||
|
||||
# model<-returnModel(db$queryList,format='model',type='latent_field',cloudDir='~/data')
|
||||
print(summary(model$inla))
|
||||
|
||||
|
||||
idx <- model$modeledData$sampling_location=='hospital' & model$modeledData$flu_shot==1
|
||||
plot(log(model$modeledData$fitted_values_mode[idx]))
|
||||
plot(model$latentField$latent_field_mode)
|
||||
|
||||
plot(model$latentField$latent_field_mode,
|
||||
log(model$modeledData$fitted_values_mode[idx]))
|
||||
|
||||
|
||||
saveModel(model, cloudDir = './data')
|
||||
|
||||
|
||||
if (geo =='residence_census_tract'){
|
||||
for(m in unique(model$modeledData$sampling_location)){
|
||||
for (n in unique(model$modeledData$time_row)){
|
||||
tmp<-list(modeledData = model$modeledData[model$modeledData$sampling_location==m & model$modeledData$time_row == n & model$modeledData$flu_shot==0,])
|
||||
ggplotSmoothMap(tmp,shp,paste(m,n))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Загрузка…
Ссылка в новой задаче