[news + projections + micromorts]: update from artdgn/pages

- Update age specific IFRs to recent Nature figures.
 - Update testing bias calculation to be windowed so that it adjusts
 to changing testing policies.
 - Add new page of deadly risk in micromorts map and its thumbnail.
 - Minor textual and graphical updates to maps and graphs.
 - Add world summary of S/I/R ratios in world stacked graphs.
 - Update methodology texts.
 - Add Makefile convenience target for transforming script file into
 calculated notebook.
This commit is contained in:
artdgn 2020-12-28 01:02:01 +11:00
Родитель c7dfff549f
Коммит 5197fdc001
9 изменённых файлов: 363731 добавлений и 9255 удалений

Просмотреть файл

@ -47,4 +47,12 @@ bash-jekyll: .FORCE
restart-jekyll: .FORCE
docker-compose restart jekyll
.FORCE:
.FORCE:
# Updates a notebook from a given py script file to a similarly named ipynb file:
# jupytext converts the script to a notebook, then papermill executes it in place.
# usage: make script-to-notebook SCRIPT='path/to/script.py'
SCRIPT_BASENAME=$(shell basename -s ".py" "$(SCRIPT)")

# Depends on .FORCE (like the other command targets in this Makefile) so that it
# always runs, even if a file named "script-to-notebook" exists.
script-to-notebook: .FORCE
	@test -n "$(SCRIPT)" || { echo "usage: make script-to-notebook SCRIPT='path/to/script.py'" >&2; exit 1; }
	cd "$(shell dirname "$(SCRIPT)")" && \
	jupytext -o "$(SCRIPT_BASENAME).ipynb" "$(SCRIPT_BASENAME).py" && \
	papermill --kernel python3 "$(SCRIPT_BASENAME).ipynb" "$(SCRIPT_BASENAME).ipynb"

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Просмотреть файл

@ -31,11 +31,11 @@
# +
#hide
import pandas as pd
import overview_helpers
import covid_helpers
covid_data = overview_helpers.CovidData()
stylers = overview_helpers.PandasStyling
covid_data = covid_helpers.CovidData()
stylers = covid_helpers.PandasStyling
df_all, debug_dfs = covid_data.table_with_projections(debug_dfs=True)
df = covid_data.filter_df(df_all)
df.columns
@ -43,12 +43,30 @@ df.columns
#hide_input
from IPython.display import Markdown
Markdown(f"***Based on data up to: {covid_data.cur_date}***")
Markdown(f"*Based on data up to*: ***{covid_data.cur_date}***")
#hide
geo_helper = overview_helpers.GeoMap
geo_helper = covid_helpers.GeoMap
df_geo = geo_helper.make_geo_df(df_all, cases_filter=1000, deaths_filter=20)
fig = geo_helper.make_map_figure(df_geo)
def hover_text_func(r: pd.Series):
    """Build the HTML hover text shown for one country on the world map.

    Args:
        r: a row of the geo dataframe; the reported / estimated case columns,
            the death columns, 'affected_ratio.est', 'transmission_rate' and
            'transmission_rate_std' are read from it.

    Returns:
        A single string of `<br>`-separated lines: counts are formatted with
        thousands separators and ratios as percentages with one decimal.
    """
    # Kept as an ordered list so that every line of the tooltip is one entry.
    lines = [
        "<br>",
        f"Cases (reported): {r['Cases.total']:,.0f} (+<b>{r['Cases.new']:,.0f}</b>)<br>",
        f"Cases (estimated): {r['Cases.total.est']:,.0f} (+<b>{r['Cases.new.est']:,.0f}</b>)<br>",
        f"Affected percent: <b>{r['affected_ratio.est']:.1%}</b><br>",
        f"Transmission rate: <b>{r['transmission_rate']:.1%}</b> ± {r['transmission_rate_std']:.1%}<br>",
        f"Deaths: {r['Deaths.total']:,.0f} (+<b>{r['Deaths.new']:,.0f}</b>)<br>",
    ]
    return "".join(lines)
fig = geo_helper.make_map_figure(
df_geo,
col='transmission_rate',
err_col='transmission_rate_std',
colorbar_title='%',
subtitle='Transmission rate: red spreading (>5%), blue recovering (<5%)',
hover_text_func=hover_text_func,
scale_max=10,
colorscale='Bluered',
)
#hide
df_geo['affected_ratio.change.monthly.rate'] = (df_geo['affected_ratio.est.+7d'] -
@ -62,28 +80,34 @@ fig.update_layout(
geo_helper.button_dict(
df_geo['transmission_rate'], 'Transmission rate<br>percent (blue-red)',
colorscale='Bluered', scale_max=10, percent=True,
subtitle='Transmission rate: over 5% (red) spreading, under 5% (blue) recovering',
subtitle='Transmission rate: red spreading (>5%), blue recovering (<5%)',
colorbar_title='%',
err_series=df_geo['transmission_rate_std']),
geo_helper.button_dict(
df_geo['transmission_rate'], 'Transmission rate<br>percent',
colorscale='YlOrRd', scale_max=33, percent=True,
subtitle='Transmission rate (related to R0)',
colorbar_title='%',
err_series=df_geo['transmission_rate_std']),
geo_helper.button_dict(
df_geo['Cases.new.per100k.est'], 'Recent cases<br>estimated per 100k',
colorscale='YlOrRd',
colorbar_title='Cases / 100k',
subtitle='Estimated recent cases in last 5 days per 100k population'),
geo_helper.button_dict(
df_geo['Cases.new.est'], 'Recent cases<br>(estimated)',
colorscale='YlOrRd',
colorbar_title='Cases',
subtitle='Estimated recent cases in last 5 days'),
geo_helper.button_dict(
df_geo['Cases.new.per100k'], 'Recent cases<br>reported per 100k',
colorscale='YlOrRd',
colorbar_title='Cases / 100k',
subtitle='Reported recent cases in last 5 days per 100k population'),
geo_helper.button_dict(
df_geo['Cases.new'], 'Recent cases<br>(reported)',
colorscale='YlOrRd',
colorbar_title='Cases',
subtitle='Reported recent cases in last 5 days'),
],
direction="down", bgcolor='#efe9da',
@ -94,94 +118,111 @@ fig.update_layout(
geo_helper.button_dict(
df_geo['affected_ratio.est'], 'Affected percent<br>(Current)',
colorscale='Bluyl', percent=True,
colorbar_title='%',
subtitle='Estimated current affected population percentage'),
geo_helper.button_dict(
df_geo['affected_ratio.est.+14d'], 'Affected percent<br>(in 14 days)',
colorscale='Bluyl', scale_max=25, percent=True,
colorbar_title='%',
subtitle='Projected affected population percentage in 14 days',
err_series=df_geo['affected_ratio.est.+14d.err']),
geo_helper.button_dict(
df_geo['affected_ratio.est.+30d'], 'Affected percent<br>(in 30 days)',
colorscale='Bluyl', scale_max=25, percent=True,
colorbar_title='%',
subtitle='Projected affected population percentage in 30 days',
err_series=df_geo['affected_ratio.est.+30d.err']),
geo_helper.button_dict(
df_geo['affected_ratio.change.monthly.rate'],
title='Affected percent<br>montly change rate',
colorscale='Bluyl', scale_max=10, percent=True,
colorbar_title='% per month',
subtitle='Current affected population percentage monthly change rate',
err_series=df_geo['affected_ratio.est.+30d.err']),
geo_helper.button_dict(
df_geo['Cases.total.per100k.est'], 'Total cases<br>estimated per 100k',
colorscale='YlOrRd',
colorbar_title='Cases / 100k',
subtitle='Estimated total cases per 100k population'),
geo_helper.button_dict(
df_geo['Cases.total.est'], 'Total cases<br>(estimated)', colorscale='YlOrRd',
colorbar_title='Cases',
subtitle='Estimated total cases'),
geo_helper.button_dict(
df_geo['Cases.total.per100k'], 'Total cases<br>reported per 100k',
colorscale='YlOrRd',
colorbar_title='Cases / 100k',
subtitle='Reported total cases per 100k population'),
geo_helper.button_dict(
df_geo['Cases.total'], 'Total cases<br>(reported)', colorscale='YlOrRd',
colorbar_title='Cases',
subtitle='Reported total cases'),
],
direction="down", bgcolor='#dceae1',
pad={"r": 10, "t": 10},
showactive=False, x=0.29, xanchor="left", y=1.1, yanchor="top"),
showactive=False, x=0.305, xanchor="left", y=1.1, yanchor="top"),
dict(
buttons=[
geo_helper.button_dict(
df_geo['needICU.per100k'], 'ICU need<br>(current)',
colorscale='Sunsetdark', scale_max=10,
colorbar_title='ICU beds / 100k',
subtitle='Estimated current ICU need per 100k population'),
geo_helper.button_dict(
df_geo['needICU.per100k.+14d'], 'ICU need<br>(in 14 days)',
colorscale='Sunsetdark', scale_max=10,
colorbar_title='ICU beds / 100k',
subtitle='Projected ICU need per 100k population in 14 days',
err_series=df_geo['needICU.per100k.+14d.err']),
geo_helper.button_dict(
df_geo['needICU.per100k.+30d'], 'ICU need<br>(in 30 days)',
colorscale='Sunsetdark', scale_max=10,
colorbar_title='ICU beds / 100k',
subtitle='Projected ICU need per 100k population in 30 days',
err_series=df_geo['needICU.per100k.+30d.err']),
geo_helper.button_dict(
df_geo['icu_capacity_per100k'], 'Pre-COVID<br>ICU Capacity',
colorbar_title='ICU beds / 100k',
colorscale='Blues',
subtitle='Pre-COVID ICU capacity per 100k population'),
],
direction="down", bgcolor='#efdaee',
pad={"r": 10, "t": 10},
showactive=False, x=0.515, xanchor="left", y=1.1, yanchor="top"),
showactive=False, x=0.54, xanchor="left", y=1.1, yanchor="top"),
dict(
buttons=[
geo_helper.button_dict(
df_geo['Deaths.total.per100k'], 'Deaths<br>per 100k', colorscale='Reds',
colorbar_title='Deaths / 100k',
subtitle='Total deaths per 100k population'),
geo_helper.button_dict(
df_geo['Deaths.total'], 'Deaths<br>Total', colorscale='Reds',
colorbar_title='Deaths',
subtitle='Total deaths'),
geo_helper.button_dict(
df_geo['Deaths.new.per100k'], 'Recent deaths<br>per 100k', colorscale='Reds',
colorbar_title='Deaths / 100k',
subtitle='Recent deaths in last 5 days per 100k population'),
geo_helper.button_dict(
df_geo['Deaths.new'], 'Recent deaths<br>total', colorscale='Reds',
colorbar_title='Deaths',
subtitle='Recent deaths in last 5 days'),
geo_helper.button_dict(
df_geo['lagged_fatality_rate'], 'Fatality rate %<br>(lagged)',
colorscale='Reds', scale_max=20, percent=True,
subtitle='Reported fatality rate (relative to reported cases 8 days ago)'),
df_geo['current_testing_bias'], 'Current testing<br>bias',
colorscale='YlOrRd', scale_max=20, percent=False,
colorbar_title='Testing bias',
subtitle='Current testing bias'),
],
direction="down", bgcolor='#efdbda',
pad={"r": 10, "t": 10},
showactive=False, x=0.69, xanchor="left", y=1.1, yanchor="top"),
showactive=False, x=0.715, xanchor="left", y=1.1, yanchor="top"),
]);
# # World map (interactive)
# > Includes only countries with at least 1000 reported cases or at least 20 reported deaths.
# > Hover mouse over map for detailed information.
#
# - Details of estimation and prediction calculations are in [Appendix](#appendix) and in [Tables](#tables), as well as [Plots of model predictions](#examples).
# - New cases and new deaths refer to cases or deaths in the last 5 days.
# - Recent cases and Recent deaths refer to cases or deaths in the last 5 days.
# > Tip: Select the columns to show on the map from the dropdown menus. The map is zoomable and draggable.
@ -251,8 +292,8 @@ style_icu_table(df[(df['transmission_rate'] < 0.05) & (df['needICU.per100k'] > 0
#hide_input
df_alt = pd.concat([d.reset_index() for d in debug_dfs], axis=0)
df_alt_filt = df_alt[(df_alt['day'] > -60) & (df_alt['country'].isin(df.index))]
overview_helpers.altair_sir_plot(df_alt_filt, df['Deaths.new.per100k'].idxmax())
df_alt_filt = df_alt[(df_alt['day'] > -120) & (df_alt['country'].isin(df.index))]
covid_helpers.altair_sir_plot(df_alt_filt, df['Deaths.new.per100k'].idxmax())
# ## Projected Affected Population percentages
# > Top 20 countries with most estimated recent cases. Sorted by number of estimated recent cases during the last 5 days. More details in [Appendix](#appendix).
@ -266,7 +307,7 @@ cols = {'Cases.new.est': 'Estimated <br> <i>recent</i> cases<br>during<br>last 5
'transmission_rate': 'Estimated<br>daily<br>transmission<br>rate',
'affected_ratio.est.+14d': 'Projected<br><i>total</i><br>affected<br>percentage<br>In 14 days',
'affected_ratio.est.+30d': 'Projected<br><i>total</i><br>affected<br>percentage<br>In 30 days',
'lagged_fatality_rate': 'Lagged<br>fatality <br> rate',
'current_testing_bias': 'Current<br>testing<br>bias',
}
df_show = stylers.country_index_emoji_link(df_cases, font_size=1)
df_show['affected_ratio.est.+14d'] = stylers.with_errs_ratio(
@ -285,10 +326,12 @@ df_show = df_show[cols.keys()].rename(columns=cols)
s_v=df_cases['transmission_rate']/0.33, subset=cols['transmission_rate'])
.bar(subset=cols['Cases.new.est'], color='#b57b17')
.bar(subset=cols['affected_ratio.est'], color='#5dad64', vmin=0, vmax=1.0)
.bar(subset=cols['lagged_fatality_rate'], color='#420412', vmin=0, vmax=0.2)
.applymap(lambda _: 'color: red', subset=cols['lagged_fatality_rate'])
.bar(subset=cols['current_testing_bias'], color='#420412', vmin=1, vmax=20)
.applymap(lambda _: 'color: red', subset=cols['current_testing_bias'])
.format('<b>{:,.0f}</b>', subset=cols['Cases.new.est'])
.format('<b>{:.1%}</b>', subset=[cols['lagged_fatality_rate'], cols['affected_ratio.est']]))
.format('<b>{:.1%}</b>', subset=cols['affected_ratio.est'])
.format('<b>{:.1f}</b>', subset=cols['current_testing_bias'])
)
# -
# <a id='methodology'></a>
@ -303,9 +346,9 @@ df_show = df_show[cols.keys()].rename(columns=cols)
# - Where the rate estimated from [Total Outstanding Cases](https://covid19dashboards.com/outstanding_cases/#Appendix:-Methodology-of-Predicting-Recovered-Cases) is too high (on down-slopes), a recovery probability of 1/20 is used (equivalent to 20 days to recover).
# - Total cases are estimated from the reported deaths for each country:
# - Each country has a different testing policy and capacity and cases are under-reported in some countries. Using an estimated IFR (fatality rate) we can estimate the number of cases some time ago by using the total deaths until today.
# - IFRs for each country is estimated using the age adjusted IFRs from [May 1 New York paper](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3590771) and [UN demographic data for 2020](https://population.un.org/wpp/Download/Standard/Population/). These IFRs can be found in `df['age_adjusted_ifr']` column. Some examples: US - 0.98%, UK - 1.1%, Qatar - 0.25%, Italy - 1.4%, Japan - 1.6%.
# - IFRs for each country are estimated using the age adjusted IFRs from [International IFR study](https://www.nature.com/articles/s41586-020-2918-0#MOESM1) and [UN demographic data for 2020](https://population.un.org/wpp/Download/Standard/Population/). These IFRs can be found in `df['age_adjusted_ifr']` column.
# - The fatality lag is assumed to be 8 days on average for a case to go from being confirmed positive (after incubation + testing lag) to death. This is the same figure used by ["Estimating The Infected Population From Deaths"](https://covid19dashboards.com/covid-infected/).
# - Testing bias adjustment: the actual lagged fatality rate is than divided by the IFR to estimate the testing bias in a country. The estimated testing bias then multiplies the reported case numbers to estimate the *true* case numbers (*=case numbers if testing coverage was as comprehensive as in the heavily tested countries*).
# - **Testing bias adjustment**: the actual lagged case fatality rate is then divided by the age adjusted IFR to estimate the testing bias in a country. To account for testing bias changes (e.g. increased testing capacity) this is done on a rolling window basis of two months (with at least 300 deaths). The estimated testing bias then multiplies the reported case numbers for each date to estimate the *true* case numbers (*=case numbers that would be consistent with the deaths and the age adjusted IFR*).
# - ICU need is calculated and age-adjusted as follows:
# - UK ICU ratio was reported as [4.4% of active reported cases](https://www.imperial.ac.uk/media/imperial-college/medicine/sph/ide/gida-fellowships/Imperial-College-COVID19-NPI-modelling-16-03-2020.pdf).
# - Using UKs ICU ratio, UK's testing bias, and IFRs corrected for age demographics we can estimate each country's ICU ratio (the number of cases requiring ICU hospitalisation).

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Просмотреть файл

@ -31,7 +31,7 @@
# +
#hide
import pandas as pd
import overview_helpers as covid_helpers
import covid_helpers as covid_helpers
stylers = covid_helpers.PandasStyling
@ -71,7 +71,7 @@ df_data['Deaths.diff.per100k'] = df_data['Deaths.total.diff'] / (df_data['popula
df_data['transmission_rate.change'] = (df_data['transmission_rate'] / df_data['transmission_rate_past']) - 1
df_data['affected_ratio.miss'] = (df_cur['affected_ratio.est'] / df_past['affected_ratio.est.+9d']) - 1
df_data['needICU.per100k.miss'] = (df_cur['needICU.per100k'] / df_past['needICU.per100k.+9d']) - 1
df_data['testing_bias.change'] = (df_data['testing_bias'] / df_past['testing_bias']) - 1
df_data['testing_bias.change'] = (df_data['current_testing_bias'] / df_past['current_testing_bias']) - 1
# -
@ -103,7 +103,8 @@ def style_news_infections(df):
.apply(stylers.add_bar, color='#f49d5a',
s_v=df['transmission_rate'] / rate_norm, subset=cols['transmission_rate'])
.apply(stylers.add_bar, color='#d8b193',
s_v=df['transmission_rate_past'] / rate_norm, subset=cols['transmission_rate_past'])
s_v=df['transmission_rate_past'] / rate_norm,
subset=cols['transmission_rate_past'])
.format('<b>{:.2f}</b>', subset=[cols['needICU.per100k']])
.format('<b>{:,.0f}</b>', subset=cols['Cases.new.est'])
.format('<b>{:.1%}</b>', subset=[cols['affected_ratio.est'],
@ -136,7 +137,6 @@ style_news_infections(df_data.loc[new_waves])
# +
# hide
df_alt_all = pd.concat([d.reset_index() for d in debug_dfs], axis=0)
def infected_plots(countries, title):
    """Plot the infected curves of several countries in one titled chart.

    Thin wrapper around `covid_helpers.altair_multiple_countries_infected`
    that always passes the notebook-level `df_alt_all` data and marks the
    notebook-level `day_diff` day on the plot.

    Args:
        countries: the countries to include in the chart.
        title: the chart title.

    Returns:
        Whatever `altair_multiple_countries_infected` returns.
    """
    plot_options = dict(countries=countries, title=title, marker_day=day_diff)
    return covid_helpers.altair_multiple_countries_infected(df_alt_all, **plot_options)
@ -185,21 +185,21 @@ def style_news_icu(df):
'Cases.new.est': 'Estimated<br><i>recent</i> cases<br> in last 5 days',
'transmission_rate': 'Estimated<br>daily<br>transmission<br>rate',
'affected_ratio.est': 'Estimated <br><i>total</i><br>affected<br>population<br>percentage',
}
}
df_show = stylers.country_index_emoji_link(df)[cols.keys()].rename(columns=cols)
return (df_show.style
.bar(subset=cols['needICU.per100k'], color='#b21e3e', vmin=0, vmax=10)
.bar(subset=cols['needICU.per100k_past'], color='#c67f8e', vmin=0, vmax=10)
.bar(subset=cols['Cases.new.est'], color='#b57b17', vmin=0)
.bar(subset=cols['affected_ratio.est'], color='#5dad64', vmin=0, vmax=1.0)
.apply(stylers.add_bar, color='#f49d5a',
s_v=df['transmission_rate'] / df['transmission_rate'].max(),
subset=cols['transmission_rate'])
.format('<b>{:.2f}</b>', subset=[cols['needICU.per100k'], cols['needICU.per100k_past']])
.format('<b>{:,.0f}</b>', subset=cols['Cases.new.est'])
.format('<b>{:.1%}</b>', subset=[cols['affected_ratio.est'],
cols['transmission_rate']]))
.bar(subset=cols['needICU.per100k'], color='#b21e3e', vmin=0, vmax=10)
.bar(subset=cols['needICU.per100k_past'], color='#c67f8e', vmin=0, vmax=10)
.bar(subset=cols['Cases.new.est'], color='#b57b17', vmin=0)
.bar(subset=cols['affected_ratio.est'], color='#5dad64', vmin=0, vmax=1.0)
.apply(stylers.add_bar, color='#f49d5a',
s_v=df['transmission_rate']/df['transmission_rate'].max(),
subset=cols['transmission_rate'])
.format('<b>{:.2f}</b>', subset=[cols['needICU.per100k'], cols['needICU.per100k_past']])
.format('<b>{:,.0f}</b>', subset=cols['Cases.new.est'])
.format('<b>{:.1%}</b>', subset=[cols['affected_ratio.est'],
cols['transmission_rate']]))
# hide
@ -266,13 +266,13 @@ def style_no_news(df):
'Deaths.total': 'Total<br>reported<br>deaths',
'last_case_date': 'Date<br>of last<br>reported case',
'last_death_date': 'Date<br>of last<br>reported death',
}
}
df_show = stylers.country_index_emoji_link(df)[cols.keys()].rename(columns=cols)
return (df_show.style
.format('<b>{:,.0f}</b>', subset=[cols['Cases.total.est'], cols['Deaths.total']]))
.format('<b>{:,.0f}</b>', subset=[cols['Cases.total.est'], cols['Deaths.total']]))
# hide
#hide
significant_past = ((df_past['Cases.total.est'] > 1000) & (df_past['Deaths.total'] > 10))
active_in_past = ((df_past['Cases.new'] > 0) | (df_past['Deaths.new'] > 0))
no_cases_filt = ((df_cur['Cases.total'] - df_past['Cases.total']) == 0)
@ -350,8 +350,7 @@ def style_death_burden(df):
return (df_show.style
.bar(subset=cols['needICU.per100k'], color='#b21e3e', vmin=0, vmax=10)
.bar(subset=cols['Deaths.new.per100k'], color='#7b7a7c', vmin=0, vmax=death_norm)
.bar(subset=cols['Deaths.new.per100k.past'], color='#918f93', vmin=0,
vmax=death_norm)
.bar(subset=cols['Deaths.new.per100k.past'], color='#918f93', vmin=0, vmax=death_norm)
.bar(subset=cols['Deaths.total.diff'], color='#6b595d', vmin=0)
.bar(subset=cols['affected_ratio.est'], color='#5dad64', vmin=0, vmax=1.0)
.format('<b>{:.0f}</b>', subset=[cols['Deaths.total.diff'],
@ -365,10 +364,10 @@ def style_death_burden(df):
# hide
death_change_ratio = df_data['Deaths.new.per100k'] / df_data['Deaths.new.per100k.past']
filt = (
(df_data['Deaths.new'] > 10) &
(df_data['Deaths.new.past'] > 10) &
(df_data['Deaths.new.per100k'] > 0.1) &
(death_change_ratio > 2))
(df_data['Deaths.new'] > 10) &
(df_data['Deaths.new.past'] > 10) &
(df_data['Deaths.new.per100k'] > 0.1) &
(death_change_ratio > 2))
higher_death_burden = df_data[filt]['Deaths.diff.per100k'].sort_values(ascending=False).index
# hide_input
@ -387,10 +386,10 @@ infected_plots(higher_death_burden, "Countries with higher death burden (vs. 10
# hide
filt = (
(df_data['Deaths.new'] > 10) &
(df_data['Deaths.new.past'] > 10) &
(df_data['Deaths.new.per100k.past'] > 0.1) &
(death_change_ratio < 0.5))
(df_data['Deaths.new'] > 10) &
(df_data['Deaths.new.past'] > 10) &
(df_data['Deaths.new.per100k.past'] > 0.1) &
(death_change_ratio < 0.5))
lower_death_burden = df_data[filt]['Deaths.diff.per100k'].sort_values(ascending=False).index
# hide_input
@ -428,12 +427,6 @@ df_alt_filt = df_alt_all[(df_alt_all['day'] > -60) &
(df_alt_all['country'].isin(news_countries))]
covid_helpers.altair_sir_plot(df_alt_filt, new_waves[0])
# ## Future World projections (all countries stacked)
# The outputs of the models for all countries in stacked plots.
# > Tip: Hover the mouse of the area to see which country is which and the countries S/I/R ratios at that point.
#
# > Tip: The plots are zoomable and draggable.
#hide
df_tot = df_alt_all.rename(columns={'country': cur_data.COL_REGION}
).set_index(cur_data.COL_REGION)
@ -443,10 +436,28 @@ for c in df_tot.columns[df_alt_all.dtypes == float]:
df_tot = df_tot.reset_index()
df_tot.columns = [c.replace('.', '-') for c in df_tot.columns]
#hide_input
df_now = df_tot[df_tot['day'] == 0]
pop = df_now['population'].sum()
s_now = df_now['Susceptible-total'].sum() / pop
i_now = df_now['Infected-total'].sum() / pop
r_now = df_now['Removed-total'].sum() / pop
Markdown("## World totals:\n"
f"Infected &#128567;: **{i_now:.1%}**, "
f"Removed &#128532;: **{r_now:.1%}**, "
f"Susceptible &#128543;: **{s_now:.1%}**")
# ## Future World projections (all countries stacked)
# The outputs of the models for all countries in stacked plots.
# > Tip: Hover the mouse over the area to see which country is which and the country's S/I/R ratios at that point.
#
# > Tip: The plots are zoomable and draggable.
# +
#hide
# filter by days
df_tot = df_tot[(df_tot['day'].between(-30, 30) & (df_tot['day'] % 3 == 0)) | (df_tot['day'] % 10 == 0)]
days = 30
df_tot = df_tot[df_tot['day'].between(-days, days) | (df_tot['day'] % 10 == 0)]
# filter out noisy countries for actively infected plot:
df_tot_filt = df_tot[df_tot[cur_data.COL_REGION].isin(df_cur.index.unique())]
@ -465,11 +476,12 @@ today_line = (alt.Chart(pd.DataFrame({'x': [0]}))
.encode(x='x', size=alt.value(1)))
# make plot
max_y = df_tot_filt[df_tot_filt['day']==30]['Infected-total'].sum()
max_y = (df_tot_filt[df_tot_filt['day'].between(-days, days)]
.groupby('day')['Infected-total'].sum().max())
stacked_inf = alt.Chart(df_tot_filt).mark_area().encode(
x=alt.X('day:Q',
title=f'days relative to today ({cur_data.cur_date})',
scale=alt.Scale(domain=(-30, 30))),
scale=alt.Scale(domain=(-days, days))),
y=alt.Y("Infected-total:Q", stack=True, title="Number of people",
scale=alt.Scale(domain=(0, max_y))),
color=alt.Color("Country/Region:N", legend=None),
@ -485,11 +497,11 @@ stacked_inf = alt.Chart(df_tot_filt).mark_area().encode(
# +
#hide_input
max_y = df_tot_filt[df_tot_filt['day']==30]['Removed-total'].sum()
max_y = df_tot_filt[df_tot_filt['day']==days]['Removed-total'].sum()
stacked_rem = alt.Chart(df_tot_filt).mark_area().encode(
x=alt.X('day:Q',
title=f'days relative to today ({cur_data.cur_date})',
scale=alt.Scale(domain=(-30, 30))),
scale=alt.Scale(domain=(-days, days))),
y=alt.Y("Removed-total:Q", stack=True, title="Number of people",
scale=alt.Scale(domain=(0, max_y))),
color=alt.Color("Country/Region:N", legend=None),

Просмотреть файл

@ -0,0 +1,253 @@
# -*- coding: utf-8 -*-
# ---
# jupyter:
# jupytext:
# formats: ipynb,py:light
# text_representation:
# extension: .py
# format_name: light
# format_version: '1.5'
# jupytext_version: 1.6.0
# kernelspec:
# display_name: Python 3
# language: python
# name: python3
# ---
# # Risk of deadly infection by age (for the unvaccinated).
# > Monthly risk of death due to COVID-19 infection for the unvaccinated or not previously infected. Mapped by country and age.
#
# - permalink: /micromorts-risk/
# - image: images/micromorts.png
# - author: <a href=https://github.com/artdgn/>artdgn</a>
# - categories: [world, overview, interactive, risk]
# - toc: false
# - hide: false
# > Important: This page contains estimations that were not calculated by an epidemiologist.
# +
#hide
import pandas as pd
try: # using in REPL
from . import covid_helpers
except ImportError:
import covid_helpers
covid_data = covid_helpers.CovidData()
df_all, _, _ = covid_data.table_with_current_rates_and_ratios()
# -
#hide
df_all.columns.sort_values()
#hide_input
from IPython.display import Markdown
Markdown(f"*Based on data up to*: ***{covid_data.cur_date}***")
#hide
# Chance of a susceptible person being infected during one day:
# transmission rate times the actively infected share, divided by the
# susceptible share (1 - active - recovered) of the population.
df_all['daily_infection_chance'] = (
df_all['transmission_rate'] * df_all['current_active_ratio'] /
(1 - df_all['current_active_ratio'] - df_all['current_recovered_ratio']))
# Chance of at least one infection in 30 days, treating every day as an
# independent trial with the same daily chance.
df_all['monthly_infection_chance'] = 1 - (1 - df_all['daily_infection_chance']) ** 30
# Chance of a deadly infection: infection chance times the country's
# age adjusted IFR.
df_all['monthly_deadly_infection_risk'] = (
df_all['monthly_infection_chance'] * df_all['age_adjusted_ifr'])
# Same risk expressed in micromorts (1 micromort = 1 in a million chance of death).
df_all['monthly_average_micromorts'] = df_all['monthly_deadly_infection_risk'] * 1e6
# Risk scaled by the population size (shown on the map as "possible deaths").
df_all['monthly_population_risk'] = df_all['monthly_deadly_infection_risk'] * df_all['population']
#hide
# retrospective empirical risk from recent deaths
# NOTE(review): 'Deaths.new.per100k' appears to count deaths over the last
# 5 days (the map pages describe "recent deaths" that way), so despite its
# "daily" name this column would hold a 5-day risk - confirm.
df_all['daily_recent_empirical_risk'] = df_all['Deaths.new.per100k'] / 1e5
# Compound the per-period risk over 30 / 5 = 6 consecutive periods to get a monthly risk.
df_all['monthly_recent_empirical_risk'] = 1 - (1 - df_all['daily_recent_empirical_risk']) ** (30 / 5)
# Same risk expressed in micromorts (1e6 * probability of death).
df_all['monthly_recent_empirical_micromorts'] = df_all['monthly_recent_empirical_risk'] * 1e6
# Risk scaled by the population size.
df_all['monthly_population_empirical_risk'] = df_all['monthly_recent_empirical_risk'] * df_all['population']
#hide
# add age specific data
# IFR table and its row-label constants, both provided by the helpers module.
ifrs = covid_helpers.AgeAdjustedData.intl_ifrs
cols = covid_helpers.AgeAdjustedData.Cols
# One IFR per age bucket offered on the map. Wide buckets take the plain mean
# of the entries selected by the label slice; narrow buckets read a single entry.
# NOTE(review): presumably each `cols.oNN` labels the age band ending at age NN - confirm.
# NOTE(review): the means are unweighted (no population weighting inside a bucket) - confirm this is intended.
# NOTE(review): '80+' uses only the `o84` entry - confirm it is meant to stand for all ages above 80.
age_ifrs = {
'0-29': ifrs.loc[cols.o4:cols.o29].mean(),
'30-44': ifrs.loc[cols.o34:cols.o44].mean(),
'45-59': ifrs.loc[cols.o49:cols.o59].mean(),
'60-64': ifrs.loc[cols.o64],
'65-69': ifrs.loc[cols.o69],
'70-74': ifrs.loc[cols.o74],
'75-79': ifrs.loc[cols.o79],
'80+': ifrs.loc[cols.o84],
}
# Micromorts per age bucket: bucket IFR times the country's monthly infection
# chance, scaled by 1e6.
for age_range, ifr in age_ifrs.items():
df_all[f'monthly_micromorts_{age_range}'] = 1e6 * ifr * df_all['monthly_infection_chance']
#hide
geo_helper = covid_helpers.GeoMap
df_geo = geo_helper.make_geo_df(df_all, cases_filter=1000, deaths_filter=20)
# +
#hide
def micromorts_hover_func(r: pd.Series, age_range=None):
    """Build the HTML hover text comparing a country's monthly risk to other risks.

    Args:
        r: a row of the geo dataframe; the micromorts column for the chosen
            age range, the active / recovered ratios, the transmission rate
            and the monthly infection chance are read from it.
        age_range: a key of `age_ifrs` (e.g. '60-64'), or None to use the
            country's age adjusted IFR and its average micromorts column.

    Returns:
        A single `<br>`-separated string: the micromorts expressed as
        equivalent travel distances and sport activities, followed by the
        figures that went into the risk calculation.
    """
    if age_range is not None:
        ifr = age_ifrs[age_range]
        ifr_str = f'age range {age_range}'
        micromorts_col = f'monthly_micromorts_{age_range}'
    else:
        ifr = r['age_adjusted_ifr']
        ifr_str = "this country's age profile"
        micromorts_col = 'monthly_average_micromorts'
    mm = r[micromorts_col]
    # Entries are listed (and therefore evaluated) in display order.
    pieces = [
        "<br>Risk of death due to one month<br>",
        "of exposure is comparable to:<br>",
        # distance travelled per micromort
        f" - <b>{mm * 10:.0f}</b> km by Motorcycle<br>",
        f" - <b>{mm * 370:.0f}</b> km by Car<br>",
        f" - <b>{mm * 1600:.0f}</b> km by Plane<br>",
        # micromorts per single activity
        f" - <b>{mm / 5:.0f}</b> scuba dives<br>",
        f" - <b>{mm / 8:.0f}</b> sky diving jumps<br>",
        f" - <b>{mm / 430:.0f}</b> base jumping jumps<br>",
        f" - <b>{mm / 12000:.0f}</b> Everest climbs<br><br>",
        "Contagious percent of population:",
        f" <b>{r['current_active_ratio']:.1%}</b><br>",
        "Susceptible percent of population:",
        f" <b>{(1 - r['current_active_ratio'] - r['current_recovered_ratio']):.1%}</b><br>",
        f"Transmission rate: <b>{r['transmission_rate']:.1%}</b><br>",
        "Chance of infection over a month:",
        f" <b>{r['monthly_infection_chance']:.1%}</b><br>",
        f"Chance of death after infection<br> (for {ifr_str}):",
        f" <b>{ifr:.2%}</b>",
    ]
    return "".join(pieces)
def micromorts_hover_texts_for_age_range(age_range):
    """Return the micromorts hover text of every row of `df_geo` as a list.

    Args:
        age_range: forwarded to `micromorts_hover_func`; a key of `age_ifrs`
            or None for the country-average text.

    Returns:
        A list of hover strings, one per row of `df_geo`, in row order.
    """
    hover_texts = df_geo.apply(micromorts_hover_func, axis=1, age_range=age_range)
    return hover_texts.tolist()
def stats_hover_text_func(r: pd.Series):
    """Build the HTML hover text with a country's case, death and infection stats.

    Args:
        r: a row of the geo dataframe; the reported / estimated case columns,
            the death columns, the active / recovered ratios, the
            transmission rate and the monthly infection chance are read
            from it.

    Returns:
        A single `<br>`-separated string: counts are formatted with
        thousands separators and ratios as percentages with one decimal.
    """
    # Entries are listed (and therefore evaluated) in display order.
    pieces = [
        "<br>",
        f"Cases (reported): {r['Cases.total']:,.0f} (+<b>{r['Cases.new']:,.0f}</b>)<br>",
        f"Cases (estimated): {r['Cases.total.est']:,.0f} (+<b>{r['Cases.new.est']:,.0f}</b>)<br>",
        f"Deaths: {r['Deaths.total']:,.0f} (+<b>{r['Deaths.new']:,.0f}</b>)<br><br>",
        "Contagious percent of population:",
        f" <b>{r['current_active_ratio']:.1%}</b><br>",
        "Susceptible percent of population:",
        f" <b>{(1 - r['current_active_ratio'] - r['current_recovered_ratio']):.1%}</b><br>",
        f"Transmission rate: <b>{r['transmission_rate']:.1%}</b><br>",
        "Chance of infection over a month:",
        f" <b>{r['monthly_infection_chance']:.1%}</b><br>",
    ]
    return "".join(pieces)
# -
#hide
import functools
# Age bucket (a key of `age_ifrs`) used for the initial map and for the
# pre-selected entry of the dropdown menu.
default_age = '60-64'
# Colorscale shared by all the micromorts views of the map.
colorscale = 'RdPu'
# Initial figure: micromorts of the default age bucket, with no explicit
# maximum for the color scale and no error column.
fig = geo_helper.make_map_figure(
df_geo,
col=f'monthly_micromorts_{default_age}',
colorbar_title='Micromorts',
subtitle=f"Ages {default_age}: risk of deadly infection due to a month's exposure",
# bind the age range so the hover function can be called with the row alone
hover_text_func=functools.partial(micromorts_hover_func, age_range=default_age),
scale_max=None,
colorscale=colorscale,
err_col=None,
)
#hide
# The three non-micromorts buttons show the same per-country stats text, so the
# row-wise apply is run once here instead of once per button.
stats_hover_texts = df_geo.apply(stats_hover_text_func, axis=1).tolist()

# Single dropdown menu for the map. Entries, in order: one per age bucket
# (oldest first), the country-average micromorts, the monthly infection chance,
# the population-scaled risk, and the ratio of modelled risk to the risk
# implied by recent deaths.
fig.update_layout(
    updatemenus=[
        dict(
            buttons=[
                geo_helper.button_dict(
                    df_geo[f'monthly_micromorts_{age_range}'],
                    title=f'<b>Ages {age_range} monthly risk in micromorts</b>',
                    colorbar_title='Micromorts',
                    colorscale=colorscale, scale_max=None, percent=False,
                    subtitle=f"Ages {age_range}: risk of deadly infection due to a month's exposure",
                    err_series=None,
                    hover_text_list=micromorts_hover_texts_for_age_range(age_range)
                )
                for age_range in reversed(list(age_ifrs.keys()))
            ] + [
                geo_helper.button_dict(
                    df_geo['monthly_average_micromorts'],
                    title='<b>Average monthly risk in micromorts</b>',
                    colorbar_title='Micromorts',
                    colorscale=colorscale, scale_max=None, percent=False,
                    subtitle="Risk of deadly infection due to a month's exposure",
                    err_series=None,
                    hover_text_list=micromorts_hover_texts_for_age_range(None)
                ),
            ] + [
                geo_helper.button_dict(
                    df_geo['monthly_infection_chance'],
                    title='<b>Monthly infection chance</b>',
                    colorbar_title='%',
                    colorscale='Reds', scale_max=None, percent=True,
                    subtitle="Chance of being infected during a month's exposure",
                    err_series=None,
                    hover_text_list=stats_hover_texts
                )
            ] + [
                geo_helper.button_dict(
                    df_geo['monthly_population_risk'],
                    # label typo fixed: was 'Montly'
                    title='<b>Monthly total population risk</b>',
                    colorbar_title='Possible deaths',
                    colorscale='amp', scale_max=None, percent=False,
                    subtitle="Total possible deaths due to a month's exposure",
                    err_series=None,
                    hover_text_list=stats_hover_texts
                )
            ] + [
                geo_helper.button_dict(
                    (df_geo['monthly_average_micromorts'] /
                     df_geo['monthly_recent_empirical_micromorts']),
                    title='<b>Ratio of average monthly risk<br>to recent deaths (as risk)</b>',
                    colorbar_title='%',
                    colorscale='Bluered', scale_max=200, percent=True,
                    subtitle="Ratio of average monthly risk to recent deaths expressed as risk",
                    err_series=None,
                    hover_text_list=stats_hover_texts
                )
            ],
            direction="down", bgcolor='#dceae1',
            pad={"t": 10},
            # the age buttons are listed in reversed key order, so the index of
            # the default bucket is looked up in the reversed key list as well
            active=list(age_ifrs.keys())[::-1].index(default_age),
            showactive=True, x=0.1, xanchor="left", y=1.1, yanchor="top"),
    ]);
# ### Use dropdown menu to select specific age range
# > <font size=2>- Hover the mouse over a country for a risk comparison to some sports and travel modes.<br>- <a href="https://en.wikipedia.org/wiki/Micromort">"Micromorts"</a> are a measure of risk equal to 1 in a Million probability of death.<br>- Risk of death calculated for the unvaccinated or not previously infected. </font>
#hide_input
# from IPython.display import HTML
# HTML(fig.to_html())
fig.show()
# > Tip: The map is zoomable and draggable. Double click to reset.
# ### Appendix: assumptions, explanations.
# <a id='appendix'></a>
# - Monthly risk calculation:
# $$
# Monthly\,Risk = Infection\,Fatality\,Rate_{age\,group} \cdot P_{monthly\,infection}\\\,\\
# P_{monthly\,infection} = 1 - (1 - P_{daily\,infection}) ^ {30\,days}\\\,\\
# P_{daily\,infection} =
# \frac{Actively\,Infected\,\%\cdot Transmission\,Rate\,\%}
# {1 - Actively\,Infected\,\% - Recovered\,or\, Dead\,\%} \\
# $$
# - "Actively Infected" and "Recovered or Dead" population percentages are estimated from past deaths and cases ([See estimations appendix in estimations & projections notebook](/covid-progress-projections/#appendix)).
# - Age specific IFRs are taken from recent [Nature international meta-study of IFRs](https://www.nature.com/articles/s41586-020-2918-0#MOESM1).
# - Country demographics for country average IFRs are taken from [UN demographic data for 2020](https://population.un.org/wpp/Download/Standard/Population/).
# - Micromort deaths risk comparative data (travel and sports) are taken from [Wikipedia article on Micromorts](https://en.wikipedia.org/wiki/Micromort).
# - The calculation is done on daily basis and extrapolated naively to a month.
# - **Why is everything "monthly"**? The main actionable question this analysis aims to help answer is **"How much risk is someone taking by not getting vaccinated now? What is the risk of waiting another month?"**. A daily timescale is too short for this question to be actionable, and on a scale much longer than a month the underlying data for the calculations (e.g. transmission rates, currently infected population) will change too substantially to offer a reasonable approximation. So a month felt to me like roughly the right time scale for the risk aggregation: it is both easy to think about and should still be roughly correct.
# - Assumptions & limitations:
# - The exposure is assumed to be **average exposure** typical of that country (as it manifests in the recent case and deaths data). Protective measures (e.g. masks) and self isolation should of course reduce the risk (if practiced more than the average for that population at that time).
# - Susceptible population is assumed to not yet be **vaccinated**. Once vaccination prevalence becomes substantial and data becomes available, the calculations can be adjusted. The risk estimates are for the **regular susceptible** population. People who have been infected already are excluded (as recovered).
# - All rates and percentages such as: transmission rate, active and recovered percentages are assumed to be **constant** during the month to keep the monthly calculation simple. This is of course NOT true. However although these rates do change, they usually change slowly enough for the likely result to still be of the same order of magnitude. It is possible to use values from a predictive model for this, but they too have errors (as they too are simplistic). For this analysis I preferred to go with the simple to calculate / understand approximation with a well understood error, than with the complex to calculate / understand approximation with an unknown error.
# - All the additional assumptions from [estimations appendix in estimations & projections notebook](/covid-progress-projections/#appendix)
# - Vaccination effect on risk:
# - The risk for the **vaccinated** is not calculated here. It is currently widely assumed that the reported [Moderna](https://en.wikipedia.org/wiki/MRNA-1273) and [Pfizer-BioNTech](https://en.wikipedia.org/wiki/Tozinameran) vaccines might reduce the **chance of infection** by around **90%**.
# - While there are well founded estimates for the effect on **infection chance**, the effect on IFR (fatality rate) is much less known: how does vaccination affect the severity of the disease *if* infected? Answering this will require studying millions of vaccinated people, so will only be available later.
# - Additional related analyses:
# - Another map of statistics of cases, deaths, ICU need and affected population percentage can be explored in [world-map part of the estimations & projections notebook](/covid-progress-projections/#World-map-(interactive))
# - Per country predictive models of population ratios can be explored in [trajectories plots in estimations & projections notebook](/covid-progress-projections/#Interactive-plot-of-model-predictions-and-past-data)
# ![](https://artdgn.goatcounter.com/count?p=c19d-morts)

Просмотреть файл

@ -1,5 +1,6 @@
import os
import re
from typing import Tuple, List
from urllib import request
import numpy as np
@ -91,13 +92,33 @@ class AgeAdjustedData:
o99 = '95-99'
o100p = '100+'
# https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3590771
# ny = new york
ny17 = 'ny17' # 0-17
ny44 = 'ny44' # 18-44
ny64 = 'ny64' # 45-64
ny74 = 'ny74' # 65-74
ny75p = 'ny75p' # 75+
# paper: https://www.nature.com/articles/s41586-020-2918-0#MOESM1
# table S3 from supplementary material:
# https://static-content.springer.com/esm/art%3A10.1038%2Fs41586-020-2918-0/MediaObjects/41586_2020_2918_MOESM1_ESM.pdf
#
# Age specific infection fatality rates (IFRs), one entry per `Cols` age bucket.
# The literal below is written in percent; it is scaled to ratios immediately
# after construction.
# NOTE(review): `load` combines this series with the per-bucket population
# shares through `np.dot`, which is positional - presumably the entry order
# here has to match the bucket column order of that table; verify before
# reordering or adding entries.
intl_ifrs = pd.Series({
Cols.o4: 0.003,
Cols.o9: 0.001,
Cols.o14: 0.001,
Cols.o19: 0.003,
Cols.o24: 0.006,
Cols.o29: 0.013,
Cols.o34: 0.024,
Cols.o39: 0.040,
Cols.o44: 0.075,
Cols.o49: 0.121,
Cols.o54: 0.207,
Cols.o59: 0.323,
Cols.o64: 0.456,
Cols.o69: 1.075,
Cols.o74: 1.674,
Cols.o79: 3.203,
# the remaining buckets all reuse the paper's single 80+ figure
Cols.o84: 8.292, # 80+ is a single bucket in that paper
Cols.o89: 8.292,
Cols.o94: 8.292,
Cols.o99: 8.292,
Cols.o100p: 8.292,
})
intl_ifrs *= 0.01 # convert from percent to ratio
@classmethod
def load(cls):
@ -145,30 +166,8 @@ class AgeAdjustedData:
# convert to ratios
df_pct = (df_num.T / df_num.sum(1)).T
# calculate NY bucket percentages
cols = cls.Cols
df_pct[cols.ny17] = df_pct[[cols.o4, cols.o9,
cols.o14, cols.o19]].sum(1)
df_pct[cols.ny44] = df_pct[[cols.o24, cols.o29,
cols.o34, cols.o39,
cols.o44]].sum(1)
df_pct[cols.ny64] = df_pct[[cols.o49,
cols.o54, cols.o59,
cols.o64]].sum(1)
df_pct[cols.ny74] = df_pct[[cols.o69, cols.o74]].sum(1)
df_pct[cols.ny75p] = df_pct[[cols.o79,
cols.o84, cols.o89,
cols.o94, cols.o99,
cols.o100p]].sum(1)
# check: df_pct[[cols.ny17, cols.ny44, cols.ny64, cols.ny74, cols.ny75p]].sum(1)
# calculate IFR
# https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3590771
# Table 1
ifr_s = pd.Series(np.dot(df_pct
[[cols.ny17, cols.ny44, cols.ny64, cols.ny74, cols.ny75p]],
[0.00002, 0.00087, 0.00822, 0.02626, 0.07137]),
index=df_pct.index)
ifr_s = pd.Series(np.dot(df_pct, cls.intl_ifrs), index=df_pct.index)
## icu need estimation
## https://www.imperial.ac.uk/media/imperial-college/medicine/sph/ide/gida-fellowships/Imperial-College-COVID19-NPI-modelling-16-03-2020.pdf
@ -288,16 +287,17 @@ class EmojiFlags(ScrapedTableBase):
class CovidData:
COL_REGION = COL_REGION
ABS_COLS = ['Cases.total', 'Deaths.total', 'Cases.new', 'Deaths.new']
CASES_TOT = 'Cases.total'
CASES_NEW = 'Cases.new'
DEATHS_TOT = 'Deaths.total'
DEATHS_NEW = 'Deaths.new'
PER_100K_COLS = [f'{c}.per100k' for c in ABS_COLS]
CASES_COLS = ABS_COLS[::2] + PER_100K_COLS[::2]
EST_COLS = [f'{c}.est' for c in CASES_COLS]
PER_100K_SUFFIX = '.per100k'
dft_cases = SourceData.get_covid_dataframe('confirmed')
dft_deaths = SourceData.get_covid_dataframe('deaths')
dft_recovered = SourceData.get_covid_dataframe('recovered')
dt_cols_all = SourceData.get_dates(dft_cases)
dft_cases_raw = SourceData.get_covid_dataframe('confirmed')
dft_deaths_raw = SourceData.get_covid_dataframe('deaths')
# dft_recovered = SourceData.get_covid_dataframe('recovered')
dt_cols_all = SourceData.get_dates(dft_cases_raw)
cur_date = pd.to_datetime(dt_cols_all[-1]).date().isoformat()
@ -307,19 +307,17 @@ class CovidData:
## testing bias
death_lag = 8
## ICU spare capacity
# occupancy 66% for us:
# https://www.sccm.org/Blog/March-2020/United-States-Resource-Availability-for-COVID-19
# occupancy average 75% for OECD:
# https://www.oecd-ilibrary.org/social-issues-migration-health/health-at-a-glance-2019_4dd50c09-en
icu_spare_capacity_ratio = 0.3
def __init__(self, days_offset=0):
assert days_offset <= 0, 'day_offest can only be 0 or negative (in the past)'
self.dt_cols = self.dt_cols_all[:(len(self.dt_cols_all) + days_offset)]
self.dft_cases_backfilled = self._cases_with_backfilled_unreported_days()
self.dft_cases_backfilled = self._cases_with_backfilled_unreported_days()[self.dt_cols]
self.dft_deaths = self.dft_deaths_raw.groupby(COL_REGION).sum()[self.dt_cols]
self.dfc_cases = self.dft_cases_backfilled[self.dt_cols[-1]]
self.dfc_deaths = self.dft_deaths.groupby(COL_REGION)[self.dt_cols[-1]].sum()
self.dfc_deaths = self.dft_deaths[self.dt_cols[-1]]
# to be calculated later
self.testing_biases_dft: pd.DataFrame = None
self.cases_est_dft: pd.DataFrame = None
def _cases_with_backfilled_unreported_days(self):
@ -363,7 +361,7 @@ class CovidData:
return pd.Series(out, index=series.index)
cases = self.dft_cases.groupby(self.COL_REGION).sum()[self.dt_cols_all]
cases = self.dft_cases_raw.groupby(self.COL_REGION).sum()[self.dt_cols_all]
diffs = cases.diff(axis=1)
diffs.iloc[:, 0] = cases.iloc[:, 0] # replace resulting nans in first date's data
@ -375,7 +373,7 @@ class CovidData:
return self.dft_cases_backfilled[self.dt_cols[-lag]]
def lagged_deaths(self, lag=PREV_LAG):
return self.dft_deaths.groupby(COL_REGION)[self.dt_cols[-lag]].sum()
return self.dft_deaths[self.dt_cols[-lag]]
def add_last_dates(self, df):
@ -386,9 +384,9 @@ class CovidData:
else:
return float('nan')
df['last_case_date'] = (self.dft_cases.groupby(COL_REGION).sum().diff(axis=1)
df['last_case_date'] = (self.dft_cases_raw.groupby(COL_REGION).sum().diff(axis=1)
.apply(last_date, axis=1))
df['last_death_date'] = (self.dft_deaths.groupby(COL_REGION).sum().diff(axis=1)
df['last_death_date'] = (self.dft_deaths_raw.groupby(COL_REGION).sum().diff(axis=1)
.apply(last_date, axis=1))
return df
@ -400,9 +398,11 @@ class CovidData:
.sort_values(by=['Cases.total', 'Deaths.total'], ascending=[False, False])
.reset_index())
df_table.rename(columns={'index': COL_REGION}, inplace=True)
for c in self.ABS_COLS[:2]:
df_table[c.replace('total', 'new')] = (df_table[c] - df_table[f'{c}.prev']).clip(0) # DATA BUG
df_table['Fatality Rate'] = (100 * df_table['Deaths.total'] / df_table['Cases.total']).round(1)
for c in [self.CASES_TOT, self.DEATHS_TOT]:
df_table[c.replace('total', 'new')] = (
df_table[c] - df_table[f'{c}.prev']).clip(0) # DATA BUG
df_table['Fatality Rate'] = (100 * df_table['Deaths.total'] /
df_table['Cases.total']).round(1)
df_table['Continent'] = df_table[COL_REGION].map(SourceData.mappings['map.continent'])
# remove problematic
@ -426,22 +426,83 @@ class CovidData:
.sort_values('Cases.new', ascending=False))
df['Fatality Rate'] /= 100
# add emoji flags
df['emoji_flag'] = EmojiFlags.load().set_index(COL_REGION)[EmojiFlags.emoji_col]
df['emoji_flag'] = df['emoji_flag'].fillna('')
# last dates
df = self.add_last_dates(df)
# age adjusted data
(df['age_adjusted_ifr'],
df['population'],
df['age_adjusted_icu_percentage']) = AgeAdjustedData.load()
# add per population columns
df.dropna(subset=['population'], inplace=True)
for col in [self.CASES_TOT, self.DEATHS_TOT, self.CASES_NEW, self.DEATHS_NEW]:
df[f'{col}{self.PER_100K_SUFFIX}'] = df[col] * 1e5 / df['population']
for col, per_100k_col in zip(self.ABS_COLS, self.PER_100K_COLS):
df[per_100k_col] = df[col] * 1e5 / df['population']
# add ICU capacity data
df_beds = self.beds_df()
df['icu_capacity_per100k'] = df_beds['icu_per_100k']
return df
def calculate_testing_biases_dft(
        self, ifrs: pd.Series, min_window_lag=60, min_window_deaths=300
) -> pd.DataFrame:
    """Estimate a per-country, per-date testing bias (under-reporting factor).

    For every country in ``ifrs.index`` the bias at a date is the ratio between
    the fatality rate observed in a trailing window (deaths in the window
    divided by cases in the same window shifted back by ``self.death_lag``)
    and that country's expected IFR. The window is kept as narrow as possible
    while still spanning at least ``min_window_lag`` columns and containing at
    least ``min_window_deaths`` deaths, so the estimate can follow changes over
    time instead of being one all-time average.

    Args:
        ifrs: expected infection fatality ratio per country; its index selects
            the rows read from ``self.dft_deaths`` and
            ``self.dft_cases_backfilled``.
        min_window_lag: minimal window width, counted in date columns.
        min_window_deaths: minimal number of deaths a window has to contain.

    Returns:
        DataFrame indexed like ``ifrs`` with ``self.dt_cols`` as columns.
        Values are clipped from below at 1.
    """
    deaths_dft = self.dft_deaths
    cases_dft = self.dft_cases_backfilled
    death_lag = self.death_lag

    def biases_vec(country: str) -> pd.Series:
        d_vec = deaths_dft.loc[country].values
        c_vec = cases_dft.loc[country].values
        ifr = ifrs.loc[country]

        # Explicit float dtype: inheriting the dtype of the case counts would
        # silently truncate fractional biases whenever the counts are integers.
        biases = np.ones(len(c_vec), dtype=float)

        # Not enough deaths overall for any window: fall back to one all-time
        # ratio, or keep the neutral 1.0 when there is nothing to divide.
        if d_vec[-1] < min_window_deaths:
            if d_vec[-1] > 0 and c_vec[-1] > 0:
                biases[:] = (d_vec[-1] / c_vec[-1]) / ifr
            return pd.Series(biases, index=self.dt_cols)

        left, right = death_lag, death_lag + min_window_lag
        last = len(c_vec) - 1
        while right <= last:
            window_deaths = d_vec[right] - d_vec[left]
            if (right - left) < min_window_lag or window_deaths < min_window_deaths:
                # grow the window to the right until it is wide / heavy enough
                right += 1
                continue

            # shrink the window from the left while it stays wide / heavy enough
            while ((right - left) > min_window_lag and
                   d_vec[right] - d_vec[left] > min_window_deaths):
                left += 1

            window_deaths = d_vec[right] - d_vec[left]
            window_cases = c_vec[right - death_lag] - c_vec[left - death_lag]
            # Skip windows without new cases: dividing by zero would yield inf
            # and corrupt everything derived from the biases downstream.
            if window_cases > 0:
                biases[right] = (window_deaths / window_cases) / ifr

            # advance left every time so the loop always makes progress
            left += 1

        # Back-fill the leading dates with the first computed value; guarded
        # because it is possible that no window produced a value.
        computed = np.flatnonzero(biases != 1)
        if computed.size:
            biases[:computed[0]] = biases[computed[0]]

        return pd.Series(biases, index=self.dt_cols)

    testing_biases_dft = ifrs.index.to_series().apply(biases_vec)
    # a bias below 1 would mean more reported cases than real ones
    return testing_biases_dft.clip(lower=1)
def table_with_estimated_cases(self):
"""
Assumptions:
@ -453,29 +514,27 @@ class CovidData:
didn't change significantly during the last 8 days.
- Recent new cases can be adjusted using the same testing_ratio bias.
"""
df = self.overview_table_with_extra_data()
lagged_mortality_rate = (self.dfc_deaths + 1) / (self.lagged_cases(self.death_lag) + 2)
testing_bias = lagged_mortality_rate / df['age_adjusted_ifr']
testing_bias[testing_bias < 1] = 1
self.testing_biases_dft = self.calculate_testing_biases_dft(
df['age_adjusted_ifr'])
df['lagged_fatality_rate'] = lagged_mortality_rate
df['testing_bias'] = testing_bias
# adjust daily cases by closest approximation of testing bias at that point
cases_dft = self.dft_cases_backfilled
self.cases_est_dft = (cases_dft.diff(axis=1) * self.testing_biases_dft
).cumsum(axis=1).fillna(0).astype(int)
for col, est_col in zip(self.CASES_COLS, self.EST_COLS):
df[est_col] = df['testing_bias'] * df[col]
df['current_testing_bias'] = self.testing_biases_dft.iloc[:, -1]
return df.sort_values('Cases.new.est', ascending=False)
# total cases
df[f'{self.CASES_TOT}.est'] = self.cases_est_dft[self.dt_cols[-1]]
df[f'{self.CASES_TOT}{self.PER_100K_SUFFIX}.est'] = (
df[f'{self.CASES_TOT}.est'] * 1e5 / df['population'])
def table_with_icu_capacities(self):
df = self.table_with_estimated_cases()
# new cases just need adjustments with current bias
for col in [self.CASES_NEW, f'{self.CASES_NEW}{self.PER_100K_SUFFIX}']:
df[f'{col}.est'] = df['current_testing_bias'] * df[col]
df_beds = self.beds_df()
df['icu_capacity_per100k'] = df_beds['icu_per_100k']
df['icu_spare_capacity_per100k'] = df['icu_capacity_per100k'] * self.icu_spare_capacity_ratio
return df
@classmethod
@ -494,9 +553,9 @@ class CovidData:
def smoothed_growth_rates(self, n_days):
recent_dates = self.dt_cols[-n_days:]
cases = (self.dft_cases_backfilled[recent_dates] + 1) # with pseudo counts
cases = self.cases_est_dft[recent_dates] + 1 # with pseudo counts
diffs = self.dft_cases_backfilled.diff(axis=1)[recent_dates]
diffs = self.cases_est_dft.diff(axis=1)[recent_dates]
diffs[diffs < 0] = 0 # total cases cannot go down
cases, diffs = cases.T, diffs.T # broadcasting works correctly this way
@ -515,8 +574,9 @@ class CovidData:
return weighted_mean - 1, weighted_std
def table_with_projections(self, projection_days=(7, 14, 30), debug_dfs=False):
df = self.table_with_icu_capacities()
def table_with_current_rates_and_ratios(
self) -> Tuple[pd.DataFrame, List[pd.Series], List[pd.Series]]:
df = self.table_with_estimated_cases()
df['affected_ratio'] = df['Cases.total'] / df['population']
@ -524,12 +584,21 @@ class CovidData:
past_active, past_recovered = self._calculate_recovered_and_active_until_now(df)
df['transmission_rate'], df['transmission_rate_std'] = Model.growth_to_infection_rate(
df['current_active_ratio'] = past_active[-1].fillna(0)
df['current_recovered_ratio'] = past_recovered[-1].fillna(0)
df['transmission_rate'], df['transmission_rate_std'] = Model.growth_to_transmission_rate(
growth=df['growth_rate'],
rec=past_recovered[-1],
act=past_active[-1],
rec=df['current_recovered_ratio'],
act=df['current_active_ratio'],
growth_std=df['growth_rate_std'])
return df, past_active, past_recovered
def table_with_projections(self, projection_days=(7, 14, 30), debug_dfs=False):
df, past_active, past_recovered = self.table_with_current_rates_and_ratios()
df, traces = Model.run_model_forward(
df,
past_active=past_active.copy(),
@ -546,9 +615,8 @@ class CovidData:
return df
def _calculate_recovered_and_active_until_now(self, df):
# estimated daily cases ratio of population
lagged_cases_ratios = (self.dft_cases_backfilled[self.dt_cols].T *
df['testing_bias'].T / df['population'].T).T
# estimated daily cases ratios of population
lagged_cases_ratios = (self.cases_est_dft[self.dt_cols].T / df['population'].T).T
# protect from testing bias over-inflation
lagged_cases_ratios[lagged_cases_ratios > 1] = 1
@ -557,12 +625,18 @@ class CovidData:
actives, recs = [], []
zeros_series = lagged_cases_ratios[self.dt_cols[0]] * 0 # this is to have consistent types
for day in range(len(self.dt_cols)):
# previous day
prev_rec = recs[day - 1] if day > 0 else zeros_series
# lagged recoveries
tot_lagged_9 = lagged_cases_ratios[self.dt_cols[day - 9]] if day >= 9 else zeros_series
new_recs = prev_rec + (tot_lagged_9 - prev_rec) * Model.recovery_lagged9_rate
new_recs[new_recs > 1] = 1
# clip recoveries by current cases
cur_cases = lagged_cases_ratios[self.dt_cols[day]]
new_recs[new_recs > cur_cases] = cur_cases[new_recs > cur_cases]
new_actives = cur_cases - new_recs
# assign
recs.append(new_recs)
actives.append(lagged_cases_ratios[self.dt_cols[day]] - new_recs)
actives.append(new_actives)
return actives, recs
@ -581,7 +655,7 @@ class Model:
projection_days,
):
sus, act, rec = cls._run_sir_mode(
sus, act, rec = cls._run_sir_model(
past_recovered, past_active, df['growth_rate'], n_days=projection_days[-1])
# sample more growth rates
@ -592,7 +666,7 @@ class Model:
for ratio in np.linspace(-1, 1, 10):
pert_growth = df['growth_rate'] + ratio * df['growth_rate_std']
pert_growth[pert_growth < 0] = 0
sus_other, act_other, rec_other = cls._run_sir_mode(
sus_other, act_other, rec_other = cls._run_sir_model(
past_recovered, past_active, pert_growth, n_days=projection_days[-1])
for s_list, s in zip(sus_lists, sus_other):
s_list.append(s)
@ -636,7 +710,7 @@ class Model:
return df, traces
@classmethod
def growth_to_infection_rate(cls, growth, rec, act, growth_std=None):
def growth_to_transmission_rate(cls, growth, rec, act, growth_std=None):
daily_delta = growth
tot = rec + act
active = act
@ -662,10 +736,10 @@ class Model:
return infect_rate, infect_std
@classmethod
def _run_sir_mode(cls, past_rec, past_act, growth, n_days):
def _run_sir_model(cls, past_rec, past_act, growth, n_days):
rec, act = past_rec.copy(), past_act.copy()
infect_rate, _ = cls.growth_to_infection_rate(growth, rec[-1], act[-1])
infect_rate, _ = cls.growth_to_transmission_rate(growth, rec[-1], act[-1])
# simulate
for i in range(n_days):
@ -771,7 +845,7 @@ def altair_sir_plot(df_alt, default_country):
def altair_multiple_countries_infected(df_alt_all,
countries,
title,
days_back=90,
days_back=120,
marker_day=10):
if not len(countries):
return
@ -889,21 +963,19 @@ class GeoMap:
@classmethod
def make_map_figure(cls,
df_plot_geo,
col='transmission_rate',
title='Transmission rate<br>percent (blue-red)',
subtitle='Transmission rate: over 5% (red) '
'spreading, under 5% (blue) recovering'):
col,
colorbar_title,
subtitle,
err_col=None,
hover_text_func=None,
scale_max=None,
colorscale='Bluered',
):
import plotly.graph_objects as go
df_plot_geo['text'] = (df_plot_geo.apply(
lambda r: (
"<br>"
f"Cases (reported): {r['Cases.total']:,.0f} (+<b>{r['Cases.new']:,.0f}</b>)<br>"
f"Cases (estimated): {r['Cases.total.est']:,.0f} (+<b>{r['Cases.new.est']:,.0f}</b>)<br>"
f"Affected percent: <b>{r['affected_ratio.est']:.1%}</b><br>"
f"Transmission rate: <b>{r['transmission_rate']:.1%}</b> ± {r['transmission_rate_std']:.1%}<br>"
f"Deaths: {r['Deaths.total']:,.0f} (+<b>{r['Deaths.new']:,.0f}</b>)<br>"
), axis=1))
# hover text
hover_text_func = hover_text_func if callable(hover_text_func) else lambda r: ''
df_plot_geo['text'] = df_plot_geo.apply(hover_text_func, axis=1)
percent = ('rate' in col or 'ratio' in col)
@ -913,26 +985,26 @@ class GeoMap:
geojson=df_plot_geo['geometry'].__geo_interface__,
z=df_plot_geo[col].fillna(float('nan')) * (100 if percent else 1),
zmin=0,
zmax=10,
zmax=scale_max,
text=df_plot_geo['text'],
ids=df_plot_geo['country'],
customdata=cls.error_series_to_string_list(
series=df_plot_geo[col],
err_series=df_plot_geo['transmission_rate_std'],
err_series=df_plot_geo[err_col] if err_col else None,
percent=percent
),
hovertemplate="<b>%{id}</b>:<br><b>%{z:.1f}%{customdata}</b><br>%{text}<extra></extra>",
colorscale='BLuered',
colorscale=colorscale,
colorbar={'title': {'text': f'<b>{colorbar_title}</b>'}},
autocolorscale=False,
marker_line_color='#9fa8ad',
marker_line_width=0.5,
colorbar_title=f'<b>{title}</b>',
))
fig.update_layout(
title={'text': f"<b>Map of</b>: {subtitle}", 'y': 0.875, 'x': 0.005},
annotations=[
dict(text="Data<br>choice:", showarrow=False, x=0.005, y=1.075, yref="paper", align="left")
dict(text="Map<br>choice:", showarrow=False, x=0.005, y=1.075, yref="paper", align="left")
],
width=800,
height=450,
@ -966,7 +1038,8 @@ class GeoMap:
@classmethod
def button_dict(cls, series, title, colorscale, scale_max=None,
percent=False, subtitle=None, err_series=None):
percent=False, subtitle=None, err_series=None,
hover_text_list=None, colorbar_title=None):
import plotly.express as px
series = series.fillna(float('nan'))
@ -979,14 +1052,21 @@ class GeoMap:
max_arg = series.max() if scale_max is None else min(scale_max, series.max())
return dict(args=[
{'z': [series.to_list()],
'zmax': [max_arg],
'colorbar': [{'title': {'text': f'<b>{title}</b>'}}],
'colorscale': [scale_arg],
'customdata': [cls.error_series_to_string_list(
series, err_series=err_series, percent=percent)]
},
{'title': {'text': f"<b>Map of</b>: {subtitle}",
'y': 0.875, 'x': 0.005}}],
label=title, method="update")
data_args_dict = {
'z': [series.to_list()],
'zmax': [max_arg],
'colorbar': [{'title': {'text': f'<b>{colorbar_title or title}</b>'}}],
'colorscale': [scale_arg],
'customdata': [cls.error_series_to_string_list(
series, err_series=err_series, percent=percent)]
}
if hover_text_list:
data_args_dict['text'] = [hover_text_list]
return dict(args=[data_args_dict,
{'title': {'text': f"<b>Map of</b>: {subtitle}",
'y': 0.875, 'x': 0.005}}
],
label=title,
method="update")

Просмотреть файл

@ -4,6 +4,7 @@ seaborn
papermill
ipykernel
jupyter
jupytext
folium
plotly
plotnine

Двоичные данные
images/micromorts.png Normal file

Двоичный файл не отображается.

После

Ширина:  |  Высота:  |  Размер: 197 KiB