[news + projections + micromorts]: update from artdgn/pages

- Update age specific IFRs to recent Nature figures. - Update testing bias calculation to be windowed so that it adjusts to changing testing policies. - Add new page of deadly risk in micromorts map and its thumbnail. - Minor textual and graphical updates to maps and graphs. - Add world summary of S/I/R ratios in world stacked graphs. - Update methodology texts. - Add Makefile convenience target for transforming script file into calculated notebook.
2020-12-28 01:02:01 +11:00 · 2020-12-28 01:02:01 +11:00 · 5197fdc001
--- a/8
+++ b/8
@ -48,3 +48,11 @@ restart-jekyll: .FORCE
 	docker-compose restart jekyll

 .FORCE:
+
+# Converts SCRIPT (a .py script) into a similarly named .ipynb next to it, then runs that notebook in place; always runs (.FORCE).
+# usage: make script-to-notebook SCRIPT='path/to/script.py'
+SCRIPT_BASENAME=$(shell basename -s ".py" "$(SCRIPT)")
+script-to-notebook: .FORCE
+	cd "$(shell dirname "$(if $(SCRIPT),$(SCRIPT),$(error SCRIPT is required: make script-to-notebook SCRIPT=path/to/script.py))")" && \
+	jupytext -o "$(SCRIPT_BASENAME).ipynb" "$(SCRIPT_BASENAME).py" && \
+	papermill --kernel python3 "$(SCRIPT_BASENAME).ipynb" "$(SCRIPT_BASENAME).ipynb"
--- a/_notebooks/2020-03-29-covid19-progress-projections.ipynb
+++ b/_notebooks/2020-03-29-covid19-progress-projections.ipynb
--- a/_notebooks/2020-03-29-covid19-progress-projections.py
+++ b/_notebooks/2020-03-29-covid19-progress-projections.py
@ -31,11 +31,11 @@
 # +
 #hide
 import pandas as pd
-import overview_helpers
+import covid_helpers


-covid_data = overview_helpers.CovidData()
-stylers = overview_helpers.PandasStyling
+covid_data = covid_helpers.CovidData()
+stylers = covid_helpers.PandasStyling
 df_all, debug_dfs = covid_data.table_with_projections(debug_dfs=True)
 df = covid_data.filter_df(df_all)
 df.columns
@ -43,12 +43,30 @@ df.columns

 #hide_input
 from IPython.display import Markdown
-Markdown(f"***Based on data up to: {covid_data.cur_date}***")
+Markdown(f"*Based on data up to*: ***{covid_data.cur_date}***")

 #hide
-geo_helper = overview_helpers.GeoMap
+geo_helper = covid_helpers.GeoMap
 df_geo = geo_helper.make_geo_df(df_all, cases_filter=1000, deaths_filter=20)
-fig = geo_helper.make_map_figure(df_geo)
+def hover_text_func(r: pd.Series):
+    """Build the HTML hover label for one row `r` of the map dataframe."""
+    reported = f"Cases (reported): {r['Cases.total']:,.0f} (+<b>{r['Cases.new']:,.0f}</b>)<br>"
+    estimated = f"Cases (estimated): {r['Cases.total.est']:,.0f} (+<b>{r['Cases.new.est']:,.0f}</b>)<br>"
+    affected = f"Affected percent: <b>{r['affected_ratio.est']:.1%}</b><br>"
+    spread = f"Transmission rate: <b>{r['transmission_rate']:.1%}</b> ± {r['transmission_rate_std']:.1%}<br>"
+    deaths = f"Deaths: {r['Deaths.total']:,.0f} (+<b>{r['Deaths.new']:,.0f}</b>)<br>"
+    # a leading line break first, then the statistics in display order
+    return "".join(("<br>", reported, estimated, affected, spread, deaths))
+fig = geo_helper.make_map_figure(
+    df_geo,
+    col='transmission_rate',
+    err_col='transmission_rate_std',
+    colorbar_title='%',
+    subtitle='Transmission rate: red spreading (>5%), blue recovering (<5%)',
+    hover_text_func=hover_text_func,
+    scale_max=10,
+    colorscale='Bluered',
+)

 #hide
 df_geo['affected_ratio.change.monthly.rate'] = (df_geo['affected_ratio.est.+7d'] -
@ -62,28 +80,34 @@ fig.update_layout(
                geo_helper.button_dict(
                    df_geo['transmission_rate'], 'Transmission rate<br>percent (blue-red)',
                    colorscale='Bluered', scale_max=10, percent=True,
-                    subtitle='Transmission rate: over 5% (red) spreading, under 5% (blue) recovering',
+                    subtitle='Transmission rate: red spreading (>5%), blue recovering (<5%)',
+                    colorbar_title='%',
                    err_series=df_geo['transmission_rate_std']),
                geo_helper.button_dict(
                    df_geo['transmission_rate'], 'Transmission rate<br>percent',
                    colorscale='YlOrRd', scale_max=33, percent=True,
                    subtitle='Transmission rate (related to R0)',
+                    colorbar_title='%',
                    err_series=df_geo['transmission_rate_std']),
                geo_helper.button_dict(
                    df_geo['Cases.new.per100k.est'], 'Recent cases<br>estimated per 100k',
                    colorscale='YlOrRd',
+                    colorbar_title='Cases / 100k',
                    subtitle='Estimated recent cases in last 5 days per 100k population'),
                geo_helper.button_dict(
                    df_geo['Cases.new.est'], 'Recent cases<br>(estimated)',
                    colorscale='YlOrRd',
+                    colorbar_title='Cases',
                    subtitle='Estimated recent cases in last 5 days'),
                geo_helper.button_dict(
                    df_geo['Cases.new.per100k'], 'Recent cases<br>reported per 100k',
                    colorscale='YlOrRd',
+                    colorbar_title='Cases / 100k',
                    subtitle='Reported recent cases in last 5 days per 100k population'),
                geo_helper.button_dict(
                    df_geo['Cases.new'], 'Recent cases<br>(reported)',
                    colorscale='YlOrRd',
+                    colorbar_title='Cases',
                    subtitle='Reported recent cases in last 5 days'),
            ],
            direction="down", bgcolor='#efe9da',
@ -94,94 +118,111 @@ fig.update_layout(
                geo_helper.button_dict(
                    df_geo['affected_ratio.est'], 'Affected percent<br>(Current)',
                    colorscale='Bluyl', percent=True,
+                    colorbar_title='%',
                    subtitle='Estimated current affected population percentage'),
                geo_helper.button_dict(
                    df_geo['affected_ratio.est.+14d'], 'Affected percent<br>(in 14 days)',
                    colorscale='Bluyl', scale_max=25, percent=True,
+                    colorbar_title='%',
                    subtitle='Projected affected population percentage in 14 days',
                    err_series=df_geo['affected_ratio.est.+14d.err']),
                geo_helper.button_dict(
                    df_geo['affected_ratio.est.+30d'], 'Affected percent<br>(in 30 days)',
                    colorscale='Bluyl', scale_max=25, percent=True,
+                    colorbar_title='%',
                    subtitle='Projected affected population percentage in 30 days',
                    err_series=df_geo['affected_ratio.est.+30d.err']),
                geo_helper.button_dict(
                    df_geo['affected_ratio.change.monthly.rate'],
                    title='Affected percent<br>montly change rate',
                    colorscale='Bluyl', scale_max=10, percent=True,
+                    colorbar_title='% per month',
                    subtitle='Current affected population percentage monthly change rate',
                    err_series=df_geo['affected_ratio.est.+30d.err']),
                geo_helper.button_dict(
                    df_geo['Cases.total.per100k.est'], 'Total cases<br>estimated per 100k',
                    colorscale='YlOrRd',
+                    colorbar_title='Cases / 100k',
                    subtitle='Estimated total cases per 100k population'),
                geo_helper.button_dict(
                    df_geo['Cases.total.est'], 'Total cases<br>(estimated)', colorscale='YlOrRd',
+                    colorbar_title='Cases',
                    subtitle='Estimated total cases'),
                geo_helper.button_dict(
                    df_geo['Cases.total.per100k'], 'Total cases<br>reported per 100k',
                    colorscale='YlOrRd',
+                    colorbar_title='Cases / 100k',
                    subtitle='Reported total cases per 100k population'),
                geo_helper.button_dict(
                    df_geo['Cases.total'], 'Total cases<br>(reported)', colorscale='YlOrRd',
+                    colorbar_title='Cases',
                    subtitle='Reported total cases'),
            ],
            direction="down", bgcolor='#dceae1',
            pad={"r": 10, "t": 10},
-            showactive=False, x=0.29, xanchor="left", y=1.1, yanchor="top"),
+            showactive=False, x=0.305, xanchor="left", y=1.1, yanchor="top"),
        dict(
            buttons=[
                geo_helper.button_dict(
                    df_geo['needICU.per100k'], 'ICU need<br>(current)',
                    colorscale='Sunsetdark', scale_max=10,
+                    colorbar_title='ICU beds / 100k',
                    subtitle='Estimated current ICU need per 100k population'),
                geo_helper.button_dict(
                    df_geo['needICU.per100k.+14d'],  'ICU need<br>(in 14 days)',
                    colorscale='Sunsetdark', scale_max=10,
+                    colorbar_title='ICU beds / 100k',
                    subtitle='Projected ICU need per 100k population in 14 days',
                    err_series=df_geo['needICU.per100k.+14d.err']),
                geo_helper.button_dict(
                    df_geo['needICU.per100k.+30d'],  'ICU need<br>(in 30 days)',
                    colorscale='Sunsetdark', scale_max=10,
+                    colorbar_title='ICU beds / 100k',
                    subtitle='Projected ICU need per 100k population in 30 days',
                    err_series=df_geo['needICU.per100k.+30d.err']),
                geo_helper.button_dict(
                    df_geo['icu_capacity_per100k'], 'Pre-COVID<br>ICU Capacity',
+                    colorbar_title='ICU beds / 100k',
                    colorscale='Blues',
                    subtitle='Pre-COVID ICU capacity per 100k population'),
            ],
            direction="down", bgcolor='#efdaee',
            pad={"r": 10, "t": 10},
-            showactive=False, x=0.515, xanchor="left", y=1.1, yanchor="top"),
+            showactive=False, x=0.54, xanchor="left", y=1.1, yanchor="top"),
        dict(
            buttons=[
                geo_helper.button_dict(
                    df_geo['Deaths.total.per100k'], 'Deaths<br>per 100k', colorscale='Reds',
+                    colorbar_title='Deaths / 100k',
                    subtitle='Total deaths per 100k population'),
                geo_helper.button_dict(
                    df_geo['Deaths.total'], 'Deaths<br>Total', colorscale='Reds',
+                    colorbar_title='Deaths',
                    subtitle='Total deaths'),
                geo_helper.button_dict(
                    df_geo['Deaths.new.per100k'], 'Recent deaths<br>per 100k', colorscale='Reds',
+                    colorbar_title='Deaths / 100k',
                    subtitle='Recent deaths in last 5 days per 100k population'),
                geo_helper.button_dict(
                    df_geo['Deaths.new'], 'Recent deaths<br>total', colorscale='Reds',
+                    colorbar_title='Deaths',
                    subtitle='Recent deaths in last 5 days'),
                geo_helper.button_dict(
-                    df_geo['lagged_fatality_rate'], 'Fatality rate %<br>(lagged)',
-                    colorscale='Reds', scale_max=20, percent=True,
-                    subtitle='Reported fatality rate (relative to reported cases 8 days ago)'),
+                    df_geo['current_testing_bias'], 'Current testing<br>bias',
+                    colorscale='YlOrRd', scale_max=20, percent=False,
+                    colorbar_title='Testing bias',
+                    subtitle='Current testing bias'),
            ],
            direction="down", bgcolor='#efdbda',
            pad={"r": 10, "t": 10},
-            showactive=False, x=0.69, xanchor="left", y=1.1, yanchor="top"),
+            showactive=False, x=0.715, xanchor="left", y=1.1, yanchor="top"),
    ]);

 # # World map (interactive)
-# > Includes only countries with at least 1000 reported cases or at least 20 reported deaths.
+# > Hover mouse over map for detailed information.
 #
 # - Details of estimation and prediction calculations are in [Appendix](#appendix) and in [Tables](#tables), as well as [Plots of model predictions](#examples).
-# - New cases and new deaths refer to cases or deaths in the last 5 days.
+# - Recent cases and Recent deaths refer to cases or deaths in the last 5 days.

 # > Tip: Select columns to show on map to from the dropdown menus. The map is zoomable and draggable.

@ -251,8 +292,8 @@ style_icu_table(df[(df['transmission_rate'] < 0.05) & (df['needICU.per100k'] > 0

 #hide_input
 df_alt = pd.concat([d.reset_index() for d in debug_dfs], axis=0)
-df_alt_filt = df_alt[(df_alt['day'] > -60) & (df_alt['country'].isin(df.index))]
-overview_helpers.altair_sir_plot(df_alt_filt, df['Deaths.new.per100k'].idxmax())
+df_alt_filt = df_alt[(df_alt['day'] > -120) & (df_alt['country'].isin(df.index))]
+covid_helpers.altair_sir_plot(df_alt_filt, df['Deaths.new.per100k'].idxmax())

 # ## Projected Affected Population percentages
 # > Top 20 countries with most estimated recent cases. Sorted by number of estimated recent cases during the last 5 days. More details in [Appendix](#appendix).
@ -266,7 +307,7 @@ cols = {'Cases.new.est': 'Estimated <br> <i>recent</i> cases<br>during<br>last 5
       'transmission_rate': 'Estimated<br>daily<br>transmission<br>rate',
       'affected_ratio.est.+14d': 'Projected<br><i>total</i><br>affected<br>percentage<br>In 14 days',
       'affected_ratio.est.+30d': 'Projected<br><i>total</i><br>affected<br>percentage<br>In 30 days',
-       'lagged_fatality_rate': 'Lagged<br>fatality <br> rate',
+       'current_testing_bias': 'Current<br>testing<br>bias',
      }
 df_show = stylers.country_index_emoji_link(df_cases, font_size=1)
 df_show['affected_ratio.est.+14d'] = stylers.with_errs_ratio(
@ -285,10 +326,12 @@ df_show = df_show[cols.keys()].rename(columns=cols)
           s_v=df_cases['transmission_rate']/0.33, subset=cols['transmission_rate'])
    .bar(subset=cols['Cases.new.est'], color='#b57b17')
    .bar(subset=cols['affected_ratio.est'], color='#5dad64', vmin=0, vmax=1.0)
-    .bar(subset=cols['lagged_fatality_rate'], color='#420412', vmin=0, vmax=0.2)
-    .applymap(lambda _: 'color: red', subset=cols['lagged_fatality_rate'])
+    .bar(subset=cols['current_testing_bias'], color='#420412', vmin=1, vmax=20)
+    .applymap(lambda _: 'color: red', subset=cols['current_testing_bias'])
    .format('<b>{:,.0f}</b>', subset=cols['Cases.new.est'])
-    .format('<b>{:.1%}</b>', subset=[cols['lagged_fatality_rate'], cols['affected_ratio.est']]))
+    .format('<b>{:.1%}</b>', subset=cols['affected_ratio.est'])
+    .format('<b>{:.1f}</b>', subset=cols['current_testing_bias'])
+ )
 # -

 # <a id='methodology'></a>
@ -303,9 +346,9 @@ df_show = df_show[cols.keys()].rename(columns=cols)
 #     - Where the rate estimated from [Total Outstanding Cases](https://covid19dashboards.com/outstanding_cases/#Appendix:-Methodology-of-Predicting-Recovered-Cases) is too high (on down-slopes) recovery probability if 1/20 is used (equivalent 20 days to recover).
 # - Total cases are estimated from the reported deaths for each country:
 #     - Each country has a different testing policy and capacity and cases are under-reported in some countries. Using an estimated IFR (fatality rate) we can estimate the number of cases some time ago by using the total deaths until today.
-#     - IFRs for each country is estimated using the age adjusted IFRs from [May 1 New York paper](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3590771) and [UN demographic data for 2020](https://population.un.org/wpp/Download/Standard/Population/). These IFRs can be found in `df['age_adjusted_ifr']` column. Some examples: US - 0.98%, UK - 1.1%, Qatar - 0.25%, Italy - 1.4%, Japan - 1.6%.
+#     - IFRs for each country are estimated using the age adjusted IFRs from [International IFRs study](https://www.nature.com/articles/s41586-020-2918-0#MOESM1) and [UN demographic data for 2020](https://population.un.org/wpp/Download/Standard/Population/). These IFRs can be found in `df['age_adjusted_ifr']` column.
 #     - The average fatality lag is assumed to be 8 days on average for a case to go from being confirmed positive (after incubation + testing lag) to death. This is the same figure used by ["Estimating The Infected Population From Deaths"](https://covid19dashboards.com/covid-infected/).
-#     - Testing bias adjustment: the actual lagged fatality rate is than divided by the IFR to estimate the testing bias in a country. The estimated testing bias then multiplies the reported case numbers to estimate the *true* case numbers (*=case numbers if testing coverage was as comprehensive as in the heavily tested countries*).
+#     - **Testing bias adjustment**: the actual lagged case fatality rate is then divided by the age adjusted IFR to estimate the testing bias in a country. To account for testing bias changes (e.g. increased testing capacity) this is done on a rolling window basis of two months (with at least 300 deaths). The estimated testing bias then multiplies the reported case numbers for each date to estimate the *true* case numbers (*=case numbers that would be consistent with the deaths and the age adjusted IFR*).
 # - ICU need is calculated and age-adjusted as follows:
 #     - UK ICU ratio was reported as [4.4% of active reported cases](https://www.imperial.ac.uk/media/imperial-college/medicine/sph/ide/gida-fellowships/Imperial-College-COVID19-NPI-modelling-16-03-2020.pdf).
 #     - Using UKs ICU ratio, UK's testing bias, and IFRs corrected for age demographics we can estimate each country's ICU ratio (the number of cases requiring ICU hospitalisation).
--- a/_notebooks/2020-06-12-covid19-news.ipynb
+++ b/_notebooks/2020-06-12-covid19-news.ipynb
--- a/_notebooks/2020-06-12-covid19-news.py
+++ b/_notebooks/2020-06-12-covid19-news.py
@ -31,7 +31,7 @@
 # +
 #hide
 import pandas as pd
-import overview_helpers as covid_helpers
+import covid_helpers as covid_helpers

 stylers = covid_helpers.PandasStyling

@ -71,7 +71,7 @@ df_data['Deaths.diff.per100k'] = df_data['Deaths.total.diff'] / (df_data['popula
 df_data['transmission_rate.change'] = (df_data['transmission_rate'] / df_data['transmission_rate_past']) - 1
 df_data['affected_ratio.miss'] = (df_cur['affected_ratio.est'] / df_past['affected_ratio.est.+9d']) - 1
 df_data['needICU.per100k.miss'] = (df_cur['needICU.per100k'] / df_past['needICU.per100k.+9d']) - 1
-df_data['testing_bias.change'] = (df_data['testing_bias'] / df_past['testing_bias']) - 1
+df_data['testing_bias.change'] = (df_data['current_testing_bias'] / df_past['current_testing_bias']) - 1

 # -

@ -103,7 +103,8 @@ def style_news_infections(df):
            .apply(stylers.add_bar, color='#f49d5a',
                   s_v=df['transmission_rate'] / rate_norm, subset=cols['transmission_rate'])
            .apply(stylers.add_bar, color='#d8b193',
-                   s_v=df['transmission_rate_past'] / rate_norm, subset=cols['transmission_rate_past'])
+                   s_v=df['transmission_rate_past'] / rate_norm,
+                   subset=cols['transmission_rate_past'])
            .format('<b>{:.2f}</b>', subset=[cols['needICU.per100k']])
            .format('<b>{:,.0f}</b>', subset=cols['Cases.new.est'])
            .format('<b>{:.1%}</b>', subset=[cols['affected_ratio.est'],
@ -136,7 +137,6 @@ style_news_infections(df_data.loc[new_waves])
 # +
 # hide
 df_alt_all = pd.concat([d.reset_index() for d in debug_dfs], axis=0)
-
 def infected_plots(countries, title):
    return covid_helpers.altair_multiple_countries_infected(
        df_alt_all, countries=countries, title=title, marker_day=day_diff)
@ -350,8 +350,7 @@ def style_death_burden(df):
    return (df_show.style
            .bar(subset=cols['needICU.per100k'], color='#b21e3e', vmin=0, vmax=10)
            .bar(subset=cols['Deaths.new.per100k'], color='#7b7a7c', vmin=0, vmax=death_norm)
-            .bar(subset=cols['Deaths.new.per100k.past'], color='#918f93', vmin=0,
-                 vmax=death_norm)
+            .bar(subset=cols['Deaths.new.per100k.past'], color='#918f93', vmin=0, vmax=death_norm)
            .bar(subset=cols['Deaths.total.diff'], color='#6b595d', vmin=0)
            .bar(subset=cols['affected_ratio.est'], color='#5dad64', vmin=0, vmax=1.0)
            .format('<b>{:.0f}</b>', subset=[cols['Deaths.total.diff'],
@ -428,12 +427,6 @@ df_alt_filt = df_alt_all[(df_alt_all['day'] > -60) &
                         (df_alt_all['country'].isin(news_countries))]
 covid_helpers.altair_sir_plot(df_alt_filt, new_waves[0])

-# ## Future World projections (all countries stacked)
-# The outputs of the models for all countries in stacked plots.
-# > Tip: Hover the mouse of the area to see which country is which and the countries S/I/R ratios at that point. 
-#
-# > Tip: The plots are zoomable and draggable.
-
 #hide
 df_tot = df_alt_all.rename(columns={'country': cur_data.COL_REGION}
                          ).set_index(cur_data.COL_REGION)
@ -443,10 +436,28 @@ for c in df_tot.columns[df_alt_all.dtypes == float]:
 df_tot = df_tot.reset_index()
 df_tot.columns = [c.replace('.', '-') for c in df_tot.columns]

+#hide_input
+df_now = df_tot[df_tot['day'] == 0]
+pop = df_now['population'].sum()
+s_now = df_now['Susceptible-total'].sum() / pop
+i_now = df_now['Infected-total'].sum() / pop
+r_now = df_now['Removed-total'].sum() / pop
+Markdown("## World totals:\n"
+         f"Infected &#128567;: **{i_now:.1%}**, "
+         f"Removed &#128532;: **{r_now:.1%}**, "
+         f"Susceptible &#128543;: **{s_now:.1%}**")
+
+# ## Future World projections (all countries stacked)
+# The outputs of the models for all countries in stacked plots.
+# > Tip: Hover the mouse over the area to see which country is which and the country's S/I/R ratios at that point.
+#
+# > Tip: The plots are zoomable and draggable.
+
 # +
 #hide
 # filter by days
-df_tot = df_tot[(df_tot['day'].between(-30, 30) & (df_tot['day'] % 3 == 0)) | (df_tot['day'] % 10 == 0)]
+days = 30
+df_tot = df_tot[df_tot['day'].between(-days, days) | (df_tot['day'] % 10 == 0)]

 # filter out noisy countries for actively infected plot:
 df_tot_filt = df_tot[df_tot[cur_data.COL_REGION].isin(df_cur.index.unique())]
@ -465,11 +476,12 @@ today_line = (alt.Chart(pd.DataFrame({'x': [0]}))
                  .encode(x='x', size=alt.value(1)))

 # make plot
-max_y = df_tot_filt[df_tot_filt['day']==30]['Infected-total'].sum()
+max_y = (df_tot_filt[df_tot_filt['day'].between(-days, days)]
+         .groupby('day')['Infected-total'].sum().max())
 stacked_inf = alt.Chart(df_tot_filt).mark_area().encode(
    x=alt.X('day:Q',
            title=f'days relative to today ({cur_data.cur_date})',
-            scale=alt.Scale(domain=(-30, 30))),
+            scale=alt.Scale(domain=(-days, days))),
    y=alt.Y("Infected-total:Q", stack=True, title="Number of people",
           scale=alt.Scale(domain=(0, max_y))),
    color=alt.Color("Country/Region:N", legend=None),
@ -485,11 +497,11 @@ stacked_inf = alt.Chart(df_tot_filt).mark_area().encode(

 # +
 #hide_input
-max_y = df_tot_filt[df_tot_filt['day']==30]['Removed-total'].sum()
+max_y = df_tot_filt[df_tot_filt['day']==days]['Removed-total'].sum()
 stacked_rem = alt.Chart(df_tot_filt).mark_area().encode(
    x=alt.X('day:Q',
            title=f'days relative to today ({cur_data.cur_date})',
-            scale=alt.Scale(domain=(-30, 30))),
+            scale=alt.Scale(domain=(-days, days))),
    y=alt.Y("Removed-total:Q", stack=True, title="Number of people",
           scale=alt.Scale(domain=(0, max_y))),
    color=alt.Color("Country/Region:N", legend=None),
--- a/_notebooks/2020-12-19-covid19-micromorts.py
+++ b/_notebooks/2020-12-19-covid19-micromorts.py
@ -0,0 +1,253 @@
+# -*- coding: utf-8 -*-
+# ---
+# jupyter:
+#   jupytext:
+#     formats: ipynb,py:light
+#     text_representation:
+#       extension: .py
+#       format_name: light
+#       format_version: '1.5'
+#       jupytext_version: 1.6.0
+#   kernelspec:
+#     display_name: Python 3
+#     language: python
+#     name: python3
+# ---
+
+# # Risk of deadly infection by age (for the unvaccinated).
+# > Monthly risk of death due to COVID-19 infection for the unvaccinated or not previously infected. Mapped by country and age.
+#
+# - permalink: /micromorts-risk/
+# - image: images/micromorts.png
+# - author: <a href=https://github.com/artdgn/>artdgn</a>
+# - categories: [world, overview, interactive, risk]
+# - toc: false
+# - hide: false
+
+# > Important: This page contains estimations that were not calculated by an epidemiologist.
+
+# +
+#hide
+import pandas as pd
+try:  # using in REPL
+    from . import covid_helpers
+except ImportError:
+    import covid_helpers
+
+covid_data = covid_helpers.CovidData()
+df_all, _, _ = covid_data.table_with_current_rates_and_ratios()
+# -
+
+#hide
+df_all.columns.sort_values()
+
+#hide_input
+from IPython.display import Markdown
+Markdown(f"*Based on data up to*: ***{covid_data.cur_date}***")
+
+#hide
+# Daily chance of infection: transmission rate times the actively infected ratio,
+# divided by the ratio that is neither actively infected nor recovered.
+df_all['daily_infection_chance'] = (
+    df_all['transmission_rate'] * df_all['current_active_ratio'] /
+    (1 - df_all['current_active_ratio'] - df_all['current_recovered_ratio']))
+# Chance of at least one infection when the daily chance is compounded over 30 days.
+df_all['monthly_infection_chance'] = 1 - (1 - df_all['daily_infection_chance']) ** 30
+# Monthly risk of a deadly infection: infection chance times the age adjusted IFR.
+df_all['monthly_deadly_infection_risk'] = (
+        df_all['monthly_infection_chance'] * df_all['age_adjusted_ifr'])
+# The same risk expressed in micromorts (multiplied by one million).
+df_all['monthly_average_micromorts'] = df_all['monthly_deadly_infection_risk'] * 1e6
+# The risk multiplied by the population column.
+df_all['monthly_population_risk'] = df_all['monthly_deadly_infection_risk'] * df_all['population']
+
+#hide
+# retrospective empirical risk from recent deaths
+# NOTE(review): 'Deaths.new.per100k' appears to cover a 5 day window, so the "daily_" column
+# below looks like a per-window risk rather than a per-day one - confirm against covid_helpers.
+df_all['daily_recent_empirical_risk'] = df_all['Deaths.new.per100k'] / 1e5
+# compounded over 30 / 5 = 6 periods to get a monthly risk
+df_all['monthly_recent_empirical_risk'] = 1 - (1 - df_all['daily_recent_empirical_risk']) ** (30 / 5)
+# the same risk expressed in micromorts (multiplied by one million)
+df_all['monthly_recent_empirical_micromorts'] = df_all['monthly_recent_empirical_risk'] * 1e6
+# the risk multiplied by the population column
+df_all['monthly_population_empirical_risk'] = df_all['monthly_recent_empirical_risk'] * df_all['population']
+
+#hide
+# add age specific data
+# IFR per displayed age range: the three youngest ranges take the mean of a label slice
+# of `ifrs`, the remaining ranges each take the value stored under a single label.
+ifrs = covid_helpers.AgeAdjustedData.intl_ifrs
+cols = covid_helpers.AgeAdjustedData.Cols
+age_ifrs = {
+    '0-29': ifrs.loc[cols.o4:cols.o29].mean(),
+    '30-44': ifrs.loc[cols.o34:cols.o44].mean(),
+    '45-59': ifrs.loc[cols.o49:cols.o59].mean(),
+    '60-64': ifrs.loc[cols.o64],
+    '65-69': ifrs.loc[cols.o69],
+    '70-74': ifrs.loc[cols.o74],
+    '75-79': ifrs.loc[cols.o79],
+    '80+': ifrs.loc[cols.o84],
+}
+# one micromorts column per age range: range IFR times monthly infection chance, times one million
+for age_range, ifr in age_ifrs.items():
+    df_all[f'monthly_micromorts_{age_range}'] = 1e6 * ifr * df_all['monthly_infection_chance']
+
+#hide
+geo_helper = covid_helpers.GeoMap
+df_geo = geo_helper.make_geo_df(df_all, cases_filter=1000, deaths_filter=20)
+
+# +
+#hide
+def micromorts_hover_func(r: pd.Series, age_range=None):
+    """Hover label comparing the monthly risk in row `r` to the risk of some activities.
+
+    With `age_range` unset the row's age adjusted IFR and average micromorts are used,
+    otherwise the IFR from `age_ifrs` and the micromorts column of that age range.
+    """
+    whole_country = age_range is None
+    ifr = r['age_adjusted_ifr'] if whole_country else age_ifrs[age_range]
+    ifr_str = "this country's age profile" if whole_country else f'age range {age_range}'
+    mm = r['monthly_average_micromorts' if whole_country else f'monthly_micromorts_{age_range}']
+    susceptible = 1 - r['current_active_ratio'] - r['current_recovered_ratio']
+    # the risk expressed as equivalent amounts of travel and sports
+    comparisons = (
+        f"<br>Risk of death due to one month<br>"
+        f"of exposure is comparable to:<br>"
+        f"  - <b>{mm * 10:.0f}</b> km by Motorcycle<br>"
+        f"  - <b>{mm * 370:.0f}</b> km by Car<br>"
+        f"  - <b>{mm * 1600:.0f}</b> km by Plane<br>"
+        f"  - <b>{mm / 5:.0f}</b> scuba dives<br>"
+        f"  - <b>{mm / 8:.0f}</b> sky diving jumps<br>"
+        f"  - <b>{mm / 430:.0f}</b> base jumping jumps<br>"
+        f"  - <b>{mm / 12000:.0f}</b> Everest climbs<br><br>"
+    )
+    # the inputs the risk was derived from
+    inputs = (
+        f"Contagious percent of population:"
+        f"  <b>{r['current_active_ratio']:.1%}</b><br>"
+        f"Susceptible percent of population:"
+        f"  <b>{susceptible:.1%}</b><br>"
+        f"Transmission rate: <b>{r['transmission_rate']:.1%}</b><br>"
+        f"Chance of infection over a month:"
+        f"  <b>{r['monthly_infection_chance']:.1%}</b><br>"
+        f"Chance of death after infection<br> (for {ifr_str}):"
+        f"  <b>{ifr:.2%}</b>"
+    )
+    return comparisons + inputs
+
+def micromorts_hover_texts_for_age_range(age_range):
+    """List of micromorts hover labels, one per row of `df_geo`, for the given age range."""
+    hover_texts = df_geo.apply(micromorts_hover_func, axis=1, age_range=age_range)
+    return hover_texts.tolist()
+
+def stats_hover_text_func(r: pd.Series):
+    """Hover label with the case, death and infection-chance statistics of row `r`."""
+    susceptible = 1 - r['current_active_ratio'] - r['current_recovered_ratio']
+    # totals, with the recent change in bold
+    counts = (
+        "<br>"
+        f"Cases (reported): {r['Cases.total']:,.0f} (+<b>{r['Cases.new']:,.0f}</b>)<br>"
+        f"Cases (estimated): {r['Cases.total.est']:,.0f} (+<b>{r['Cases.new.est']:,.0f}</b>)<br>"
+        f"Deaths: {r['Deaths.total']:,.0f} (+<b>{r['Deaths.new']:,.0f}</b>)<br><br>"
+    )
+    # population ratios and rates
+    ratios = (
+        f"Contagious percent of population:"
+        f"  <b>{r['current_active_ratio']:.1%}</b><br>"
+        f"Susceptible percent of population:"
+        f"  <b>{susceptible:.1%}</b><br>"
+        f"Transmission rate: <b>{r['transmission_rate']:.1%}</b><br>"
+        f"Chance of infection over a month:"
+        f"  <b>{r['monthly_infection_chance']:.1%}</b><br>"
+    )
+    return counts + ratios
+
+
+# -
+
+#hide
+import functools
+default_age = '60-64'
+colorscale = 'RdPu'
+fig = geo_helper.make_map_figure(
+    df_geo,
+    col=f'monthly_micromorts_{default_age}',
+    colorbar_title='Micromorts',
+    subtitle=f"Ages {default_age}: risk of deadly infection due to a month's exposure",
+    hover_text_func=functools.partial(micromorts_hover_func, age_range=default_age),
+    scale_max=None,
+    colorscale=colorscale,
+    err_col=None,
+)
+
+#hide
+# Hover labels for the buttons that are not age specific: built once and reused.
+stats_hover_texts = df_geo.apply(stats_hover_text_func, axis=1).tolist()
+# Age ranges in dropdown order: reversed relative to `age_ifrs`, so '80+' comes first.
+menu_age_ranges = list(age_ifrs.keys())[::-1]
+fig.update_layout(
+    updatemenus=[
+        dict(
+            buttons=[
+                # one button per age range
+                geo_helper.button_dict(
+                    df_geo[f'monthly_micromorts_{age_range}'],
+                    title=f'<b>Ages {age_range} monthly risk in micromorts</b>',
+                    colorbar_title='Micromorts',
+                    colorscale=colorscale, scale_max=None, percent=False,
+                    subtitle=f"Ages {age_range}: risk of deadly infection due to a month's exposure",
+                    err_series=None,
+                    hover_text_list=micromorts_hover_texts_for_age_range(age_range)
+                )
+                for age_range in menu_age_ranges
+            ] + [
+                # country average over its age profile
+                geo_helper.button_dict(
+                    df_geo['monthly_average_micromorts'],
+                    title='<b>Average monthly risk in micromorts</b>',
+                    colorbar_title='Micromorts',
+                    colorscale=colorscale, scale_max=None, percent=False,
+                    subtitle="Risk of deadly infection due to a month's exposure",
+                    err_series=None,
+                    hover_text_list=micromorts_hover_texts_for_age_range(None)
+                ),
+            ] + [
+                geo_helper.button_dict(
+                    df_geo['monthly_infection_chance'],
+                    title='<b>Monthly infection chance</b>',
+                    colorbar_title='%',
+                    colorscale='Reds', scale_max=None, percent=True,
+                    subtitle="Chance of being infected during a month's exposure",
+                    err_series=None,
+                    hover_text_list=stats_hover_texts
+                )
+            ] + [
+                geo_helper.button_dict(
+                    df_geo['monthly_population_risk'],
+                    title='<b>Monthly total population risk</b>',
+                    colorbar_title='Possible deaths',
+                    colorscale='amp', scale_max=None, percent=False,
+                    subtitle="Total possible deaths due to a month's exposure",
+                    err_series=None,
+                    hover_text_list=stats_hover_texts
+                )
+            ] + [
+                # projected average risk relative to the risk implied by recent deaths
+                geo_helper.button_dict(
+                    (df_geo['monthly_average_micromorts'] /
+                     df_geo['monthly_recent_empirical_micromorts']),
+                    title='<b>Ratio of average monthly risk<br>to recent deaths (as risk)</b>',
+                    colorbar_title='%',
+                    colorscale='Bluered', scale_max=200, percent=True,
+                    subtitle="Ratio of average monthly risk to recent deaths expressed as risk",
+                    err_series=None,
+                    hover_text_list=stats_hover_texts
+                )
+            ],
+            direction="down", bgcolor='#dceae1',
+            pad={"t": 10},
+            # preselect the button of the age range the figure was initially drawn with
+            active=menu_age_ranges.index(default_age),
+            showactive=True, x=0.1, xanchor="left", y=1.1, yanchor="top"),
+    ]);
+
+# ### Use dropdown menu to select specific age range
+# > <font size=2>- Hover the mouse over a country for a risk comparison to some sports and travel modes.<br>- <a href="https://en.wikipedia.org/wiki/Micromort">"Micromorts"</a> are a measure of risk equal to a 1 in a million probability of death.<br>- Risk of death calculated for the unvaccinated or not previously infected. </font>
+
+#hide_input
+# from IPython.display import HTML
+# HTML(fig.to_html())
+fig.show()
+
+# > Tip: The map is zoomable and draggable. Double click to reset.
+
+# ### Appendix: assumptions, explanations.
+# <a id='appendix'></a>
+# - Monthly risk calculation:
+# $$
+# Monthly\,Risk = Infection\,Fatality\,Rate_{age\,group} \cdot P_{monthly\,infection}\\\,\\
+# P_{monthly\,infection} = 1 - (1 - P_{daily\,infection}) ^ {30\,days}\\\,\\
+# P_{daily\,infection} = 
+# \frac{Actively\,Infected\,\%\cdot Transmission\,Rate\,\%}
+# {1 - Actively\,Infected\,\% - Recovered\,or\, Dead\,\%} \\
+# $$
+#   - "Actively Infected" and "Recovered or Dead" population percentages are estimated from past deaths and cases ([See estimations appendix in estimations & projections notebook](/covid-progress-projections/#appendix)).
+#   - Age specific IFRs are taken from recent [Nature international meta-study of IFRs](https://www.nature.com/articles/s41586-020-2918-0#MOESM1).
+#   - Country demographics for country average IFRs are taken from [UN demographic data for 2020](https://population.un.org/wpp/Download/Standard/Population/).
+#   - Micromort deaths risk comparative data (travel and sports) are taken from [Wikipedia article on Micromorts](https://en.wikipedia.org/wiki/Micromort).
+#   - The calculation is done on a daily basis and extrapolated naively to a month.
+# - **Why is everything "monthly"**? The main actionable question this analysis aims to help answer is **"How much risk is someone taking by not getting vaccinated now? What is the risk of waiting another month?"**. A daily timescale for this question is too short due to not being actionable, and on a scale much longer than a month the underlying data for calculations will change substantially (e.g. transmission rates, currently infected population) to not offer a reasonable approximation. So a month felt to me like roughly the right time scale for the risk aggregation that is both easy to think about and should still be roughly correct.
+# - Assumptions & limitations:
+#     - The exposure is assumed to be **average exposure** typical of that country (as it manifests in the recent case and deaths data). Protective measures (e.g. masks) and self isolation should of course reduce the risk (if practiced more than the average for that population at that time).
+#     - Susceptible population is assumed to not yet be **vaccinated**. Once vaccination prevalence becomes substantial and data becomes available, the calculations can be adjusted. The risk estimates are for **regular susceptible** population. People who have been infected already are excluded (as recovered).
+#     - All rates and percentages such as: transmission rate, active and recovered percentages are assumed to be **constant** during the month to keep the monthly calculation simple. This is of course NOT true. However although these rates do change, they usually change slowly enough for the likely result to still be of the same order of magnitude. It is possible to use values from a predictive model for this, but they too have errors (as they too are simplistic). For this analysis I preferred to go with the simple to calculate / understand approximation with a well understood error, than with the complex to calculate / understand approximation with an unknown error.
+#     - All the additional assumptions from [estimations appendix in estimations & projections notebook](/covid-progress-projections/#appendix)
+# - Vaccination effect on risk:
+#     - The risk for the **vaccinated** is not calculated here. It is currently widely assumed that the reported [Moderna](https://en.wikipedia.org/wiki/MRNA-1273) and [Pfizer-BioNTech](https://en.wikipedia.org/wiki/Tozinameran) vaccines might reduce the **chance of infection** by around **90%**.
+#     - While there are well founded estimates for the effect on **infection chance**, the effect on IFR (fatality rate) is much less known: how does vaccination affect the severity of the disease *if* infected? Answering this will require studying millions of vaccinated people, so will only be available later.
+# - Additional related analyses:
+#     - Another map of statistics of cases, deaths, ICU need and affected population percentage can be explored in [world-map part of the estimations & projections notebook](/covid-progress-projections/#World-map-(interactive))
+#     - Per country predictive models of population ratios can be explored in [trajectories plots in estimations & projections notebook](/covid-progress-projections/#Interactive-plot-of-model-predictions-and-past-data)
+# ![](https://artdgn.goatcounter.com/count?p=c19d-morts)
--- a/_notebooks/overview_helpers.py
+++ b/_notebooks/overview_helpers.py
@ -1,5 +1,6 @@
 import os
 import re
+from typing import Tuple, List
 from urllib import request

 import numpy as np
@ -91,13 +92,33 @@ class AgeAdjustedData:
        o99 = '95-99'
        o100p = '100+'

-        # https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3590771
-        # ny = new york
-        ny17 = 'ny17'  # 0-17
-        ny44 = 'ny44'  # 18-44
-        ny64 = 'ny64'  # 45-64
-        ny74 = 'ny74'  # 65-74
-        ny75p = 'ny75p'  # 75+
+    # paper: https://www.nature.com/articles/s41586-020-2918-0#MOESM1
+    # table S3 from supplementary material:
+    #   https://static-content.springer.com/esm/art%3A10.1038%2Fs41586-020-2918-0/MediaObjects/41586_2020_2918_MOESM1_ESM.pdf
+    intl_ifrs = pd.Series({
+        Cols.o4: 0.003,
+        Cols.o9: 0.001,
+        Cols.o14: 0.001,
+        Cols.o19: 0.003,
+        Cols.o24: 0.006,
+        Cols.o29: 0.013,
+        Cols.o34: 0.024,
+        Cols.o39: 0.040,
+        Cols.o44: 0.075,
+        Cols.o49: 0.121,
+        Cols.o54: 0.207,
+        Cols.o59: 0.323,
+        Cols.o64: 0.456,
+        Cols.o69: 1.075,
+        Cols.o74: 1.674,
+        Cols.o79: 3.203,
+        Cols.o84: 8.292,  # 80+ is a single bucket in that paper
+        Cols.o89: 8.292,
+        Cols.o94: 8.292,
+        Cols.o99: 8.292,
+        Cols.o100p: 8.292,
+    })
+    intl_ifrs *= 0.01  # convert from percent to ratio

    @classmethod
    def load(cls):
@ -145,30 +166,8 @@ class AgeAdjustedData:
        # convert to ratios
        df_pct = (df_num.T / df_num.sum(1)).T

-        # calulate NY bucket percentages
-        cols = cls.Cols
-        df_pct[cols.ny17] = df_pct[[cols.o4, cols.o9,
-                                    cols.o14, cols.o19]].sum(1)
-        df_pct[cols.ny44] = df_pct[[cols.o24, cols.o29,
-                                    cols.o34, cols.o39,
-                                    cols.o44]].sum(1)
-        df_pct[cols.ny64] = df_pct[[cols.o49,
-                                    cols.o54, cols.o59,
-                                    cols.o64]].sum(1)
-        df_pct[cols.ny74] = df_pct[[cols.o69, cols.o74]].sum(1)
-        df_pct[cols.ny75p] = df_pct[[cols.o79,
-                                     cols.o84, cols.o89,
-                                     cols.o94, cols.o99,
-                                     cols.o100p]].sum(1)
-        # check: df_pct[[cols.ny17, cols.ny44, cols.ny64, cols.ny74, cols.ny75p]].sum(1)
-
        # calculate IFR
-        # https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3590771
-        #  Table 1
-        ifr_s = pd.Series(np.dot(df_pct
-                                 [[cols.ny17, cols.ny44, cols.ny64, cols.ny74, cols.ny75p]],
-                                 [0.00002, 0.00087, 0.00822, 0.02626, 0.07137]),
-                          index=df_pct.index)
+        ifr_s = pd.Series(np.dot(df_pct, cls.intl_ifrs), index=df_pct.index)

        ## icu need estimation
        ## https://www.imperial.ac.uk/media/imperial-college/medicine/sph/ide/gida-fellowships/Imperial-College-COVID19-NPI-modelling-16-03-2020.pdf
@ -288,16 +287,17 @@ class EmojiFlags(ScrapedTableBase):

 class CovidData:
    COL_REGION = COL_REGION
-    ABS_COLS = ['Cases.total', 'Deaths.total', 'Cases.new', 'Deaths.new']
+    CASES_TOT = 'Cases.total'
+    CASES_NEW = 'Cases.new'
+    DEATHS_TOT = 'Deaths.total'
+    DEATHS_NEW = 'Deaths.new'

-    PER_100K_COLS = [f'{c}.per100k' for c in ABS_COLS]
-    CASES_COLS = ABS_COLS[::2] + PER_100K_COLS[::2]
-    EST_COLS = [f'{c}.est' for c in CASES_COLS]
+    PER_100K_SUFFIX = '.per100k'

-    dft_cases = SourceData.get_covid_dataframe('confirmed')
-    dft_deaths = SourceData.get_covid_dataframe('deaths')
-    dft_recovered = SourceData.get_covid_dataframe('recovered')
-    dt_cols_all = SourceData.get_dates(dft_cases)
+    dft_cases_raw = SourceData.get_covid_dataframe('confirmed')
+    dft_deaths_raw = SourceData.get_covid_dataframe('deaths')
+    # dft_recovered = SourceData.get_covid_dataframe('recovered')
+    dt_cols_all = SourceData.get_dates(dft_cases_raw)

    cur_date = pd.to_datetime(dt_cols_all[-1]).date().isoformat()

@ -307,19 +307,17 @@ class CovidData:
    ## testing bias
    death_lag = 8

-    ## ICU spare capacity
-    # occupancy 66% for us:
-    #   https://www.sccm.org/Blog/March-2020/United-States-Resource-Availability-for-COVID-19
-    # occupancy average 75% for OECD:
-    #   https://www.oecd-ilibrary.org/social-issues-migration-health/health-at-a-glance-2019_4dd50c09-en
-    icu_spare_capacity_ratio = 0.3
-
    def __init__(self, days_offset=0):
        assert days_offset <= 0, 'day_offest can only be 0 or negative (in the past)'
        self.dt_cols = self.dt_cols_all[:(len(self.dt_cols_all) + days_offset)]
-        self.dft_cases_backfilled = self._cases_with_backfilled_unreported_days()
+        self.dft_cases_backfilled = self._cases_with_backfilled_unreported_days()[self.dt_cols]
+        self.dft_deaths = self.dft_deaths_raw.groupby(COL_REGION).sum()[self.dt_cols]
        self.dfc_cases = self.dft_cases_backfilled[self.dt_cols[-1]]
-        self.dfc_deaths = self.dft_deaths.groupby(COL_REGION)[self.dt_cols[-1]].sum()
+        self.dfc_deaths = self.dft_deaths[self.dt_cols[-1]]
+
+        # to be calculated later
+        self.testing_biases_dft: pd.DataFrame = None
+        self.cases_est_dft: pd.DataFrame = None

    def _cases_with_backfilled_unreported_days(self):

@ -363,7 +361,7 @@ class CovidData:

            return pd.Series(out, index=series.index)

-        cases = self.dft_cases.groupby(self.COL_REGION).sum()[self.dt_cols_all]
+        cases = self.dft_cases_raw.groupby(self.COL_REGION).sum()[self.dt_cols_all]
        diffs = cases.diff(axis=1)
        diffs.iloc[:, 0] = cases.iloc[:, 0]  # replace resulting nans in first date's data

@ -375,7 +373,7 @@ class CovidData:
        return self.dft_cases_backfilled[self.dt_cols[-lag]]

    def lagged_deaths(self, lag=PREV_LAG):
-        return self.dft_deaths.groupby(COL_REGION)[self.dt_cols[-lag]].sum()
+        return self.dft_deaths[self.dt_cols[-lag]]

    def add_last_dates(self, df):

@ -386,9 +384,9 @@ class CovidData:
            else:
                return float('nan')

-        df['last_case_date'] = (self.dft_cases.groupby(COL_REGION).sum().diff(axis=1)
+        df['last_case_date'] = (self.dft_cases_raw.groupby(COL_REGION).sum().diff(axis=1)
                                .apply(last_date, axis=1))
-        df['last_death_date'] = (self.dft_deaths.groupby(COL_REGION).sum().diff(axis=1)
+        df['last_death_date'] = (self.dft_deaths_raw.groupby(COL_REGION).sum().diff(axis=1)
                                 .apply(last_date, axis=1))
        return df

@ -400,9 +398,11 @@ class CovidData:
                    .sort_values(by=['Cases.total', 'Deaths.total'], ascending=[False, False])
                    .reset_index())
        df_table.rename(columns={'index': COL_REGION}, inplace=True)
-        for c in self.ABS_COLS[:2]:
-            df_table[c.replace('total', 'new')] = (df_table[c] - df_table[f'{c}.prev']).clip(0)  # DATA BUG
-        df_table['Fatality Rate'] = (100 * df_table['Deaths.total'] / df_table['Cases.total']).round(1)
+        for c in [self.CASES_TOT, self.DEATHS_TOT]:
+            df_table[c.replace('total', 'new')] = (
+                    df_table[c] - df_table[f'{c}.prev']).clip(0)  # DATA BUG
+        df_table['Fatality Rate'] = (100 * df_table['Deaths.total'] /
+                                     df_table['Cases.total']).round(1)
        df_table['Continent'] = df_table[COL_REGION].map(SourceData.mappings['map.continent'])

        # remove problematic
@ -426,22 +426,83 @@ class CovidData:
              .sort_values('Cases.new', ascending=False))
        df['Fatality Rate'] /= 100

+        # add emoji flags
        df['emoji_flag'] = EmojiFlags.load().set_index(COL_REGION)[EmojiFlags.emoji_col]
        df['emoji_flag'] = df['emoji_flag'].fillna('')

+        # last dates
        df = self.add_last_dates(df)

+        # age adjusted data
        (df['age_adjusted_ifr'],
         df['population'],
         df['age_adjusted_icu_percentage']) = AgeAdjustedData.load()

+        # add per population columns
        df.dropna(subset=['population'], inplace=True)
+        for col in [self.CASES_TOT, self.DEATHS_TOT, self.CASES_NEW, self.DEATHS_NEW]:
+            df[f'{col}{self.PER_100K_SUFFIX}'] = df[col] * 1e5 / df['population']

-        for col, per_100k_col in zip(self.ABS_COLS, self.PER_100K_COLS):
-            df[per_100k_col] = df[col] * 1e5 / df['population']
+        # add ICU capacity data
+        df_beds = self.beds_df()
+        df['icu_capacity_per100k'] = df_beds['icu_per_100k']

        return df

+    def calculate_testing_biases_dft(
+            self, ifrs: pd.Series, min_window_lag = 60, min_window_deaths = 300
+    ) -> pd.DataFrame:
+        deaths_dft = self.dft_deaths
+        cases_dft = self.dft_cases_backfilled
+
+        def biases_vec(country: str) -> pd.Series:
+            d_vec = deaths_dft.loc[country].values
+            c_vec = cases_dft.loc[country].values
+            ifr = ifrs.loc[country]
+            left, right = self.death_lag, self.death_lag + min_window_lag
+            biases = np.ones_like(c_vec)
+
+            # short circuit and fallback if not enough data for windowed calculations
+            if d_vec[-1] < min_window_deaths:
+                if d_vec[-1] > 0:
+                    biases[:] = (d_vec[-1] / c_vec[-1]) / ifr
+                else:
+                    pass  # just return ones
+
+            else:
+                def diff_deaths(right, left):
+                    return d_vec[right] - d_vec[left]
+
+                def diff_cases(right, left):
+                    return c_vec[right - self.death_lag] - c_vec[left - self.death_lag]
+
+                while right <= (len(c_vec) - 1):
+                    if ((right - left) < min_window_lag or
+                            diff_deaths(right, left) < min_window_deaths):
+                        # grow window to the right if needed
+                        right += 1
+                        continue
+
+                    while ((right - left) > min_window_lag and
+                           diff_deaths(right, left) > min_window_deaths):
+                        # shrink window from the left if possible
+                        left += 1
+
+                    biases[right] = ((diff_deaths(right, left) / diff_cases(right, left))
+                                     / ifr)
+                    # advance left every time to prevent infinite loop
+                    left += 1
+
+                # use first non 1 (initialised) value to fill the initial values
+                fill_ind = np.where(biases != 1)[0][0]
+                biases[:fill_ind] = biases[fill_ind]
+
+            return pd.Series(biases, index=self.dt_cols)
+
+        testing_biases_dft = ifrs.index.to_series().apply(biases_vec)
+        testing_biases_dft[testing_biases_dft < 1] = 1
+        return testing_biases_dft
+
    def table_with_estimated_cases(self):
        """
        Assumptions:
@ -453,29 +514,27 @@ class CovidData:
                didn't change significantly during the last 8 days.
            - Recent new cases can be adjusted using the same testing_ratio bias.
        """
-
        df = self.overview_table_with_extra_data()

-        lagged_mortality_rate = (self.dfc_deaths + 1) / (self.lagged_cases(self.death_lag) + 2)
-        testing_bias = lagged_mortality_rate / df['age_adjusted_ifr']
-        testing_bias[testing_bias < 1] = 1
+        self.testing_biases_dft = self.calculate_testing_biases_dft(
+            df['age_adjusted_ifr'])

-        df['lagged_fatality_rate'] = lagged_mortality_rate
-        df['testing_bias'] = testing_bias
+        # adjust daily cases by closest approximation of testing bias at that point
+        cases_dft = self.dft_cases_backfilled
+        self.cases_est_dft = (cases_dft.diff(axis=1) * self.testing_biases_dft
+                              ).cumsum(axis=1).fillna(0).astype(int)

-        for col, est_col in zip(self.CASES_COLS, self.EST_COLS):
-            df[est_col] = df['testing_bias'] * df[col]
+        df['current_testing_bias'] = self.testing_biases_dft.iloc[:, -1]

-        return df.sort_values('Cases.new.est', ascending=False)
+        # total cases
+        df[f'{self.CASES_TOT}.est'] = self.cases_est_dft[self.dt_cols[-1]]
+        df[f'{self.CASES_TOT}{self.PER_100K_SUFFIX}.est'] = (
+                df[f'{self.CASES_TOT}.est'] * 1e5 / df['population'])

-    def table_with_icu_capacities(self):
-        df = self.table_with_estimated_cases()
+        # new cases just need adjustments with current bias
+        for col in [self.CASES_NEW, f'{self.CASES_NEW}{self.PER_100K_SUFFIX}']:
+            df[f'{col}.est'] = df['current_testing_bias'] * df[col]

-        df_beds = self.beds_df()
-
-        df['icu_capacity_per100k'] = df_beds['icu_per_100k']
-
-        df['icu_spare_capacity_per100k'] = df['icu_capacity_per100k'] * self.icu_spare_capacity_ratio
        return df

    @classmethod
@ -494,9 +553,9 @@ class CovidData:
    def smoothed_growth_rates(self, n_days):
        recent_dates = self.dt_cols[-n_days:]

-        cases = (self.dft_cases_backfilled[recent_dates] + 1)  # with pseudo counts
+        cases = self.cases_est_dft[recent_dates] + 1  # with pseudo counts

-        diffs = self.dft_cases_backfilled.diff(axis=1)[recent_dates]
+        diffs = self.cases_est_dft.diff(axis=1)[recent_dates]
        diffs[diffs < 0] = 0  # total cases cannot go down

        cases, diffs = cases.T, diffs.T  # broadcasting works correctly this way
@ -515,8 +574,9 @@ class CovidData:

        return weighted_mean - 1, weighted_std

-    def table_with_projections(self, projection_days=(7, 14, 30), debug_dfs=False):
-        df = self.table_with_icu_capacities()
+    def table_with_current_rates_and_ratios(
+            self) -> Tuple[pd.DataFrame, List[pd.Series], List[pd.Series]]:
+        df = self.table_with_estimated_cases()

        df['affected_ratio'] = df['Cases.total'] / df['population']

@ -524,12 +584,21 @@ class CovidData:

        past_active, past_recovered = self._calculate_recovered_and_active_until_now(df)

-        df['transmission_rate'], df['transmission_rate_std'] = Model.growth_to_infection_rate(
+        df['current_active_ratio'] = past_active[-1].fillna(0)
+        df['current_recovered_ratio'] = past_recovered[-1].fillna(0)
+
+        df['transmission_rate'], df['transmission_rate_std'] = Model.growth_to_transmission_rate(
            growth=df['growth_rate'],
-            rec=past_recovered[-1],
-            act=past_active[-1],
+            rec=df['current_recovered_ratio'],
+            act=df['current_active_ratio'],
            growth_std=df['growth_rate_std'])

+        return df, past_active, past_recovered
+
+    def table_with_projections(self, projection_days=(7, 14, 30), debug_dfs=False):
+
+        df, past_active, past_recovered = self.table_with_current_rates_and_ratios()
+
        df, traces = Model.run_model_forward(
            df,
            past_active=past_active.copy(),
@ -546,9 +615,8 @@ class CovidData:
        return df

    def _calculate_recovered_and_active_until_now(self, df):
-        # estimated daily cases ratio of population
-        lagged_cases_ratios = (self.dft_cases_backfilled[self.dt_cols].T *
-                               df['testing_bias'].T / df['population'].T).T
+        # estimated daily cases ratios of population
+        lagged_cases_ratios = (self.cases_est_dft[self.dt_cols].T / df['population'].T).T
        # protect from testing bias over-inflation
        lagged_cases_ratios[lagged_cases_ratios > 1] = 1

@ -557,12 +625,18 @@ class CovidData:
        actives, recs = [], []
        zeros_series = lagged_cases_ratios[self.dt_cols[0]] * 0  # this is to have consistent types
        for day in range(len(self.dt_cols)):
+            # previous day
            prev_rec = recs[day - 1] if day > 0 else zeros_series
+            # lagged recoveries
            tot_lagged_9 = lagged_cases_ratios[self.dt_cols[day - 9]] if day >= 9 else zeros_series
            new_recs = prev_rec + (tot_lagged_9 - prev_rec) * Model.recovery_lagged9_rate
-            new_recs[new_recs > 1] = 1
+            # clip recoveries by current cases
+            cur_cases = lagged_cases_ratios[self.dt_cols[day]]
+            new_recs[new_recs > cur_cases] = cur_cases[new_recs > cur_cases]
+            new_actives = cur_cases - new_recs
+            # assign
            recs.append(new_recs)
-            actives.append(lagged_cases_ratios[self.dt_cols[day]] - new_recs)
+            actives.append(new_actives)

        return actives, recs

@ -581,7 +655,7 @@ class Model:
                          projection_days,
                          ):

-        sus, act, rec = cls._run_sir_mode(
+        sus, act, rec = cls._run_sir_model(
            past_recovered, past_active, df['growth_rate'], n_days=projection_days[-1])

        # sample more growth rates
@ -592,7 +666,7 @@ class Model:
        for ratio in np.linspace(-1, 1, 10):
            pert_growth = df['growth_rate'] + ratio * df['growth_rate_std']
            pert_growth[pert_growth < 0] = 0
-            sus_other, act_other, rec_other = cls._run_sir_mode(
+            sus_other, act_other, rec_other = cls._run_sir_model(
                past_recovered, past_active, pert_growth, n_days=projection_days[-1])
            for s_list, s in zip(sus_lists, sus_other):
                s_list.append(s)
@ -636,7 +710,7 @@ class Model:
        return df, traces

    @classmethod
-    def growth_to_infection_rate(cls, growth, rec, act, growth_std=None):
+    def growth_to_transmission_rate(cls, growth, rec, act, growth_std=None):
        daily_delta = growth
        tot = rec + act
        active = act
@ -662,10 +736,10 @@ class Model:
        return infect_rate, infect_std

    @classmethod
-    def _run_sir_mode(cls, past_rec, past_act, growth, n_days):
+    def _run_sir_model(cls, past_rec, past_act, growth, n_days):
        rec, act = past_rec.copy(), past_act.copy()

-        infect_rate, _ = cls.growth_to_infection_rate(growth, rec[-1], act[-1])
+        infect_rate, _ = cls.growth_to_transmission_rate(growth, rec[-1], act[-1])

        # simulate
        for i in range(n_days):
@ -771,7 +845,7 @@ def altair_sir_plot(df_alt, default_country):
 def altair_multiple_countries_infected(df_alt_all,
                                       countries,
                                       title,
-                                       days_back=90,
+                                       days_back=120,
                                       marker_day=10):
    if not len(countries):
        return
@ -889,21 +963,19 @@ class GeoMap:
    @classmethod
    def make_map_figure(cls,
                        df_plot_geo,
-                        col='transmission_rate',
-                        title='Transmission rate<br>percent (blue-red)',
-                        subtitle='Transmission rate: over 5% (red) '
-                                 'spreading, under 5% (blue) recovering'):
+                        col,
+                        colorbar_title,
+                        subtitle,
+                        err_col=None,
+                        hover_text_func=None,
+                        scale_max=None,
+                        colorscale='Bluered',
+                        ):
        import plotly.graph_objects as go

-        df_plot_geo['text'] = (df_plot_geo.apply(
-            lambda r: (
-                "<br>"
-                f"Cases (reported): {r['Cases.total']:,.0f} (+<b>{r['Cases.new']:,.0f}</b>)<br>"
-                f"Cases (estimated): {r['Cases.total.est']:,.0f} (+<b>{r['Cases.new.est']:,.0f}</b>)<br>"
-                f"Affected percent: <b>{r['affected_ratio.est']:.1%}</b><br>"
-                f"Transmission rate: <b>{r['transmission_rate']:.1%}</b> ± {r['transmission_rate_std']:.1%}<br>"
-                f"Deaths: {r['Deaths.total']:,.0f} (+<b>{r['Deaths.new']:,.0f}</b>)<br>"
-            ), axis=1))
+        # hover text
+        hover_text_func = hover_text_func if callable(hover_text_func) else lambda r: ''
+        df_plot_geo['text'] = df_plot_geo.apply(hover_text_func, axis=1)

        percent = ('rate' in col or 'ratio' in col)

@ -913,26 +985,26 @@ class GeoMap:
                geojson=df_plot_geo['geometry'].__geo_interface__,
                z=df_plot_geo[col].fillna(float('nan')) * (100 if percent else 1),
                zmin=0,
-                zmax=10,
+                zmax=scale_max,
                text=df_plot_geo['text'],
                ids=df_plot_geo['country'],
                customdata=cls.error_series_to_string_list(
                    series=df_plot_geo[col],
-                    err_series=df_plot_geo['transmission_rate_std'],
+                    err_series=df_plot_geo[err_col] if err_col else None,
                    percent=percent
                ),
                hovertemplate="<b>%{id}</b>:<br><b>%{z:.1f}%{customdata}</b><br>%{text}<extra></extra>",
-                colorscale='BLuered',
+                colorscale=colorscale,
+                colorbar={'title': {'text': f'<b>{colorbar_title}</b>'}},
                autocolorscale=False,
                marker_line_color='#9fa8ad',
                marker_line_width=0.5,
-                colorbar_title=f'<b>{title}</b>',
            ))

        fig.update_layout(
            title={'text': f"<b>Map of</b>: {subtitle}", 'y': 0.875, 'x': 0.005},
            annotations=[
-                dict(text="Data<br>choice:", showarrow=False, x=0.005, y=1.075, yref="paper", align="left")
+                dict(text="Map<br>choice:", showarrow=False, x=0.005, y=1.075, yref="paper", align="left")
            ],
            width=800,
            height=450,
@ -966,7 +1038,8 @@ class GeoMap:

    @classmethod
    def button_dict(cls, series, title, colorscale, scale_max=None,
-                    percent=False, subtitle=None, err_series=None):
+                    percent=False, subtitle=None, err_series=None,
+                    hover_text_list=None, colorbar_title=None):
        import plotly.express as px

        series = series.fillna(float('nan'))
@ -979,14 +1052,21 @@ class GeoMap:

        max_arg = series.max() if scale_max is None else min(scale_max, series.max())

-        return dict(args=[
-            {'z': [series.to_list()],
+        data_args_dict = {
+            'z': [series.to_list()],
            'zmax': [max_arg],
-             'colorbar': [{'title': {'text': f'<b>{title}</b>'}}],
+            'colorbar': [{'title': {'text': f'<b>{colorbar_title or title}</b>'}}],
            'colorscale': [scale_arg],
            'customdata': [cls.error_series_to_string_list(
                series, err_series=err_series, percent=percent)]
-             },
+        }
+
+        if hover_text_list:
+            data_args_dict['text'] = [hover_text_list]
+
+        return dict(args=[data_args_dict,
                          {'title': {'text': f"<b>Map of</b>: {subtitle}",
-                       'y': 0.875, 'x': 0.005}}],
-            label=title, method="update")
+                                     'y': 0.875, 'x': 0.005}}
+                          ],
+                    label=title,
+                    method="update")
--- a/_notebooks/requirements.txt
+++ b/_notebooks/requirements.txt
@ -4,6 +4,7 @@ seaborn
 papermill
 ipykernel
 jupyter
+jupytext
 folium
 plotly
 plotnine
--- a/images/micromorts.png
+++ b/images/micromorts.png