Add new method to merge p-values using FDR
The method merges p-values by taking the minimum of the adjusted p-values. This then represents a p-value for a global hypothesis. Signed-off-by: Patrick Bloebaum <bloebp@amazon.com>
This commit is contained in:
Родитель
cb82358aba
Коммит
57fd684450
|
@ -42,7 +42,7 @@ from dowhy.gcm.ml.classification import (
|
|||
create_polynom_logistic_regression_classifier,
|
||||
)
|
||||
from dowhy.gcm.ml.regression import create_ada_boost_regressor, create_extra_trees_regressor, create_polynom_regressor
|
||||
from dowhy.gcm.stats import merge_p_values_average
|
||||
from dowhy.gcm.stats import merge_p_values_fdr
|
||||
from dowhy.gcm.util.general import is_categorical, set_random_seed, shape_into_2d
|
||||
from dowhy.graph import get_ordered_predecessors, is_root_node
|
||||
|
||||
|
@ -598,7 +598,7 @@ def _evaluate_invertibility_assumptions(
|
|||
parent_samples[random_indices],
|
||||
)
|
||||
)
|
||||
all_pnl_p_values[node] = merge_p_values_average(tmp_p_values)
|
||||
all_pnl_p_values[node] = merge_p_values_fdr(tmp_p_values)
|
||||
|
||||
if len(all_pnl_p_values) == 0:
|
||||
return all_pnl_p_values
|
||||
|
|
|
@ -3,6 +3,7 @@ from typing import Callable, List, Optional, Union
|
|||
import numpy as np
|
||||
from scipy import stats
|
||||
from sklearn.linear_model import LinearRegression
|
||||
from statsmodels.stats.multitest import multipletests
|
||||
|
||||
from dowhy.gcm.constant import EPS
|
||||
from dowhy.gcm.util.general import shape_into_2d
|
||||
|
@ -85,6 +86,32 @@ def merge_p_values_quantile(
|
|||
return float(min(1.0, np.quantile(p_values / quantile, quantile)))
|
||||
|
||||
|
||||
def merge_p_values_fdr(p_values: Union[np.ndarray, List[float]], fdr_method: str = "fdr_bh") -> float:
|
||||
"""Merges p-values to represent the global null hypothesis that all hypotheses represented by the p-values are true.
|
||||
|
||||
Here, we first adjust the given p-values based on the provided false discovery rate (FDR) control method, and then
|
||||
return the minimum.
|
||||
|
||||
:param p_values: A list or array of p-values.
|
||||
:param fdr_method: The false discovery rate control method. For various options, please refer to
|
||||
`this page <https://www.statsmodels.org/dev/generated/statsmodels.stats.multitest.multipletests.html>`_.
|
||||
:return: The minimum p-value after adjusting based on the given FDR method.
|
||||
"""
|
||||
if len(p_values) == 0:
|
||||
raise ValueError("Given list of p-values is empty!")
|
||||
|
||||
p_values = np.array(p_values)
|
||||
|
||||
if np.all(np.isnan(p_values)):
|
||||
return float(np.nan)
|
||||
|
||||
p_values = p_values[~np.isnan(p_values)]
|
||||
|
||||
# Note: The alpha level doesn't matter here.
|
||||
multipletests_result = multipletests(p_values, 0.05, method=fdr_method)
|
||||
return min(multipletests_result[1])
|
||||
|
||||
|
||||
def marginal_expectation(
|
||||
prediction_method: Callable[[np.ndarray], np.ndarray],
|
||||
feature_samples: np.ndarray,
|
||||
|
|
|
@ -9,7 +9,13 @@ from dowhy.gcm.ml import (
|
|||
create_linear_regressor,
|
||||
create_logistic_regression_classifier,
|
||||
)
|
||||
from dowhy.gcm.stats import estimate_ftest_pvalue, marginal_expectation, merge_p_values_average, merge_p_values_quantile
|
||||
from dowhy.gcm.stats import (
|
||||
estimate_ftest_pvalue,
|
||||
marginal_expectation,
|
||||
merge_p_values_average,
|
||||
merge_p_values_fdr,
|
||||
merge_p_values_quantile,
|
||||
)
|
||||
from dowhy.gcm.util.general import geometric_median
|
||||
|
||||
|
||||
|
@ -55,6 +61,15 @@ def test_given_p_values_with_scaling_when_merge_p_values_quantile_then_returns_s
|
|||
assert merge_p_values_quantile(p_values, p_values_scaling, quantile=0.75) == approx(0.193, abs=0.001)
|
||||
|
||||
|
||||
def test_given_p_values_when_merge_p_values_fdr_then_returns_expected_p_vlaue():
|
||||
assert merge_p_values_fdr([0]) == 0
|
||||
assert merge_p_values_fdr([1]) == 1
|
||||
assert merge_p_values_fdr([0.3]) == 0.3
|
||||
assert merge_p_values_fdr([0, 1]) == 0.0
|
||||
assert merge_p_values_fdr([0.1, 0.2, 0.5]) == approx(0.3)
|
||||
assert merge_p_values_fdr([0.1, np.nan, 0.2, 0.5, np.nan]) == approx(0.3)
|
||||
|
||||
|
||||
def test_given_invalid_inputs_when_merge_p_values_quantile_then_raises_error():
|
||||
with pytest.raises(ValueError):
|
||||
assert merge_p_values_quantile(np.array([0.1, 0.5, 1]), quantile=0)
|
||||
|
|
Загрузка…
Ссылка в новой задаче