diff --git a/dowhy/gcm/model_evaluation.py b/dowhy/gcm/model_evaluation.py index f3d714c9c..41ae15dbd 100644 --- a/dowhy/gcm/model_evaluation.py +++ b/dowhy/gcm/model_evaluation.py @@ -42,7 +42,7 @@ from dowhy.gcm.ml.classification import ( create_polynom_logistic_regression_classifier, ) from dowhy.gcm.ml.regression import create_ada_boost_regressor, create_extra_trees_regressor, create_polynom_regressor -from dowhy.gcm.stats import merge_p_values_average +from dowhy.gcm.stats import merge_p_values_fdr from dowhy.gcm.util.general import is_categorical, set_random_seed, shape_into_2d from dowhy.graph import get_ordered_predecessors, is_root_node @@ -598,7 +598,7 @@ def _evaluate_invertibility_assumptions( parent_samples[random_indices], ) ) - all_pnl_p_values[node] = merge_p_values_average(tmp_p_values) + all_pnl_p_values[node] = merge_p_values_fdr(tmp_p_values) if len(all_pnl_p_values) == 0: return all_pnl_p_values diff --git a/dowhy/gcm/stats.py b/dowhy/gcm/stats.py index ea43fad86..d641efaba 100644 --- a/dowhy/gcm/stats.py +++ b/dowhy/gcm/stats.py @@ -3,6 +3,7 @@ from typing import Callable, List, Optional, Union import numpy as np from scipy import stats from sklearn.linear_model import LinearRegression +from statsmodels.stats.multitest import multipletests from dowhy.gcm.constant import EPS from dowhy.gcm.util.general import shape_into_2d @@ -85,6 +86,32 @@ def merge_p_values_quantile( return float(min(1.0, np.quantile(p_values / quantile, quantile))) +def merge_p_values_fdr(p_values: Union[np.ndarray, List[float]], fdr_method: str = "fdr_bh") -> float: + """Merges p-values to represent the global null hypothesis that all hypotheses represented by the p-values are true. + + Here, we first adjust the given p-values based on the provided false discovery rate (FDR) control method, and then + return the minimum. + + :param p_values: A list or array of p-values. + :param fdr_method: The false discovery rate control method. For various options, please refer to + `this page `_. + :return: The minimum p-value after adjusting based on the given FDR method. + """ + if len(p_values) == 0: + raise ValueError("Given list of p-values is empty!") + + p_values = np.array(p_values) + + if np.all(np.isnan(p_values)): + return float(np.nan) + + p_values = p_values[~np.isnan(p_values)] + + # Note: The alpha level doesn't matter here. + multipletests_result = multipletests(p_values, 0.05, method=fdr_method) + return min(multipletests_result[1]) + + def marginal_expectation( prediction_method: Callable[[np.ndarray], np.ndarray], feature_samples: np.ndarray, diff --git a/tests/gcm/test_stats.py b/tests/gcm/test_stats.py index b588a863e..bc3737159 100644 --- a/tests/gcm/test_stats.py +++ b/tests/gcm/test_stats.py @@ -9,7 +9,13 @@ from dowhy.gcm.ml import ( create_linear_regressor, create_logistic_regression_classifier, ) -from dowhy.gcm.stats import estimate_ftest_pvalue, marginal_expectation, merge_p_values_average, merge_p_values_quantile +from dowhy.gcm.stats import ( + estimate_ftest_pvalue, + marginal_expectation, + merge_p_values_average, + merge_p_values_fdr, + merge_p_values_quantile, +) from dowhy.gcm.util.general import geometric_median @@ -55,6 +61,15 @@ def test_given_p_values_with_scaling_when_merge_p_values_quantile_then_returns_s assert merge_p_values_quantile(p_values, p_values_scaling, quantile=0.75) == approx(0.193, abs=0.001) +def test_given_p_values_when_merge_p_values_fdr_then_returns_expected_p_vlaue(): + assert merge_p_values_fdr([0]) == 0 + assert merge_p_values_fdr([1]) == 1 + assert merge_p_values_fdr([0.3]) == 0.3 + assert merge_p_values_fdr([0, 1]) == 0.0 + assert merge_p_values_fdr([0.1, 0.2, 0.5]) == approx(0.3) + assert merge_p_values_fdr([0.1, np.nan, 0.2, 0.5, np.nan]) == approx(0.3) + + def test_given_invalid_inputs_when_merge_p_values_quantile_then_raises_error(): with pytest.raises(ValueError): assert merge_p_values_quantile(np.array([0.1, 0.5, 1]), quantile=0)