Mirror of https://github.com/py-why/EconML.git
Enable calling dowhy from econml (#400)
* Add feature of calling dowhy through econml
* Fix bug for shap when parsing const_marginal_effect
This commit is contained in:
Parent: 3e66b9507b
Commit: f3e46f4097
@@ -91,6 +91,10 @@ jobs:
 - template: azure-pipelines-steps.yml
   parameters:
     body:
+    # Work around https://github.com/pypa/pip/issues/9542
+    - script: 'pip install -U numpy'
+      displayName: 'Upgrade numpy'
+
     - script: 'python setup.py pytest'
      displayName: 'Unit tests'
      env:
@@ -213,7 +213,9 @@ intersphinx_mapping = {'python': ('https://docs.python.org/3', None),
                        'numpy': ('https://docs.scipy.org/doc/numpy/', None),
                        'sklearn': ('https://scikit-learn.org/stable/', None),
                        'matplotlib': ('https://matplotlib.org/', None),
-                       'shap': ('https://shap.readthedocs.io/en/stable/', None)}
+                       'shap': ('https://shap.readthedocs.io/en/stable/', None),
+                       'dowhy': ('https://microsoft.github.io/dowhy/', None)}
 
 
 # -- Options for todo extension ----------------------------------------------
@@ -219,6 +219,17 @@ Inference Methods
     econml.inference.StatsModelsInferenceDiscrete
 
 
+.. _dowhy_api:
+
+Integration with DoWhy
+----------------------
+
+.. autosummary::
+    :toctree: _autosummary
+
+    econml.dowhy.DoWhyWrapper
+
+
 .. _utilities_api:
 
 Utilities
@@ -7,4 +7,4 @@ __all__ = ['automated_ml', 'bootstrap',
            'inference', 'iv',
            'metalearners', 'ortho_forest', 'orf', 'ortho_iv',
            'score', 'sklearn_extensions', 'tree',
-           'two_stage_least_squares', 'utilities']
+           'two_stage_least_squares', 'utilities', 'dowhy']
@@ -15,6 +15,7 @@ from .inference import StatsModelsInference, StatsModelsInferenceDiscrete, Linea
     LinearModelFinalInferenceDiscrete, NormalInferenceResults, GenericSingleTreatmentModelFinalInference,\
     GenericModelFinalInferenceDiscrete
 from ._shap import _shap_explain_cme, _shap_explain_joint_linear_model_cate
+from .dowhy import DoWhyWrapper
 
 
 class BaseCateEstimator(metaclass=abc.ABCMeta):

@@ -446,6 +447,18 @@ class BaseCateEstimator(metaclass=abc.ABCMeta):
         """
         pass
 
+    @property
+    def dowhy(self):
+        """ Get an instance of :class:`.DoWhyWrapper` to allow other functionalities from dowhy package.
+        (e.g. causal graph, refutation test, etc.)
+
+        Returns
+        -------
+        DoWhyWrapper: instance
+            An instance of :class:`.DoWhyWrapper`
+        """
+        return DoWhyWrapper(self)
+
 
 class LinearCateEstimator(BaseCateEstimator):
     """Base class for all CATE estimators with linear treatment effects in this package."""
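Editor's note: the new `dowhy` property gives every CATE estimator a one-line entry point into DoWhy, so no `DoWhyWrapper` has to be constructed by hand. A minimal sketch of the intended call pattern, with made-up toy data (mirroring the usage in the new test file further down):

    import numpy as np
    from sklearn.linear_model import LinearRegression, LogisticRegression
    from econml.dml import LinearDML

    # toy data: one effect modifier X, three controls W, binary treatment
    X = np.random.normal(size=(500, 1))
    W = np.random.normal(size=(500, 3))
    T = np.random.binomial(1, .5, size=(500,))
    Y = X[:, 0] * T + np.random.normal(size=(500,))

    est = LinearDML(model_y=LinearRegression(), model_t=LogisticRegression(),
                    discrete_treatment=True)
    # est.dowhy returns a fresh DoWhyWrapper around est; fitting it runs
    # dowhy's identify_effect and estimate_effect steps under the hood
    est_dowhy = est.dowhy.fit(Y, T, X=X, W=W)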
@@ -84,7 +84,7 @@ def _shap_explain_cme(cme_model, X, d_t, d_y,
     return shap_outs
 
 
-def _shap_explain_model_cate(cme_model, models, X, d_t, d_y, feature_names=None,
+def _shap_explain_model_cate(cme_model, models, X, d_t, d_y, featurizer=None, feature_names=None,
                              treatment_names=None, output_names=None,
                              input_names=None, background_samples=100):
     """

@@ -100,11 +100,13 @@ def _shap_explain_model_cate(cme_model, models, X, d_t, d_y, feature_names=None,
     models: a single estimator or a list of estimators with one estimator per treatment
         models for the model's final stage model.
     X: (m, d_x) matrix
-        Features for each sample. Should be in the same shape of fitted X in final stage.
+        Features for each sample.
     d_t: tuple of int
         Tuple of number of treatment (exclude control in discrete treatment scenario.
     d_y: tuple of int
         Tuple of number of outcome.
+    featurizer: optional None or instance of featurizer
+        Fitted Featurizer of feature X.
     feature_names: optional None or list of strings of length X.shape[1] (Default=None)
         The names of input features.
     treatment_names: optional None or list (Default=None)

@@ -129,12 +131,16 @@ def _shap_explain_model_cate(cme_model, models, X, d_t, d_y, feature_names=None,
     output_names_, input_names_ = output_names, input_names
     (dt, dy, treatment_names, output_names, feature_names) = _define_names(d_t, d_y, treatment_names, output_names,
                                                                            feature_names, input_names)
+    if featurizer is not None:
+        F = featurizer.transform(X)
+    else:
+        F = X
     if not isinstance(models, list):
         models = [models]
     assert len(models) == dt, "Number of final stage models don't equals to number of treatments!"
     # define masker by using entire dataset, otherwise Explainer will only sample 100 obs by default.
-    bg_samples = X.shape[0] if background_samples is None else min(background_samples, X.shape[0])
-    background = shap.maskers.Independent(X, max_samples=bg_samples)
+    bg_samples = F.shape[0] if background_samples is None else min(background_samples, F.shape[0])
+    background = shap.maskers.Independent(F, max_samples=bg_samples)
 
     shap_outs = defaultdict(dict)
     for i in range(dt):

@@ -144,12 +150,12 @@ def _shap_explain_model_cate(cme_model, models, X, d_t, d_y, feature_names=None,
         except Exception as e:
             print("Final model can't be parsed, explain const_marginal_effect() instead!", repr(e))
             return _shap_explain_cme(cme_model, X, d_t_, d_y_,
-                                     feature_names=feature_names_,
+                                     feature_names=None,
                                      treatment_names=treatment_names_,
                                      output_names=output_names_,
                                      input_names=input_names_,
                                      background_samples=background_samples)
-        shap_out = explainer(X)
+        shap_out = explainer(F)
         if dy > 1:
             for j in range(dy):
                 base_values = shap_out.base_values[..., j]

@@ -243,7 +249,8 @@ def _shap_explain_joint_linear_model_cate(model_final, X, d_t, d_y, fit_cate_int
     return shap_outs
 
 
-def _shap_explain_multitask_model_cate(cme_model, multitask_model_cate, X, d_t, d_y, feature_names=None,
+def _shap_explain_multitask_model_cate(cme_model, multitask_model_cate, X, d_t, d_y, featurizer=None,
+                                       feature_names=None,
                                        treatment_names=None, output_names=None,
                                        input_names=None, background_samples=100):
     """

@@ -259,11 +266,13 @@ def _shap_explain_multitask_model_cate(cme_model, multitask_model_cate, X, d_t,
         the model's final stage model whose predict represents the const_marginal_effect for
         all treatments (or list of models, one for each outcome)
     X: (m, d_x) matrix
-        Features for each sample. Should be in the same shape of fitted X in final stage.
+        Features for each sample.
     d_t: tuple of int
         Tuple of number of treatment (exclude control in discrete treatment scenario).
     d_y: tuple of int
         Tuple of number of outcome.
+    featurizer: optional None or instance of featurizer
+        Fitted Featurizer of feature X.
     feature_names: optional None or list of strings of length X.shape[1] (Default=None)
         The names of input features.
     treatment_names: optional None or list (Default=None)

@@ -288,12 +297,16 @@ def _shap_explain_multitask_model_cate(cme_model, multitask_model_cate, X, d_t,
     output_names_, input_names_ = output_names, input_names
     (dt, dy, treatment_names, output_names, feature_names) = _define_names(d_t, d_y, treatment_names, output_names,
                                                                            feature_names, input_names)
+    if featurizer is not None:
+        F = featurizer.transform(X)
+    else:
+        F = X
     if dy == 1 and (not isinstance(multitask_model_cate, list)):
         multitask_model_cate = [multitask_model_cate]
 
     # define masker by using entire dataset, otherwise Explainer will only sample 100 obs by default.
-    bg_samples = X.shape[0] if background_samples is None else min(background_samples, X.shape[0])
-    background = shap.maskers.Independent(X, max_samples=bg_samples)
+    bg_samples = F.shape[0] if background_samples is None else min(background_samples, F.shape[0])
+    background = shap.maskers.Independent(F, max_samples=bg_samples)
     shap_outs = defaultdict(dict)
     for j in range(dy):
         try:

@@ -302,13 +315,13 @@ def _shap_explain_multitask_model_cate(cme_model, multitask_model_cate, X, d_t,
         except Exception as e:
             print("Final model can't be parsed, explain const_marginal_effect() instead!", repr(e))
             return _shap_explain_cme(cme_model, X, d_t_, d_y_,
-                                     feature_names=feature_names_,
+                                     feature_names=None,
                                      treatment_names=treatment_names_,
                                      output_names=output_names_,
                                      input_names=input_names_,
                                      background_samples=background_samples)
 
-        shap_out = explainer(X)
+        shap_out = explainer(F)
         if dt > 1:
             for i in range(dt):
                 base_values = shap_out.base_values[..., i]
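Editor's note: the net effect of threading `featurizer` through these helpers is that both the SHAP background masker and the explained inputs now live in the featurized space F = featurizer.transform(X) that the final-stage model was actually fit on. A standalone sketch of that transform-then-mask pattern, with PolynomialFeatures standing in for whatever featurizer the estimator used:

    import numpy as np
    import shap
    from sklearn.linear_model import LinearRegression
    from sklearn.preprocessing import PolynomialFeatures

    X = np.random.normal(size=(200, 3))
    y = X @ np.array([1., -2., .5]) + np.random.normal(size=200)

    featurizer = PolynomialFeatures(degree=2, include_bias=False).fit(X)
    F = featurizer.transform(X)  # the final-stage model is fit on F, not X
    model = LinearRegression().fit(F, y)

    # masker and explainer both operate on F, mirroring the logic above
    background = shap.maskers.Independent(F, max_samples=100)
    explainer = shap.Explainer(model.predict, background)
    shap_out = explainer(F)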
@@ -555,13 +555,11 @@ class CausalForestDML(_BaseDML):
         return imps.reshape(self._d_y + (-1,))
 
     def shap_values(self, X, *, feature_names=None, treatment_names=None, output_names=None, background_samples=100):
-        if self.featurizer_ is not None:
-            F = self.featurizer_.transform(X)
-        else:
-            F = X
         feature_names = self.cate_feature_names(feature_names)
-        return _shap_explain_multitask_model_cate(self.const_marginal_effect, self.model_cate.estimators_, F,
-                                                  self._d_t, self._d_y, feature_names=feature_names,
+
+        return _shap_explain_multitask_model_cate(self.const_marginal_effect, self.model_cate.estimators_, X,
+                                                  self._d_t, self._d_y, featurizer=self.featurizer_,
+                                                  feature_names=feature_names,
                                                   treatment_names=treatment_names,
                                                   output_names=output_names,
                                                   input_names=self._input_names,

@@ -1177,13 +1177,10 @@ class NonParamDML(_BaseDML):
     refit_final.__doc__ = _OrthoLearner.refit_final.__doc__
 
     def shap_values(self, X, *, feature_names=None, treatment_names=None, output_names=None, background_samples=100):
-        if self.featurizer_ is not None:
-            F = self.featurizer_.transform(X)
-        else:
-            F = X
         feature_names = self.cate_feature_names(feature_names)
 
-        return _shap_explain_model_cate(self.const_marginal_effect, self.model_cate, F, self._d_t, self._d_y,
+        return _shap_explain_model_cate(self.const_marginal_effect, self.model_cate, X, self._d_t, self._d_y,
+                                        featurizer=self.featurizer_,
                                         feature_names=feature_names,
                                         treatment_names=treatment_names,
                                         output_names=output_names,
@@ -0,0 +1,232 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+
+"""Helper class to allow other functionalities from dowhy package.
+
+References
+----------
+DoWhy, https://microsoft.github.io/dowhy/
+
+"""
+
+import inspect
+import pandas as pd
+import numpy as np
+import warnings
+from dowhy import CausalModel
+from econml.utilities import check_input_arrays, reshape_arrays_2dim
+
+
+class DoWhyWrapper:
+    """
+    A wrapper class to allow user call other methods from dowhy package through EconML.
+    (e.g. causal graph, refutation test, etc.)
+
+    Parameters
+    ----------
+    cate_estimator: instance
+        An instance of any CATE estimator we currently support
+    """
+
+    def __init__(self, cate_estimator):
+        self._cate_estimator = cate_estimator
+
+    def _get_params(self):
+        init = self._cate_estimator.__init__
+        # introspect the constructor arguments to find the model parameters
+        # to represent
+        init_signature = inspect.signature(init)
+        parameters = init_signature.parameters.values()
+        for p in parameters:
+            if p.kind == p.VAR_POSITIONAL or p.kind == p.VAR_KEYWORD:
+                raise RuntimeError("cate estimators should always specify their parameters in the signature "
+                                   "of their __init__ (no varargs, no varkwargs). "
+                                   f"{self._cate_estimator} with constructor {init_signature} doesn't "
+                                   "follow this convention.")
+        # Extract and sort argument names excluding 'self'
+        return sorted([p.name for p in parameters])
+
+    def fit(self, Y, T, X=None, W=None, Z=None, *, outcome_names=None, treatment_names=None, feature_names=None,
+            confounder_names=None, instrument_names=None, graph=None, estimand_type="nonparametric-ate",
+            proceed_when_unidentifiable=True, missing_nodes_as_confounders=False,
+            control_value=0, treatment_value=1, target_units="ate", **kwargs):
+        """
+        Estimate the counterfactual model from data through dowhy package.
+
+        Parameters
+        ----------
+        Y: vector of length n
+            Outcomes for each sample
+        T: vector of length n
+            Treatments for each sample
+        X: optional (n, d_x) matrix (Default=None)
+            Features for each sample
+        W: optional (n, d_w) matrix (Default=None)
+            Controls for each sample
+        Z: optional (n, d_z) matrix (Default=None)
+            Instruments for each sample
+        outcome_names: optional list (Default=None)
+            Name of the outcome
+        treatment_names: optional list (Default=None)
+            Name of the treatment
+        feature_names: optional list (Default=None)
+            Name of the features
+        confounder_names: optional list (Default=None)
+            Name of the confounders
+        instrument_names: optional list (Default=None)
+            Name of the instruments
+        graph: optional
+            Path to DOT file containing a DAG or a string containing a DAG specification in DOT format
+        estimand_type: optional string
+            Type of estimand requested (currently only "nonparametric-ate" is supported).
+            In the future, may support other specific parametric forms of identification
+        proceed_when_unidentifiable: optional bool (Default=True)
+            Whether the identification should proceed by ignoring potential unobserved confounders
+        missing_nodes_as_confounders: optional bool (Default=False)
+            Whether variables in the dataframe that are not included in the causal graph should be automatically
+            included as confounder nodes
+        control_value: optional scalar (Default=0)
+            Value of the treatment in the control group, for effect estimation
+        treatment_value: optional scalar (Default=1)
+            Value of the treatment in the treated group, for effect estimation
+        target_units: optional (Default="ate")
+            The units for which the treatment effect should be estimated.
+            This can be of three types:
+
+            1. A string for common specifications of target units (namely, "ate", "att" and "atc"),
+            2. A lambda function that can be used as an index for the data (pandas DataFrame),
+            3. A new DataFrame that contains values of the effect_modifiers and effect will be estimated
+               only for this new data
+
+        kwargs: optional
+            Other keyword arguments from fit method for CATE estimator
+
+        Returns
+        -------
+        self
+        """
+
+        Y, T, X, W, Z = check_input_arrays(Y, T, X, W, Z)
+
+        # create dataframe
+        n_obs = Y.shape[0]
+        Y, T, X, W, Z = reshape_arrays_2dim(n_obs, Y, T, X, W, Z)
+
+        # currently dowhy only support single outcome and single treatment
+        assert Y.shape[1] == 1, "Can only accept single dimensional outcome."
+        assert T.shape[1] == 1, "Can only accept single dimensional treatment."
+
+        # column names
+        if outcome_names is None:
+            outcome_names = [f"Y{i}" for i in range(Y.shape[1])]
+        if treatment_names is None:
+            treatment_names = [f"T{i}" for i in range(T.shape[1])]
+        if feature_names is None:
+            feature_names = [f"X{i}" for i in range(X.shape[1])]
+        if confounder_names is None:
+            confounder_names = [f"W{i}" for i in range(W.shape[1])]
+        if instrument_names is None:
+            instrument_names = [f"Z{i}" for i in range(Z.shape[1])]
+        column_names = outcome_names + treatment_names + feature_names + confounder_names + instrument_names
+        df = pd.DataFrame(np.hstack((Y, T, X, W, Z)), columns=column_names)
+        self.dowhy_ = CausalModel(
+            data=df,
+            treatment=treatment_names,
+            outcome=outcome_names,
+            graph=graph,
+            common_causes=feature_names + confounder_names if X.shape[1] > 0 or W.shape[1] > 0 else None,
+            instruments=instrument_names if Z.shape[1] > 0 else None,
+            effect_modifiers=feature_names if X.shape[1] > 0 else None,
+            estimand_type=estimand_type,
+            proceed_when_unidetifiable=proceed_when_unidentifiable,
+            missing_nodes_as_confounders=missing_nodes_as_confounders
+        )
+        self.identified_estimand_ = self.dowhy_.identify_effect(proceed_when_unidentifiable=True)
+        method_name = "backdoor." + self._cate_estimator.__module__ + "." + self._cate_estimator.__class__.__name__
+        init_params = {}
+        for p in self._get_params():
+            init_params[p] = getattr(self._cate_estimator, p)
+        self.estimate_ = self.dowhy_.estimate_effect(self.identified_estimand_,
+                                                     method_name=method_name,
+                                                     control_value=control_value,
+                                                     treatment_value=treatment_value,
+                                                     target_units=target_units,
+                                                     method_params={
+                                                         "init_params": init_params,
+                                                         "fit_params": kwargs,
+                                                     },
+                                                     )
+        return self
+
+    def refute_estimate(self, *, method_name, **kwargs):
+        """
+        Refute an estimated causal effect.
+
+        If method_name is provided, uses the provided method. In the future, we may support automatic
+        selection of suitable refutation tests.
+        Following refutation methods are supported:
+
+        - Adding a randomly-generated confounder: "random_common_cause"
+        - Adding a confounder that is associated with both treatment and outcome: "add_unobserved_common_cause"
+        - Replacing the treatment with a placebo (random) variable: "placebo_treatment_refuter"
+        - Removing a random subset of the data: "data_subset_refuter"
+
+        For more details, see docs :mod:`dowhy.causal_refuters`
+
+        Parameters
+        ----------
+        method_name: string
+            Name of the refutation method
+        kwargs: optional
+            Additional arguments that are passed directly to the refutation method.
+            Can specify a random seed here to ensure reproducible results ('random_seed' parameter).
+            For method-specific parameters, consult the documentation for the specific method.
+            All refutation methods are in the causal_refuters subpackage.
+
+        Returns
+        -------
+        RefuteResult: an instance of the RefuteResult class
+        """
+        return self.dowhy_.refute_estimate(
+            self.identified_estimand_, self.estimate_, method_name=method_name, **kwargs
+        )
+
+    # We don't allow user to call refit_final from this class, since internally dowhy effect estimate will only update
+    # cate estimator but not the effect.
+    def refit_final(self, inference=None):
+        raise AttributeError(
+            "Method refit_final is not allowed through a dowhy object; please perform a full fit instead.")
+
+    def __getattr__(self, attr):
+        # don't proxy special methods
+        if attr.startswith('__'):
+            raise AttributeError(attr)
+        elif attr in ['_cate_estimator', 'dowhy_',
+                      'identified_estimand_', 'estimate_']:
+            return super().__getattr__(attr)
+        elif attr.startswith('dowhy__'):
+            return getattr(self.dowhy_, attr[len('dowhy__'):])
+        elif hasattr(self.estimate_._estimator_object, attr):
+            if hasattr(self.dowhy_, attr):
+                warnings.warn("This call is ambiguous, "
+                              "we're defaulting to CATE estimator's attribute. "
+                              "Please add 'dowhy__' as prefix if you want to get dowhy attribute.", UserWarning)
+            return getattr(self.estimate_._estimator_object, attr)
+        else:
+            return getattr(self.dowhy_, attr)
+
+    def __setattr__(self, attr, value):
+        if attr in ['_cate_estimator', 'dowhy_',
+                    'identified_estimand_', 'estimate_']:
+            super().__setattr__(attr, value)
+        elif attr.startswith('dowhy__'):
+            setattr(self.dowhy_, attr[len('dowhy__'):], value)
+        elif hasattr(self.estimate_._estimator_object, attr):
+            if hasattr(self.dowhy_, attr):
+                warnings.warn("This call is ambiguous, "
+                              "we're defaulting to CATE estimator's attribute. "
+                              "Please add 'dowhy__' as prefix if you want to set dowhy attribute.", UserWarning)
+            setattr(self.estimate_._estimator_object, attr, value)
+        else:
+            setattr(self.dowhy_, attr, value)
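Editor's note: taken together, a fitted wrapper exposes three layers of attributes: fields stored by `DoWhyWrapper.fit` itself, members of the wrapped CATE estimator (reached through `estimate_._estimator_object`), and members of dowhy's `CausalModel` as the fallback. A hedged sketch of how each layer is reached, continuing from an `est_dowhy` fit as in the earlier example:

    identified = est_dowhy.identified_estimand_  # stored by DoWhyWrapper.fit itself
    cate = est_dowhy.effect(X)                   # proxied to the wrapped CATE estimator
    est_dowhy.view_model()                       # falls through to dowhy's CausalModel
    # (a name present on both layers can be forced to dowhy via the 'dowhy__' prefix)

    # refutations run against the stored estimand and estimate
    res = est_dowhy.refute_estimate(method_name="placebo_treatment_refuter",
                                    placebo_type="permute", num_simulations=3)
    print(res)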
@@ -637,15 +637,12 @@ class DRLearner(_OrthoLearner):
         return self.ortho_learner_model_final_.models_cate
 
     def shap_values(self, X, *, feature_names=None, treatment_names=None, output_names=None, background_samples=100):
-        if self.featurizer_ is not None:
-            F = self.featurizer_.transform(X)
-        else:
-            F = X
         feature_names = self.cate_feature_names(feature_names)
 
         if self.ortho_learner_model_final_._multitask_model_final:
-            return _shap_explain_multitask_model_cate(self.const_marginal_effect, self.multitask_model_cate, F,
+            return _shap_explain_multitask_model_cate(self.const_marginal_effect, self.multitask_model_cate, X,
                                                       self._d_t, self._d_y,
+                                                      featurizer=self.featurizer_,
                                                       feature_names=feature_names,
                                                       treatment_names=treatment_names,
                                                       output_names=output_names,

@@ -653,7 +650,8 @@ class DRLearner(_OrthoLearner):
                                                       background_samples=background_samples)
         else:
             return _shap_explain_model_cate(self.const_marginal_effect, self.fitted_models_final,
-                                            F, self._d_t, self._d_y,
+                                            X, self._d_t, self._d_y,
+                                            featurizer=self.featurizer_,
                                             feature_names=feature_names,
                                             treatment_names=treatment_names,
                                             output_names=output_names,
@@ -39,12 +39,7 @@ class TLearner(TreatmentExpansionMixin, LinearCateEstimator):
                  models,
                  categories='auto'):
         self.models = clone(models, safe=False)
-        if categories != 'auto':
-            categories = [categories]  # OneHotEncoder expects a 2D array with features per column
-        self._one_hot_encoder = OneHotEncoder(categories=categories, sparse=False, drop='first')
-        self.transformer = FunctionTransformer(
-            func=_EncoderWrapper(self._one_hot_encoder).encode,
-            validate=False)
+        self.categories = categories
         super().__init__()
 
     @_deprecate_positional("X should be passed by keyword only. In a future release "

@@ -74,8 +69,17 @@ class TLearner(TreatmentExpansionMixin, LinearCateEstimator):
         self : an instance of self.
 
         """
+
         # Check inputs
         Y, T, X, _ = check_inputs(Y, T, X, multi_output_T=False)
+
+        if self.categories != 'auto':
+            self.categories = [self.categories]  # OneHotEncoder expects a 2D array with features per column
+        self._one_hot_encoder = OneHotEncoder(categories=self.categories, sparse=False, drop='first')
+        self.transformer = FunctionTransformer(
+            func=_EncoderWrapper(self._one_hot_encoder).encode,
+            validate=False)
+
         T = self._one_hot_encoder.fit_transform(T.reshape(-1, 1))
         self._d_t = T.shape[1:]
         T = inverse_onehot(T)

@@ -128,15 +132,7 @@ class SLearner(TreatmentExpansionMixin, LinearCateEstimator):
                  overall_model,
                  categories='auto'):
         self.overall_model = clone(overall_model, safe=False)
-        if categories != 'auto':
-            categories = [categories]  # OneHotEncoder expects a 2D array with features per column
-        # Note: unlike other Metalearners, we don't drop the first column because
-        # we concatenate all treatments to the other features;
-        # We might want to revisit, though, since it's linearly determined by the others
-        self._one_hot_encoder = OneHotEncoder(categories=categories, sparse=False)
-        self.transformer = FunctionTransformer(
-            func=_EncoderWrapper(self._one_hot_encoder, drop_first=True).encode,
-            validate=False)
+        self.categories = categories
         super().__init__()
 
     @_deprecate_positional("X should be passed by keyword only. In a future release "

@@ -169,6 +165,17 @@ class SLearner(TreatmentExpansionMixin, LinearCateEstimator):
         if X is None:
             X = np.zeros((Y.shape[0], 1))
         Y, T, X, _ = check_inputs(Y, T, X, multi_output_T=False)
+
+        if self.categories != 'auto':
+            self.categories = [self.categories]  # OneHotEncoder expects a 2D array with features per column
+        # Note: unlike other Metalearners, we don't drop the first column because
+        # we concatenate all treatments to the other features;
+        # We might want to revisit, though, since it's linearly determined by the others
+        self._one_hot_encoder = OneHotEncoder(categories=self.categories, sparse=False)
+        self.transformer = FunctionTransformer(
+            func=_EncoderWrapper(self._one_hot_encoder, drop_first=True).encode,
+            validate=False)
+
         T = self._one_hot_encoder.fit_transform(T.reshape(-1, 1))
         self._d_t = (T.shape[1] - 1,)
         feat_arr = np.concatenate((X, T), axis=1)

@@ -238,12 +245,7 @@ class XLearner(TreatmentExpansionMixin, LinearCateEstimator):
         self.models = clone(models, safe=False)
         self.cate_models = clone(cate_models, safe=False)
         self.propensity_model = clone(propensity_model, safe=False)
-        if categories != 'auto':
-            categories = [categories]  # OneHotEncoder expects a 2D array with features per column
-        self._one_hot_encoder = OneHotEncoder(categories=categories, sparse=False, drop='first')
-        self.transformer = FunctionTransformer(
-            func=_EncoderWrapper(self._one_hot_encoder).encode,
-            validate=False)
+        self.categories = categories
         super().__init__()
 
     @_deprecate_positional("X should be passed by keyword only. In a future release "

@@ -274,6 +276,14 @@ class XLearner(TreatmentExpansionMixin, LinearCateEstimator):
         """
         # Check inputs
         Y, T, X, _ = check_inputs(Y, T, X, multi_output_T=False)
+
+        if self.categories != 'auto':
+            self.categories = [self.categories]  # OneHotEncoder expects a 2D array with features per column
+        self._one_hot_encoder = OneHotEncoder(categories=self.categories, sparse=False, drop='first')
+        self.transformer = FunctionTransformer(
+            func=_EncoderWrapper(self._one_hot_encoder).encode,
+            validate=False)
+
         if Y.ndim == 2 and Y.shape[1] == 1:
             Y = Y.flatten()
         T = self._one_hot_encoder.fit_transform(T.reshape(-1, 1))

@@ -366,12 +376,7 @@ class DomainAdaptationLearner(TreatmentExpansionMixin, LinearCateEstimator):
         self.models = clone(models, safe=False)
         self.final_models = clone(final_models, safe=False)
         self.propensity_model = clone(propensity_model, safe=False)
-        if categories != 'auto':
-            categories = [categories]  # OneHotEncoder expects a 2D array with features per column
-        self._one_hot_encoder = OneHotEncoder(categories=categories, sparse=False, drop='first')
-        self.transformer = FunctionTransformer(
-            func=_EncoderWrapper(self._one_hot_encoder).encode,
-            validate=False)
+        self.categories = categories
         super().__init__()
 
     @_deprecate_positional("X should be passed by keyword only. In a future release "

@@ -402,6 +407,14 @@ class DomainAdaptationLearner(TreatmentExpansionMixin, LinearCateEstimator):
         """
         # Check inputs
         Y, T, X, _ = check_inputs(Y, T, X, multi_output_T=False)
+
+        if self.categories != 'auto':
+            self.categories = [self.categories]  # OneHotEncoder expects a 2D array with features per column
+        self._one_hot_encoder = OneHotEncoder(categories=self.categories, sparse=False, drop='first')
+        self.transformer = FunctionTransformer(
+            func=_EncoderWrapper(self._one_hot_encoder).encode,
+            validate=False)
+
         T = self._one_hot_encoder.fit_transform(T.reshape(-1, 1))
         self._d_t = T.shape[1:]
         T = inverse_onehot(T)

@@ -468,6 +481,7 @@ class DomainAdaptationLearner(TreatmentExpansionMixin, LinearCateEstimator):
 
     def shap_values(self, X, *, feature_names=None, treatment_names=None, output_names=None, background_samples=100):
         return _shap_explain_model_cate(self.const_marginal_effect, self.final_models, X, self._d_t, self._d_y,
+                                        featurizer=None,
                                         feature_names=feature_names,
                                         treatment_names=treatment_names,
                                         output_names=output_names,
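Editor's note: the common thread in these metalearner hunks is the scikit-learn estimator convention: `__init__` stores its arguments untouched so the object can be reconstructed from its parameters, and derived state such as the `OneHotEncoder` and its `FunctionTransformer` wrapper is built inside `fit`. A toy illustration of the convention, assuming nothing from econml (the `ToyLearner` class is invented for this sketch; `sparse=False` matches the sklearn version this diff targets):

    import numpy as np
    from sklearn.base import BaseEstimator, clone
    from sklearn.preprocessing import OneHotEncoder

    class ToyLearner(BaseEstimator):
        def __init__(self, categories='auto'):
            # store the argument untouched; no derived objects here
            self.categories = categories

        def fit(self, T):
            # derived state is created at fit time, as in the hunks above
            categories = [self.categories] if self.categories != 'auto' else 'auto'
            self._one_hot_encoder = OneHotEncoder(categories=categories, sparse=False, drop='first')
            self._one_hot_encoder.fit(T.reshape(-1, 1))
            return self

    est = ToyLearner(categories=[0, 1, 2]).fit(np.array([0, 1, 2, 1]))
    est2 = clone(est)                    # rebuilt from __init__ parameters only
    assert est2.categories == [0, 1, 2]  # still exactly what the caller passed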
@@ -247,6 +247,7 @@ class BaseOrthoForest(TreatmentExpansionMixin, LinearCateEstimator):
         self.backend = backend
         self.verbose = verbose
         self.batch_size = batch_size
+        self.categories = categories
         super().__init__()
 
     @_deprecate_positional("X and W should be passed by keyword only. In a future release "

@@ -581,10 +582,6 @@ class DMLOrthoForest(BaseOrthoForest):
             self.lambda_reg)
         # Define
         moment_and_mean_gradient_estimator = _DMLOrthoForest_moment_and_mean_gradient_estimator_func
-        if discrete_treatment:
-            if categories != 'auto':
-                categories = [categories]  # OneHotEncoder expects a 2D array with features per column
-            self._one_hot_encoder = OneHotEncoder(categories=categories, sparse=False, drop='first')
         super().__init__(
             nuisance_estimator,
             second_stage_nuisance_estimator,

@@ -640,6 +637,11 @@ class DMLOrthoForest(BaseOrthoForest):
         -------
         self: an instance of self.
         """
+        if self.discrete_treatment:
+            if self.categories != 'auto':
+                self.categories = [self.categories]  # OneHotEncoder expects a 2D array with features per column
+            self._one_hot_encoder = OneHotEncoder(categories=self.categories, sparse=False, drop='first')
+
         self._set_input_names(Y, T, X, set_flag=True)
         Y, T, X, W = check_inputs(Y, T, X, W)
         if self.discrete_treatment:

@@ -910,6 +912,7 @@ class DROrthoForest(BaseOrthoForest):
                  verbose=3,
                  batch_size='auto',
                  random_state=None):
+        self.lambda_reg = lambda_reg
         # Copy and/or define models
         self.propensity_model = clone(propensity_model, safe=False)
         self.model_Y = clone(model_Y, safe=False)

@@ -928,12 +931,9 @@ class DROrthoForest(BaseOrthoForest):
         # Define parameter estimators
         parameter_estimator = DROrthoForest.parameter_estimator_func
         second_stage_parameter_estimator = DROrthoForest.second_stage_parameter_estimator_gen(
-            lambda_reg)
+            self.lambda_reg)
         # Define moment and mean gradient estimator
         moment_and_mean_gradient_estimator = DROrthoForest.moment_and_mean_gradient_estimator_func
-        if categories != 'auto':
-            categories = [categories]  # OneHotEncoder expects a 2D array with features per column
-        self._one_hot_encoder = OneHotEncoder(categories=categories, sparse=False, drop='first')
 
         super().__init__(
             nuisance_estimator,

@@ -983,6 +983,10 @@ class DROrthoForest(BaseOrthoForest):
         -------
         self: an instance of self.
         """
+        if self.categories != 'auto':
+            self.categories = [self.categories]  # OneHotEncoder expects a 2D array with features per column
+        self._one_hot_encoder = OneHotEncoder(categories=self.categories, sparse=False, drop='first')
+
         self._set_input_names(Y, T, X, set_flag=True)
         Y, T, X, W = check_inputs(Y, T, X, W)
         # Check that T is shape (n, )
@@ -0,0 +1,66 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import numpy as np
+import unittest
+from econml.dml import LinearDML, CausalForestDML
+from econml.orf import DROrthoForest
+from econml.dr import DRLearner
+from econml.metalearners import XLearner
+from econml.iv.dml import DMLATEIV
+from sklearn.linear_model import LinearRegression, LogisticRegression, Lasso
+
+
+class TestDowhy(unittest.TestCase):
+
+    def _get_data(self):
+        X = np.random.normal(0, 1, size=(500, 5))
+        T = np.random.binomial(1, .5, size=(500,))
+        Y = np.random.normal(0, 1, size=(500,))
+        Z = np.random.normal(0, 1, size=(500,))
+        return Y, T, X[:, [0]], X[:, 1:], Z
+
+    def test_dowhy(self):
+        def reg():
+            return LinearRegression()
+
+        def clf():
+            return LogisticRegression()
+
+        Y, T, X, W, Z = self._get_data()
+        # test at least one estimator from each category
+        models = {"dml": LinearDML(model_y=reg(), model_t=clf(), discrete_treatment=True,
+                                   linear_first_stages=False),
+                  "dr": DRLearner(model_propensity=clf(), model_regression=reg(),
+                                  model_final=reg()),
+                  "xlearner": XLearner(models=reg(), cate_models=reg(), propensity_model=clf()),
+                  "cfdml": CausalForestDML(model_y=reg(), model_t=clf(), discrete_treatment=True),
+                  "orf": DROrthoForest(n_trees=10, propensity_model=clf(), model_Y=reg()),
+                  "dmlateiv": DMLATEIV(model_Y_W=reg(),
+                                       model_T_W=clf(),
+                                       model_Z_W=reg(),
+                                       discrete_treatment=True,
+                                       discrete_instrument=False)}
+        for name, model in models.items():
+            with self.subTest(name=name):
+                est = model
+                if name == "xlearner":
+                    est_dowhy = est.dowhy.fit(Y, T, X=np.hstack((X, W)), W=None)
+                elif name == "dmlateiv":
+                    est_dowhy = est.dowhy.fit(Y, T, W=W, Z=Z)
+                else:
+                    est_dowhy = est.dowhy.fit(Y, T, X=X, W=W)
+                # test causal graph
+                est_dowhy.view_model()
+                # test refutation estimate
+                est_dowhy.refute_estimate(method_name="random_common_cause")
+                if name != "orf":
+                    est_dowhy.refute_estimate(method_name="add_unobserved_common_cause",
+                                              confounders_effect_on_treatment="binary_flip",
+                                              confounders_effect_on_outcome="linear",
+                                              effect_strength_on_treatment=0.1,
+                                              effect_strength_on_outcome=0.1,)
+                est_dowhy.refute_estimate(method_name="placebo_treatment_refuter", placebo_type="permute",
+                                          num_simulations=3)
+                est_dowhy.refute_estimate(method_name="data_subset_refuter", subset_fraction=0.8,
+                                          num_simulations=3)
@@ -39,7 +39,6 @@ class TestShap(unittest.TestCase):
                     ]
                     for est in est_list:
                         with self.subTest(est=est, featurizer=featurizer, d_y=d_y, d_t=d_t):
-                            fd_x = featurizer.fit_transform(X).shape[1] if featurizer is not None else d_x
                             est.fit(Y, T, X, W)
                             shap_values = est.shap_values(X[:10], feature_names=["a", "b", "c"],
                                                           background_samples=None)

@@ -50,19 +49,13 @@ class TestShap(unittest.TestCase):
                             mean_cate = mean_cate.flatten()[0] if not np.isscalar(mean_cate) else mean_cate
                             self.assertAlmostEqual(shap_values["Y0"]["T0"].base_values[0], mean_cate, delta=1e-2)
 
-                            if isinstance(est, (CausalForestDML, DMLOrthoForest)):
-                                fd_x = d_x
-
                             # test shape of shap values output is as expected
                             self.assertEqual(len(shap_values["Y0"]), d_t)
                             self.assertEqual(len(shap_values), d_y)
                             # test shape of attribute of explanation object is as expected
-                            self.assertEqual(shap_values["Y0"]["T0"].values.shape, (10, fd_x))
-                            self.assertEqual(shap_values["Y0"]["T0"].data.shape, (10, fd_x))
+                            self.assertEqual(shap_values["Y0"]["T0"].values.shape[0], 10)
+                            self.assertEqual(shap_values["Y0"]["T0"].data.shape[0], 10)
                             self.assertEqual(shap_values["Y0"]["T0"].base_values.shape, (10,))
                             ind = 6
-                            self.assertEqual(len(shap_values["Y0"]["T0"].feature_names), fd_x)
-                            self.assertEqual(len(shap_values["Y0"]["T0"][ind].feature_names), fd_x)
 
     def test_discrete_t(self):
         n = 100

@@ -97,7 +90,6 @@ class TestShap(unittest.TestCase):
                                   ForestDRLearner()]
                     for est in est_list:
                         with self.subTest(est=est, featurizer=featurizer, d_y=d_y, d_t=d_t):
-                            fd_x = featurizer.fit_transform(X).shape[1] if featurizer is not None else d_x
                             if isinstance(est, (TLearner, SLearner, XLearner, DomainAdaptationLearner)):
                                 est.fit(Y, T, X)
                             else:

@@ -111,19 +103,13 @@ class TestShap(unittest.TestCase):
                             mean_cate = mean_cate.flatten()[0] if not np.isscalar(mean_cate) else mean_cate
                             self.assertAlmostEqual(shap_values["Y0"]["T0"].base_values[0], mean_cate, delta=1e-2)
 
-                            if isinstance(est, (TLearner, SLearner, XLearner, DomainAdaptationLearner, CausalForestDML,
-                                                ForestDRLearner, DROrthoForest)):
-                                fd_x = d_x
                             # test shape of shap values output is as expected
                             self.assertEqual(len(shap_values["Y0"]), d_t - 1)
                             self.assertEqual(len(shap_values), d_y)
                             # test shape of attribute of explanation object is as expected
-                            self.assertEqual(shap_values["Y0"]["T0"].values.shape, (10, fd_x))
-                            self.assertEqual(shap_values["Y0"]["T0"].data.shape, (10, fd_x))
+                            self.assertEqual(shap_values["Y0"]["T0"].values.shape[0], 10)
+                            self.assertEqual(shap_values["Y0"]["T0"].data.shape[0], 10)
                             self.assertEqual(shap_values["Y0"]["T0"].base_values.shape, (10,))
                             ind = 6
-                            self.assertEqual(len(shap_values["Y0"]["T0"].feature_names), fd_x)
-                            self.assertEqual(len(shap_values["Y0"]["T0"][ind].feature_names), fd_x)
 
     def test_identical_output(self):
         # Treatment effect function
@@ -1266,6 +1266,34 @@ def transpose_dictionary(d):
     return output
 
 
+def reshape_arrays_2dim(length, *args):
+    """
+    Reshape the input arrays as two dimensional.
+    If None, will be reshaped as (n, 0).
+
+    Parameters
+    ----------
+    length: scalar
+        Number of samples
+    args: arrays
+        Inputs to be reshaped
+
+    Returns
+    -------
+    new_args: arrays
+        Output of reshaped arrays
+    """
+    new_args = []
+    for arg in args:
+        if arg is None:
+            new_args.append(np.array([]).reshape(length, 0))
+        elif arg.ndim == 1:
+            new_args.append(arg.reshape((-1, 1)))
+        else:
+            new_args.append(arg)
+    return new_args
+
+
 class _RegressionWrapper:
     """
     A simple wrapper that makes a binary classifier behave like a regressor.
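Editor's note: for reference, the new helper's contract on each kind of input, as a quick sketch grounded directly in the function body above:

    import numpy as np
    from econml.utilities import reshape_arrays_2dim

    Y = np.ones(5)        # 1-d array -> reshaped to a (5, 1) column
    X = np.ones((5, 2))   # 2-d array -> passed through unchanged
    Z = None              # missing   -> empty (5, 0) placeholder

    Y2, X2, Z2 = reshape_arrays_2dim(5, Y, X, Z)
    assert Y2.shape == (5, 1) and X2.shape == (5, 2) and Z2.shape == (5, 0)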
The diffs of several files are hidden because one or more lines are too long.
@@ -53,6 +53,7 @@ install_requires =
     matplotlib
     pandas
     shap ~= 0.38.1
+    dowhy
 test_suite = econml.tests
 tests_require =
     pytest

@@ -63,7 +64,6 @@ tests_require =
     nbformat
     seaborn
     lightgbm
-    dowhy
     xgboost
 
 [options.extras_require]