initial commit
This commit is contained in:
Parent
d416224d8f
Commit
0f189e729b
@@ -0,0 +1,194 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import json
import logging
import random

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler

import mct.Constants as Constants
import mct.Utilities as Utils
from mct.HypothesisTester import chi_square_bias_test


class BiasTester(object):
    """
    Perform a bias check between the control and treatment dataframes.
    """

    __group_control = 'group_control'
    __group_treatment = 'group_treatment'
    __index_group = 'index_group'
    __rf_propensity_scores = 'rf_propensity_scores'

    def __init__(self, config: json):
        self.config = config
        self.__logger = logging.getLogger("mct")
        return

    def check_bias(self, control_df: pd.DataFrame, treatment_df: pd.DataFrame) -> \
            (pd.DataFrame, pd.DataFrame, pd.DataFrame, bool):
        """
        Compares the distribution of each invariant feature separately and flags any statistically
        significant difference that exceeds the given minimum percentage deviation threshold.

        :param control_df: control dataframe
        :param treatment_df: treatment dataframe
        :return: bias results, per-bin deviations, and a flag indicating whether the data is biased
        """

        self.__logger.debug('Checking for Population Bias')

        invariant_features = self.config[Constants.invariant_columns]
        p_value_threshold = self.config[Constants.p_value_threshold]
        percentage_deviation_threshold = self.config[Constants.resample_threshold]
        small_bin_percent_threshold = self.config[Constants.small_bin_percent_threshold]

        bias_results, deviation, is_biased = chi_square_bias_test(control_df[invariant_features],
                                                                  treatment_df[invariant_features],
                                                                  groups=[Constants.control_group,
                                                                          Constants.treatment_group],
                                                                  group_column_name=Constants.group_column_name,
                                                                  other_threshold=small_bin_percent_threshold,
                                                                  p_value=0.01)

        bias_results[Constants.num_of_bins] = bias_results[Constants.degree_of_freedom] + 1

        bias_results[Constants.resample] = 'no'
        bias_results.loc[(bias_results[Constants.percentage_deviation] > percentage_deviation_threshold)
                         & (bias_results[Constants.p_value_threshold] < p_value_threshold),
                         Constants.resample] = 'yes'

        # Sort the bias results.
        bias_results.sort_values(by=[Constants.percentage_deviation, Constants.feature], ascending=False, inplace=True)

        is_biased = is_biased and (bias_results[Constants.resample] == 'yes').any()
        self.__logger.info("Is Data biased: {0}".format(is_biased))

        # Sort the deviations.
        deviation.sort_values(
            by=[Constants.feature, Constants.bin_column],
            ascending=False,
            inplace=True)

        return bias_results, deviation, is_biased

    def normalize_bias(self, control: pd.DataFrame, treatment: pd.DataFrame, bias_results: pd.DataFrame,
                       random_state=None) -> (pd.DataFrame, pd.DataFrame):
        """
        Normalize and correct for the major biases.

        bias_results - needs to include the columns to normalize and their degrees of freedom.
        """
        self.__logger.debug("Bias Normalization: started")

        Utils.add_group_columns(control, treatment)

        if self.config[Constants.normalization_type] != 'rf':
            message = 'Currently the only supported normalization type is random forest'
            self.__logger.error(message)
            raise Exception(message)

        if not bias_results.empty:
            resample_columns = bias_results[Constants.feature]
            max_categories = bias_results[Constants.num_of_bins]

            data_splits = [(self.__group_control, control), (self.__group_treatment, treatment)]

            feature_transforms = [('categorical', x, y) for x, y in zip(resample_columns, max_categories)]

            self.__logger.info('Using RF propensity scores with caliper based matching.')

            # Get data after sampling.
            df_metric = self.__sample_propensity(data_splits, feature_transforms, random_state=random_state)
            df_control = df_metric[df_metric[Constants.group_column_name] == Constants.control_group]
            df_treatment = df_metric[df_metric[Constants.group_column_name] == Constants.treatment_group]

            return df_control, df_treatment
        else:
            self.__logger.info("Bias Normalization skipped.")
            self.__logger.debug("Bias Normalization finished. ")
            # Nothing to normalize: return the input frames unchanged.
            return control, treatment

    # Transform the input data
    def __transform(self, input_frame, features):
        train = pd.DataFrame(index=input_frame.index)
        for func, feat, max_categories in features:
            # Reduce cardinality of input_frame
            dt = input_frame[feat].astype(str)
            feat_counts = dt.value_counts()
            if len(feat_counts) > max_categories:
                dt[~dt.isin(feat_counts[:max_categories].index)] = Constants.other_feature_cluster_name
            # One-hot encode the features
            train = train.join(pd.get_dummies(dt, prefix=feat))

        return train

    def __rf_propensity(self, data, target, random_state=None):

        scalar = StandardScaler()
        data_transformed = scalar.fit_transform(data)

        clf = RandomForestClassifier(class_weight='balanced', n_jobs=-1, random_state=random_state, n_estimators=10)
        clf.fit(data_transformed, target)
        scores = clf.predict_proba(data_transformed)

        return scores[:, 1]

    def ___matching_1_1(self, df, random_state=None):
        df_c = df[df[self.__index_group] == self.__group_control]
        df_t = df[df[self.__index_group] == self.__group_treatment]
        df_ps = pd.DataFrame(df_c[self.__rf_propensity_scores].value_counts()).join(
            pd.DataFrame(df_t[self.__rf_propensity_scores].value_counts()),
            on=None,
            how='inner',
            lsuffix='l',
            rsuffix='r',
            sort=False)
        df_ps['num2use'] = df_ps[['rf_propensity_scoresl', 'rf_propensity_scoresr']].min(axis=1)
        index_c = []
        index_t = []

        random.seed(a=random_state)
        for i in df_ps.index:
            kk = df_ps.loc[i]['num2use']
            index_c += random.sample([ind for ind in df_c[df_c[self.__rf_propensity_scores] == i].index_original], k=kk)
            index_t += random.sample([ind for ind in df_t[df_t[self.__rf_propensity_scores] == i].index_original], k=kk)

        return index_c, index_t

    def __matching_caliper(self, df, caliper_coeff=0.1, random_state=None):
        caliper_width = caliper_coeff * df[self.__rf_propensity_scores].std()
        df[self.__rf_propensity_scores] = (df[self.__rf_propensity_scores] / caliper_width).astype(int)
        return self.___matching_1_1(df, random_state=random_state)

    def __sample_propensity(self, splits, feats, caliper_coeff=0.1, match_type='caliper', random_state=None):
        # Concatenate the split dataframes, keeping the labels.

        df = pd.concat([i for _, i in splits], keys=[splits[0][0], splits[1][0]],
                       names=[self.__index_group, 'index_original'])

        # Note: resetting index, to prevent potential problems with having the same index values after the concat.
        df.reset_index(inplace=True)

        # Set up data frame for classification algorithm.
        pred_frame = self.__transform(df, feats)

        # Get propensity scores using the RF algorithm.
        df[self.__rf_propensity_scores] = self.__rf_propensity(pred_frame, df[self.__index_group],
                                                               random_state=random_state)

        # Perform 1-1 matching based on the propensity scores.
        if match_type == 'caliper':
            ind_c, ind_t = self.__matching_caliper(df, caliper_coeff=caliper_coeff, random_state=random_state)
        else:
            ind_c, ind_t = self.___matching_1_1(df, random_state=random_state)

        self.__logger.info("Resampled data size: {}, Percent of retained data: {}:"
                           .format(len(ind_c) * 2, int(len(ind_c) * 2 / len(df) * 100)))
        self.__logger.info("Percent retained in Control {}, Percent retained in Treatment {}:"
                           .format(int(len(ind_c) / len(df[df[self.__index_group] == self.__group_control]) * 100),
                                   int(len(ind_c) / len(df[df[self.__index_group] == self.__group_treatment]) * 100)))

        return pd.concat([splits[0][1].filter(ind_c, axis=0), splits[1][1].filter(ind_t, axis=0)])
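
A rough sketch of driving BiasTester directly, outside the full pipeline. Everything here is illustrative and not part of the library: the 'country' column, the threshold values, and the toy group sizes are made up, and the run is a sketch under those assumptions rather than a tested recipe.

# Illustrative only: column name, thresholds, and data are hypothetical.
import pandas as pd
import mct.Constants as Constants
from mct.BiasTester import BiasTester

config = {
    Constants.invariant_columns: ['country'],
    Constants.p_value_threshold: 0.05,
    Constants.resample_threshold: 5.0,
    Constants.small_bin_percent_threshold: 1.0,
    Constants.normalization_type: 'rf',
}
control = pd.DataFrame({'country': ['US'] * 80 + ['CA'] * 20})
treatment = pd.DataFrame({'country': ['US'] * 60 + ['CA'] * 40})

tester = BiasTester(config)
bias_results, deviation, is_biased = tester.check_bias(control, treatment)
if is_biased:
    # Re-balance the two populations via RF propensity scores and caliper matching.
    control, treatment = tester.normalize_bias(control, treatment, bias_results)
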
@@ -0,0 +1,49 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

# Code Constants
skip = 'skip'
sorting_type_delta_count = 'delta_count'
sorting_type_delta_percent = 'delta_percent'
# Reserved column names
group_column_name = "group"
other_feature_cluster_name = 'other'
control_group = 'control'
treatment_group = 'treatment'
# Computed columns
feature = 'feature'
resample = 'resample'
expected_failures = '# of Expected Failures in Treatment'
actual_failures = '# of Actual Failures in Treatment'
num_of_bins = 'num bins'
degree_of_freedom = 'dof'
percentage_deviation = 'Percentage Deviation'
mean_difference = 'Percent Difference'
mean_control = 'Percent Control'
control_percent = 'control_percent'
treatment_percent = 'treatment_percent'
mean_treatment = 'Percent Treatment'
percent_delta = '% Delta'
count_delta = 'Delta (Count)'
hazard_score = 'Hazard Score'
bin_column = 'bin'
p_value = 'P-Value'
is_stat_sig = 'Is Stat-Sig'
# Config parameters
small_bin_percent_threshold = 'small_bin_percent_threshold'
p_value_threshold = 'p_value'
sort_type = 'sort_type'
normalization_type = 'normalization_type'
metric_column = "metric_col"
invariant_columns = 'invariant_columns'
feature_columns = 'feature_columns'
results_dir = "results_dir"
add_is_null_column = 'add_null'
resample_threshold = 'resample_threshold'
decomposition_type = 'decomposition_type'
required_config_keys = [metric_column, invariant_columns, feature_columns, resample_threshold, results_dir,
                        p_value_threshold, decomposition_type, normalization_type, sort_type, add_is_null_column,
                        'apply_feature_target_metric_dependence_test']

log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
log_file_name = 'mct.log'
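
For reference, a config dict covering required_config_keys (plus the small-bin threshold read by BiasTester) might look like the sketch below. The key names come from the constants above; every value is an example chosen for illustration, not something the module prescribes.

# Hypothetical config; key strings match Constants, values are examples only.
example_config = {
    'metric_col': 'is_failure',
    'invariant_columns': ['country', 'platform'],
    'feature_columns': ['cpu_model', 'network_type'],
    'resample_threshold': 5.0,
    'results_dir': './results',
    'p_value': 0.05,
    'decomposition_type': 'univariate',
    'normalization_type': 'rf',
    'sort_type': 'hazard_score',
    'add_null': True,
    'small_bin_percent_threshold': 1.0,
    'apply_feature_target_metric_dependence_test': True,
}
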
@@ -0,0 +1,177 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import logging

import numpy as np
import pandas as pd
import scipy.stats as sp

import mct.Constants as Constants
from mct.Utilities import get_is_null_column_name


class FeatureBinGenerator(object):
    """
    Class to bin numeric features.
    """

    def __init__(self):
        self.__logger = logging.getLogger("mct")
        return

    @staticmethod
    def create_top_bins(data: pd.DataFrame, column_name: str, number_of_bins: int,
                        minimum_size: int, other_bin_name: str, add_is_null_column: bool) -> pd.Series:
        """
        Create at most number_of_bins bins by selecting the number_of_bins most frequent values
        and combining the rest into an 'other' bin.

        :param data: dataframe
        :param column_name: column name to bin
        :param number_of_bins: maximum number of bins to create
        :param minimum_size: minimum count required to keep a value as its own bin
        :param other_bin_name: name of the bin that collects the remaining values
        :param add_is_null_column: whether null values are counted as their own bin
        :return: binned column
        """

        column = data[column_name].astype(str)
        feats = column.value_counts(dropna=(not add_is_null_column)).to_frame('count').reset_index().sort_values(
            ['count', 'index'], ascending=[False, True])
        feats = feats[:number_of_bins]
        feats = feats.loc[feats['count'] >= minimum_size, 'index']
        column[~column.isin(feats)] = other_bin_name

        return column

    def create_percentile_bins(self, df: pd.DataFrame, num_cols: list, add_null: bool = False,
                               num_bins: int = 4) -> pd.DataFrame:
        """
        Method to bin numerical features by their percentile.

        Numerical Variables
            * Bins data by percentile.
            * Encodes the new variables with the variable name and GTE (greater than or equal) / LTE syntax.
            * Adds a dummy *_nan* variable recording the fact that the feature was null.

        Categorical Variables
            * Returns a warning. Doesn't bin the feature.

        :param df: input pandas dataframe
        :param num_cols: a list of the names of the numerical columns in df to bin.
        :param add_null: whether to add the *_nan* features to the data (default False)
        :param num_bins: the number of bins to break the data into; the percent width of each bin is 100/num_bins.
                         (default 4 is a quartile break down; keep this low to avoid blow up.)

        :returns: Transformed pandas dataframe
        """
        dummy_frames = []

        num_cols = [col for col in num_cols if col in df.columns]

        for col in num_cols:
            # Make sure the column is numerical before binning.
            if df[col].dtype == np.object:
                self.__logger.warning("Warning: Feature {0} is not numerical and wasn't binned.".format(col))
                continue

            # Get percentiles.
            dt = df[col]
            dt = ((dt.rank() - 1) * num_bins / len(dt.dropna())).apply(np.floor)

            dt_agg = df.groupby(dt)[col].agg([np.min, np.max]).rename(columns={'amin': 'min', 'amax': 'max'})

            for bin_num in dt.unique():
                if np.isnan(bin_num):
                    continue
                if dt_agg.loc[bin_num]['min'] == dt_agg.loc[bin_num]['max']:
                    dt.replace(bin_num, 'is_{}'.format(dt_agg.loc[bin_num]['min']), inplace=True)
                else:
                    dt.replace(bin_num, 'GTE_{}_LTE_{}'.format(dt_agg.loc[bin_num]['min'], dt_agg.loc[bin_num]['max']),
                               inplace=True)

            add_is_null = add_null and df[col].isnull().any()
            dummy_frames.append(pd.get_dummies(dt, prefix=col, dummy_na=add_is_null))

            df.drop(col, axis=1, inplace=True)

        df = df.join(dummy_frames, sort=True)
        return df

    @staticmethod
    def get_feature_dummies(df: pd.DataFrame, feature_cols, target_col, add_null=True, p_thresh=.01,
                            min_data_points=500, max_categories=5, apply_feature_target_metric_dependence_test=True):
        """
        Method to transform a dataframe, with respect to a target variable, for use with classification models.

        Numerical Variables
            * Null values are left in the dataframe.
            * Adds a dummy *is_null* variable recording the fact that the feature was null.

        Categorical Variables
            * One-hot encodes categorical variables.
            * To avoid blow up:
                max_categories: Maximum number of categories a feature can have.
                                The rest are collected into a category 'other'.
            * To maintain informativeness:
                min_data_points: The minimum number of points required to create a bin.
                p_thresh: A chi-squared test is run against target_col.
                          The variable is kept if the resulting p_value < p_thresh; otherwise it is dropped.

        :param apply_feature_target_metric_dependence_test: whether to run the chi-squared dependence test.
        :param add_null: whether to add *is_null* columns for numerical features containing nulls.
        :param target_col: name of the target metric column.
        :param df: input pandas dataframe
        :param feature_cols: A list of the feature names in the df to be transformed.
        :param max_categories: Maximum number of categories.
        :param min_data_points: Minimum number of points in a categorical bin.
        :param p_thresh: Critical value for the chi-squared test.

        :returns: Transformed pandas dataframe, and a list of columns that were treated as numerical.

        Future Work: An algorithm that selects the candidate categorical binning based on information gain as
                     opposed to data size.
        """

        is_null_frame = {}
        numerical_columns = []
        dummy_frames = []

        # Drop columns with only a single value (including null).
        for col in feature_cols:
            if df[col].nunique(dropna=False) == 1:
                df.drop(col, axis=1, inplace=True)
                continue

            # For numerical columns {int, float} create is_null.
            if not (df[col].dtype == np.object or df[col].dtype == np.bool):
                if df[col].isnull().any() and add_null:
                    is_null_frame[get_is_null_column_name(col)] = np.isnan(df[col])

                if df[col].nunique(dropna=True) == 1:
                    df.drop(col, axis=1, inplace=True)
                else:
                    numerical_columns.append(col)
            else:
                # For categorical columns create feature dummies.
                dt_col = FeatureBinGenerator.create_top_bins(df, col, max_categories, min_data_points,
                                                             Constants.other_feature_cluster_name,
                                                             add_null)

                if apply_feature_target_metric_dependence_test:
                    chi2, p_val, dof, expected = sp.chi2_contingency(pd.crosstab(dt_col, df[target_col]))
                    if p_val > p_thresh:
                        df.drop(col, axis=1, inplace=True)
                        continue
                # TODO: Refactor the code to create dummies only when we need them, to optimize performance.
                dummy_frames.append(pd.get_dummies(dt_col, prefix=col))
                df.drop(col, axis=1, inplace=True)

        if add_null:
            null_frame = pd.DataFrame(is_null_frame)
            df = df.join(null_frame, sort=True)

        df = df.join(dummy_frames, sort=True)

        return df, numerical_columns
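
A small, self-contained sketch of create_percentile_bins on a toy frame. The column name and values are made up for illustration; only the calling convention and the shape of the output are the point.

# Toy data; 'latency_ms' is a hypothetical column name.
import numpy as np
import pandas as pd
from mct.FeatureBinGenerator import FeatureBinGenerator

df = pd.DataFrame({'latency_ms': [10, 12, 15, 20, 22, 30, 45, np.nan]})
binned = FeatureBinGenerator().create_percentile_bins(df.copy(), ['latency_ms'], add_null=True, num_bins=2)
print(binned.columns.tolist())
# Expect one-hot columns of the form 'latency_ms_GTE_..._LTE_...' plus 'latency_ms_nan' for the null row.
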
@@ -0,0 +1,258 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import logging

import numpy as np
import pandas as pd

import mct.Constants as Constants
from mct.FeatureBinGenerator import FeatureBinGenerator
from mct.HypothesisTester import ci_proportion_bounds


class FeatureRanker(object):
    """
    Feature ranking for metric delta.
    """

    def __init__(self, config):
        self.__config = config
        self.__bin_generator = FeatureBinGenerator()
        self.__logger = logging.getLogger("mct")
        return

    def compute_ranks(self, df_metric: pd.DataFrame, df_metric_not_normalized: pd.DataFrame,
                      numerical_cols: list) -> pd.DataFrame:
        """
        Feature ranking for metric delta.
        """
        config = self.__config
        target_col = config[Constants.metric_column]
        add_null = config[Constants.add_is_null_column]
        sorting_type = config[Constants.sort_type]
        # Compute the feature ranking.
        df_feature_ranking = self.__decompose_metric_univar(df_metric, df_metric_not_normalized, target_col,
                                                            numerical_cols, add_null)

        if sorting_type == Constants.sorting_type_delta_count:
            sorted_feature = df_feature_ranking.sort_values(Constants.count_delta, ascending=False, inplace=False)
        elif sorting_type == Constants.sorting_type_delta_percent:
            sorted_feature = df_feature_ranking.sort_values(Constants.percent_delta, ascending=False, inplace=False)
        else:
            sorted_feature = df_feature_ranking.sort_values(Constants.hazard_score, ascending=False, inplace=False)

        sorted_feature.reset_index(inplace=True, drop=True)

        return sorted_feature

    def __decompose_metric_univar(self, df_metric, df_metric_not_normalized, target_col, numerical_cols, add_null):
        """
        Computes the univariate feature ranking.
        """
        df_uni_var_un_norm, df_uni_var_norm, categorical_cols = self.__set_univar_frames(df_metric,
                                                                                         df_metric_not_normalized,
                                                                                         numerical_cols, target_col,
                                                                                         add_null)

        expected = []
        actual = []
        contribution = []
        is_sig = []
        feature_results = pd.DataFrame(categorical_cols, columns=[Constants.feature])

        # Categorical feature ranking.
        for col in categorical_cols:
            sig, exp, act, con = self.__feat_cat_significant(col,
                                                             df_uni_var_norm,
                                                             df_uni_var_un_norm,
                                                             target_col,
                                                             Constants.group_column_name)
            is_sig.append(sig)
            contribution.append(con)
            expected.append(exp)
            actual.append(act)

        feature_results[Constants.is_stat_sig] = is_sig
        feature_results[Constants.count_delta] = contribution
        feature_results[Constants.expected_failures] = expected
        feature_results[Constants.actual_failures] = actual

        # Numerical feature ranking.
        for col in numerical_cols:
            sig, binned_feats, is_bin_sig, exp, act, con = self.__feat_num_significant(col, df_uni_var_norm,
                                                                                       df_uni_var_un_norm,
                                                                                       target_col,
                                                                                       Constants.group_column_name,
                                                                                       add_null)
            if sig:
                feature_results = feature_results.append(
                    pd.DataFrame(list(zip(*[binned_feats, is_bin_sig, exp, act, con])),
                                 columns=[Constants.feature,
                                          Constants.is_stat_sig,
                                          Constants.expected_failures,
                                          Constants.actual_failures,
                                          Constants.count_delta]),
                    sort=True)

        # Set up columns for the output feature ranking.
        feature_results[Constants.percent_delta] = np.abs(
            feature_results[Constants.count_delta] / feature_results[Constants.expected_failures] * 100)

        # Expected failures in treatment: the control failure rate scaled to the treatment population size.
        df_control_not_norm = df_metric_not_normalized.loc[
            df_metric_not_normalized[Constants.group_column_name] == Constants.control_group]
        df_treatment_not_norm = df_metric_not_normalized.loc[
            df_metric_not_normalized[Constants.group_column_name] == Constants.treatment_group]
        n_fail_exp = df_control_not_norm[target_col].sum() / len(df_control_not_norm) * len(df_treatment_not_norm)
        n_fail_act = df_treatment_not_norm[target_col].sum()
        feature_results[Constants.hazard_score] = (feature_results[Constants.actual_failures] / n_fail_act -
                                                   feature_results[Constants.expected_failures] / n_fail_exp) * 100

        feature_results.reset_index(inplace=True, drop=True)

        output_columns = [Constants.feature, Constants.hazard_score, Constants.expected_failures,
                          Constants.actual_failures, Constants.count_delta, Constants.percent_delta]

        stat_sig_features = feature_results[feature_results[Constants.is_stat_sig] == True][output_columns]
        return stat_sig_features

    def __set_univar_frames(self, df_metric, df_metric_not_normalized, numerical_cols, target_col, add_null):
        """
        Prepares data_frames for univariate feature ranking: one before and one after bias normalization.
        Sets up categorical and numerical features.
        """

        config = self.__config
        invar_target_cols = config[Constants.invariant_columns] + [target_col]

        df_invar_target = df_metric_not_normalized[invar_target_cols]
        frame_invar_target, num_cols = self.__bin_generator.get_feature_dummies(
            df_invar_target.copy(),
            config[Constants.invariant_columns],
            target_col,
            min_data_points=1,
            max_categories=10,
            p_thresh=0.25,
            add_null=add_null,
            apply_feature_target_metric_dependence_test=self.__config['apply_feature_target_metric_dependence_test'])

        # Normalized and non-normalized data_frames for feature ranking.
        df_uni_var_un_norm = df_metric_not_normalized[
            config[Constants.feature_columns] + [Constants.group_column_name]].merge(frame_invar_target,
                                                                                     left_index=True,
                                                                                     right_index=True)
        df_uni_var_norm = df_metric[config[Constants.feature_columns] + [Constants.group_column_name]].merge(
            frame_invar_target, left_index=True, right_index=True)

        feature_cols = list(df_uni_var_un_norm.columns)
        feature_cols.remove(Constants.group_column_name)
        feature_cols.remove(target_col)
        categorical_cols = [col for col in feature_cols if col not in numerical_cols]

        return df_uni_var_un_norm, df_uni_var_norm, categorical_cols

    def __feat_cat_significant(self, col, df_uni_var_norm, df_uni_var_un_norm, target_col, group_col):
        """
        Determines if a categorical col is significant on the normalized dataset. If it is, it computes the impact
        on the non-normalized data set and returns the contribution.
        """
        num_c, len_c, num_t, len_t = self.__feat_info_cat(col, df_uni_var_norm, target_col, group_col)

        sig = self.__sig_check(num_c, len_c, num_t, len_t)
        if not sig:
            return False, 0, 0, 0

        # If the number of drops is significant, return the number of impacted calls on the original data set.
        num_c, len_c, num_t, len_t = self.__feat_info_cat(col, df_uni_var_un_norm, target_col, group_col)

        return True, num_c * len_t / len_c, num_t, num_t - num_c * len_t / len_c

    def __feat_info_cat(self, col, df_uni_var, target_col, group_col):
        """
        Sets up the appropriate dataframe and returns the number of failures associated with the given feature
        on the treatment and control datasets.
        """

        return self.__feat_info(df_uni_var[[target_col, group_col, col]], col, target_col, group_col)

    @staticmethod
    def __feat_info(df_col, col, target_col, group_col):
        """
        Computes the number of failures associated with the given feature on the treatment and control datasets.
        """
        df = df_col.copy()

        df['targ_and_col'] = df[target_col] & df[col]
        col_info = df.groupby(group_col)['targ_and_col'].agg({'size', 'sum'})

        num_c = col_info.loc[Constants.control_group, 'sum']
        len_c = col_info.loc[Constants.control_group, 'size']
        num_t = col_info.loc[Constants.treatment_group, 'sum']
        len_t = col_info.loc[Constants.treatment_group, 'size']

        return num_c, len_c, num_t, len_t

    @staticmethod
    def __sig_check(num_c, len_c, num_t, len_t):
        """
        Checks if the change in failures is significant between the treatment and control datasets.
        """
        low_c, up_c = ci_proportion_bounds(num_c / len_c, len_c)
        low_t, up_t = ci_proportion_bounds(num_t / len_t, len_t)

        if (low_c <= low_t) and (up_c >= low_t):
            return False
        if (low_t <= low_c) and (up_t >= low_c):
            return False

        return True

    def __feat_num_significant(self, col, df_uni_var_norm, df_uni_var_un_norm, target_col, group_col, add_null):
        """
        Determines if the binning for a numerical col is significant on the normalized dataset.
        If it is, it computes the impact on the non-normalized data set by creating a new binning,
        and returns significance and contributions.
        """
        df_target_col = df_uni_var_norm[[target_col, group_col, col]]
        df_bin_col = self.__bin_generator.create_percentile_bins(df_target_col.copy(), [col], num_bins=10,
                                                                 add_null=add_null)
        binned_feats = [feat for feat in df_bin_col.columns if col in feat]

        sig = False

        for feat in binned_feats:
            num_c, len_c, num_t, len_t = self.__feat_info(df_bin_col[[target_col, group_col, feat]],
                                                          feat,
                                                          target_col,
                                                          group_col)
            sig = self.__sig_check(num_c, len_c, num_t, len_t)
            if sig:
                break

        # If none of the binned features are significant, return False and zero impact.
        if not sig:
            return False, 0, 0, 0, 0, 0

        # Contribution on the non-normalized data set.
        df_target_col = df_uni_var_un_norm[[target_col, group_col, col]]
        df_bin_col = self.__bin_generator.create_percentile_bins(df_target_col.copy(), [col], num_bins=10,
                                                                 add_null=add_null)
        binned_feats = [feat for feat in df_bin_col.columns if col in feat]

        expected = []
        actual = []
        contribution = []
        is_sig = []

        for feat in binned_feats:
            num_c, len_c, num_t, len_t = self.__feat_info(df_bin_col[[target_col, group_col, feat]], feat, target_col,
                                                          group_col)
            contribution.append(num_t - num_c * len_t / len_c)
            actual.append(num_t)
            expected.append(num_c * len_t / len_c)
            is_sig.append(self.__sig_check(num_c, len_c, num_t, len_t))

        return True, binned_feats, is_sig, expected, actual, contribution
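
The hazard score computed above can be read as the percentage-point difference between a feature's share of the actual treatment failures and its share of the failures expected from control rates. A short worked example with made-up counts:

# Illustrative numbers only: a bin holding 30 of the treatment's 100 actual failures,
# which would have contributed 20 of the 95 failures expected from control rates.
expected, actual = 20.0, 30.0
n_fail_exp, n_fail_act = 95.0, 100.0
hazard_score = (actual / n_fail_act - expected / n_fail_exp) * 100
print(round(hazard_score, 1))  # 8.9 -> this bin is over-represented among treatment failures
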
@@ -0,0 +1,230 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import numpy as np
import pandas as pd
import scipy.stats as sp
import statsmodels.stats.api as sms

import mct.Constants as Constants


# t-test
def get_t_test_result(a, b):
    # Run a t-test on control and treatment.
    (mean_diff, control_mean, stat, p_value) = __two_sample_t_test(a, b)
    (lower, upper) = __t_test_conf_interval(a, b)
    mean_rel = mean_diff / control_mean
    return mean_diff, mean_rel, lower, upper, stat, p_value


def __two_sample_t_test(a, b):
    # Run a t-test on control and treatment.
    (stat, p_value) = sp.ttest_ind(a, b, equal_var=False)
    control_mean = np.mean(a)
    mean_diff = control_mean - np.mean(b)
    return mean_diff, control_mean, stat, p_value


def __t_test_conf_interval(a, b):
    cm = sms.CompareMeans(sms.DescrStatsW(a), sms.DescrStatsW(b))
    return cm.tconfint_diff(usevar='pooled')


# Bernoulli test - Test of Proportions
def chi_squared_results(a, b):
    # Run a Chi-Squared test on control and treatment.
    mean_control = np.mean(a)
    mean_treatment = np.mean(b)
    mean_diff = mean_treatment - mean_control

    df = pd.concat([
        pd.DataFrame(data={'metric': a, 'label': Constants.control_group}),
        pd.DataFrame(data={'metric': b, 'label': Constants.treatment_group})
    ])

    cont_table = pd.crosstab(df['label'], df['metric'])
    chi2, p_val, dof, expected = sp.chi2_contingency(cont_table)

    return mean_diff, mean_control, mean_treatment, chi2, p_val


def ci_proportion_bounds(p, n):
    err = __ci_proportion(p, n)
    return (p - err), (p + err)


def __se_proportion(p, n):
    return np.sqrt((p * (1 - p)) / n)


def __ci_proportion(p, n):
    return 1.96 * __se_proportion(p, n)


"""
Created on Fri Apr 8 09:48:53 2016

Python utility to check whether data is biased between two groups.

NOTE: This only performs a bias check for categorical values.
It does not perform a bias check for numeric variables.

See examples in https://onlinecourses.science.psu.edu/stat414/node/311

@author: jayagup
"""


def chi_square_bias_test(control, treatment, groups, group_column_name, other_threshold, p_value):
    """
    Compute the chi square test of homogeneity between two groups.

    See details in https://onlinecourses.science.psu.edu/stat414/node/311

    :input control: The control data frame.
    :input treatment: The treatment data frame.
    :input groups: The names of the groups.
    :input group_column_name: The name of the group column.
    :input other_threshold: The small-bin percentage threshold below which bins are combined into 'other'.
    :input p_value: The p-value with which to evaluate the univariate table.

    :returns is_biased: True if biased, else False.
    :returns table_biased: A table of results with differences.
    """

    contingency_table_c = __get_contingency_table(control)
    contingency_table_t = __get_contingency_table(treatment)

    # Add group information to the dataframes.
    contingency_table_c[group_column_name] = groups[0]
    contingency_table_t[group_column_name] = groups[1]

    # Create a single contingency table including both groups.
    contingency_table = contingency_table_c.append(contingency_table_t)

    # Run the bias check.
    return chi_square_bias_test_contingency(contingency_table, other_threshold, p_value)


def chi_square_bias_test_contingency(df_cont_table, other_threshold, p_value):
    """
    Chi square test of homogeneity over all features.
    """
    # Perform the bias check for all features, one feature at a time.
    bias_results = []
    deviation = pd.DataFrame()
    for feature in df_cont_table[Constants.feature].unique():
        chi2, p_val, dof, perc_dev, feature_deviation = bias_check_covariate(
            df_cont_table, feature, other_threshold)
        feature_deviation[Constants.feature] = feature
        deviation = deviation.append(feature_deviation)

        bias_results.append({
            Constants.feature: feature,
            "chi_square": chi2,
            "p_value": p_val,
            Constants.degree_of_freedom: dof,
            Constants.percentage_deviation: perc_dev})

    df_bias_results = pd.DataFrame(bias_results)

    # Test whether any feature fails the p-value criterion.
    p_value_check = (np.sum(df_bias_results.p_value < p_value) > 0)

    return df_bias_results, deviation, p_value_check


def bias_check_covariate(df_cont_table, feature, other_threshold=1.0):
    """
    Chi square test of homogeneity for a single feature.

    :input df_cont_table: Counts for the feature.
    :input feature: The name of the feature.
    :return: outcome of the chi square bias check.
    """
    # Filter the feature.
    df_cont_feature = df_cont_table[df_cont_table.feature == feature]

    # Pivot the counts to create R x C format.
    df_cont_pivot = pd.pivot_table(
        df_cont_feature,
        values='count',
        columns=Constants.group_column_name,
        index=Constants.bin_column)

    df_cont_pivot, grps = __combine_small_bins(df_cont_pivot, other_threshold)

    # Feed the contingency table to the chi square test.
    chi2, p_val, dof, expected = sp.chi2_contingency(df_cont_pivot)

    # Compute the probability deviation from expected.
    diff_percent = np.abs(expected - df_cont_pivot) / sum(expected) * 100.0

    # Compute the percentage for each bin.
    grp_percent = list(map(lambda x: x + "_percent", grps))  # control_percent, treatment_percent
    diff_percent[grp_percent] = df_cont_pivot[grps] / df_cont_pivot[grps].sum() * 100.0
    diff_percent.reset_index(inplace=True)

    # Sum of differences of expected probability and observed probability.
    # Note that the sum should be the same for the two columns.
    perc_dev = np.max(diff_percent[grps.tolist()].max())

    return chi2, p_val, dof, perc_dev, diff_percent


def __combine_small_bins(df_cont_pivot, other_threshold):
    """
    Combine bins that are too small in both the control and treatment group into
    Constants.other_feature_cluster_name.

    :input df_cont_pivot: contingency pivot table.
    """
    # If there are bins which are too small in both groups,
    # then set them to the 'other' group.
    grps = df_cont_pivot.columns
    df_cont_pivot.reset_index(inplace=True)
    df_cont_pivot.fillna(0, inplace=True)
    other_grp_1 = df_cont_pivot[grps[0]] / df_cont_pivot[grps[0]].sum() * 100.0 < other_threshold
    other_grp_2 = df_cont_pivot[grps[1]] / df_cont_pivot[grps[1]].sum() * 100.0 < other_threshold
    other_grp = other_grp_1 & other_grp_2
    df_cont_pivot.loc[other_grp, Constants.bin_column] = Constants.other_feature_cluster_name

    # Combine all the others by grouping by bin again.
    df_cont_pivot = df_cont_pivot.groupby(Constants.bin_column).sum()
    df_cont_pivot.fillna(0, inplace=True)

    return df_cont_pivot, grps


def __get_contingency_table(data: pd.DataFrame) -> pd.DataFrame:
    """
    Create a contingency table for the dataframe.

    :input data: The dataframe.
    :returns: The contingency table data frame with [Constants.bin_column, 'count', Constants.feature] columns.
    """
    contingency_data_frame = pd.DataFrame()

    for c in data.columns:
        contingency_data_frame = contingency_data_frame.append(_get_feature_values_distribution(data[c]))

    return contingency_data_frame


def _get_feature_values_distribution(feature):
    """
    Get the count for each feature value.
    """

    # TODO: this would perform poorly when we have a numerical feature.
    distribution = pd.DataFrame(feature
                                .groupby(feature.values)
                                .agg('count')
                                .reset_index()
                                .rename(columns={"index": Constants.bin_column, feature.name: 'count'})
                                )

    distribution[Constants.feature] = feature.name

    return distribution
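
The proportion confidence interval above is the normal approximation with z = 1.96, i.e. p +/- 1.96 * sqrt(p * (1 - p) / n), so the bounds correspond to a 95% interval. A quick sketch of calling ci_proportion_bounds and chi_squared_results on toy binary outcomes; the sizes and rates are made up for illustration.

# Toy binary outcome vectors; failure rates and sample sizes are illustrative.
import numpy as np
from mct.HypothesisTester import chi_squared_results, ci_proportion_bounds

rng = np.random.RandomState(0)
control = rng.binomial(1, 0.10, size=1000)    # ~10% failure rate
treatment = rng.binomial(1, 0.13, size=1000)  # ~13% failure rate

low, high = ci_proportion_bounds(control.mean(), len(control))
mean_diff, mean_c, mean_t, chi2, p_val = chi_squared_results(control, treatment)
print((low, high), mean_diff, p_val)
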
@@ -0,0 +1,59 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import json
import logging
import os

from pandas import DataFrame

import mct.Constants as Constants
from mct.BiasTester import BiasTester
from mct.FeatureRanker import FeatureRanker
from mct.MetricComparer import MetricComparer
from mct.PreProcessor import PreProcessor
from mct.Visualizer import Visualizer


class MCT(object):
    def __init__(self, config: json):
        self.__config = config
        log_file = os.path.join(config[Constants.results_dir], Constants.log_file_name)
        logger = logging.getLogger("mct")
        file_handler = logging.FileHandler(log_file)
        file_handler.setFormatter(logging.Formatter(Constants.log_format))
        file_handler.setLevel(logging.INFO)
        logger.addHandler(file_handler)

    def process(self, control: DataFrame, treatment: DataFrame, random_state=None):
        preprocessor = PreProcessor(self.__config)
        visualizer = Visualizer(self.__config)

        df_metric, numerical_cols = preprocessor.pre_process_data(control, treatment)
        df_metric_not_norm = df_metric.copy()

        # Compare Control vs Treatment.
        delta_comparer = MetricComparer(self.__config)
        control = df_metric[df_metric[Constants.group_column_name] == Constants.control_group]
        treatment = df_metric[df_metric[Constants.group_column_name] == Constants.treatment_group]
        metric_delta = delta_comparer.compare(control, treatment)

        # Bias checker.
        bias_tester = BiasTester(self.__config)
        visualizer.create_metric_delta_report(metric_delta, "initial_metric_comparison.html")

        bias_results, deviation, is_biased = bias_tester.check_bias(control, treatment)
        visualizer.create_bias_result_report(bias_results, deviation)

        if is_biased and (self.__config[Constants.normalization_type] != Constants.skip):
            n_control, n_treatment = bias_tester.normalize_bias(control, treatment, bias_results, random_state)

            df_metric = n_control.append(n_treatment)

            b_metric_delta = delta_comparer.compare(n_control, n_treatment)
            visualizer.create_metric_delta_report(b_metric_delta, "normalized_metric_comparison.html")
            n_bias_results, n_deviation, n_is_biased = bias_tester.check_bias(n_control, n_treatment)

        feature_ranker = FeatureRanker(self.__config)
        feature_rank = feature_ranker.compute_ranks(df_metric, df_metric_not_norm, numerical_cols)
        visualizer.create_feature_rank_report(feature_rank)
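
An end-to-end sketch of driving the MCT class. This is a sketch under assumptions, not a tested recipe: the import path mct.MCT follows the module naming used by the imports above, the dataframes, column names, and config values are hypothetical, and results_dir must exist and be writable before MCT is constructed.

# Hypothetical end-to-end run; all column names, rates, and config values are illustrative.
import os
import numpy as np
import pandas as pd
import mct.Constants as Constants
from mct.MCT import MCT  # assumed module path, mirroring the other mct.* imports

rng = np.random.RandomState(42)
n = 2000

def make_frame(fail_rate):
    return pd.DataFrame({
        'is_failure': rng.binomial(1, fail_rate, n),
        'country': rng.choice(['US', 'CA', 'UK'], n),
        'network_type': rng.choice(['wifi', 'wired'], n),
    })

config = {
    Constants.metric_column: 'is_failure',
    Constants.invariant_columns: ['country'],
    Constants.feature_columns: ['network_type'],
    Constants.resample_threshold: 5.0,
    Constants.results_dir: './results',          # must exist and be writable
    Constants.p_value_threshold: 0.05,
    Constants.decomposition_type: 'univariate',
    Constants.sort_type: 'hazard_score',
    Constants.normalization_type: 'rf',
    Constants.add_is_null_column: True,
    Constants.small_bin_percent_threshold: 1.0,
    # Disabled here so that the random toy features are not dropped by the dependence test.
    'apply_feature_target_metric_dependence_test': False,
}

os.makedirs(config[Constants.results_dir], exist_ok=True)
MCT(config).process(make_frame(0.10), make_frame(0.13), random_state=1)
# Reports land in ./results: initial_metric_comparison.html, bias_results.html, feature_ranking.csv, ...
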
@@ -0,0 +1,46 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import logging

import pandas as pd

import mct.Constants as Constants
from mct.HypothesisTester import chi_squared_results


class MetricComparer(object):
    """
    Class to compare a metric on two datasets.
    """

    def __init__(self, config):
        self.config = config
        self.__logger = logging.getLogger("mct")
        return

    def compare(self, control: pd.DataFrame, treatment: pd.DataFrame) -> pd.DataFrame:
        """
        :param control: control dataframe
        :param treatment: treatment dataframe
        :return: dataframe [Constants.mean_difference,
                            Constants.mean_control,
                            Constants.mean_treatment,
                            Constants.p_value,
                            Constants.is_stat_sig]
        """
        control_metric = control[self.config[Constants.metric_column]]
        treatment_metric = treatment[self.config[Constants.metric_column]]

        mean_diff, mean_control, mean_treatment, chi2, p_val = chi_squared_results(control_metric, treatment_metric)

        metric_delta = pd.DataFrame(
            [{
                Constants.mean_difference: mean_diff,
                Constants.mean_control: mean_control,
                Constants.mean_treatment: mean_treatment,
                Constants.p_value: p_val,
                Constants.is_stat_sig: (p_val < self.config[Constants.p_value_threshold])
            }])

        return metric_delta
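
In isolation, MetricComparer.compare only needs the metric column name and the p-value threshold from the config. A minimal call with made-up data:

# Minimal illustration; 'is_failure', the counts, and the 0.05 threshold are made up.
import pandas as pd
import mct.Constants as Constants
from mct.MetricComparer import MetricComparer

config = {Constants.metric_column: 'is_failure', Constants.p_value_threshold: 0.05}
control = pd.DataFrame({'is_failure': [0] * 90 + [1] * 10})
treatment = pd.DataFrame({'is_failure': [0] * 80 + [1] * 20})
print(MetricComparer(config).compare(control, treatment))
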
@@ -0,0 +1,216 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import json
import logging
import os

import numpy as np
import pandas as pd

import mct.Constants as Constants
import mct.Utilities as Utils
from mct.FeatureBinGenerator import FeatureBinGenerator


class PreProcessor(object):

    def __init__(self, config: json):
        self.__config = config
        self.__logger = logging.getLogger("mct")
        return

    def pre_process_data(self, control_df: pd.DataFrame, treatment_df: pd.DataFrame) -> \
            (pd.DataFrame, list):
        """
        # Validates the data against the config input.
        # Adds is_null columns based on the config.
        # One-hot encodes the categorical features.
        # Drops non-informative features:
        #   - Categorical: based on a Chi^2 test
        #   - Numerical: when the feature has only a single non-null value
        """

        self.__validate_config()
        self.__validate_column_types(control_df, treatment_df)

        # Partition columns into target, invariant and variant features.
        df = Utils.merge_control_treatment(control_df, treatment_df)
        df_feature_target, df_invariant_columns, df_metric_columns, feature_columns = self.__partition_columns(df)
        self.__validate_data_set(df)

        # Encode categorical features - remove ones with zero variation, or with no impact on the metric.
        # Keep track of numerical columns (possibly containing NULL values).
        df_feature_target_binned, num_cols = FeatureBinGenerator.get_feature_dummies(
            df_feature_target,
            feature_columns,
            self.__config[Constants.metric_column],
            add_null=self.__config[Constants.add_is_null_column],
            p_thresh=0.25,
            min_data_points=1,
            max_categories=10,
            apply_feature_target_metric_dependence_test=self.__config['apply_feature_target_metric_dependence_test'])

        # Drop the target metric column and merge the partitions back together.
        df_metric = self.__merge_columns(df_feature_target_binned, df_invariant_columns, df_metric_columns)

        return df_metric, num_cols

    def __merge_columns(self, df_feature_target_binned, df_invariant_columns, df_metric_columns):
        metric_column = self.__config[Constants.metric_column]
        df_feature_target_binned.drop(metric_column, axis=1, inplace=True)
        feature_columns = list(df_feature_target_binned.columns)
        if not feature_columns:
            raise Exception("There is no feature left that meets the threshold criteria")
        self.__config[Constants.feature_columns] = feature_columns
        # Join the feature, invariant and target data_frames.
        df_metric = df_feature_target_binned.merge(df_invariant_columns, copy=False, left_index=True, right_index=True)
        df_metric = df_metric.merge(df_metric_columns, left_index=True, right_index=True)
        df_metric.reset_index(drop=True, inplace=True)
        return df_metric

    def __partition_columns(self, df):
        # Set the metric columns: contains the metric column and the Constants.group_column_name column.
        metric_column = self.__config[Constants.metric_column]
        df_metric_columns = df[[Constants.group_column_name, metric_column]]
        # Set invariant columns.
        invariant_columns = self.__get_available_features(df, self.__config[Constants.invariant_columns])
        df[invariant_columns] = df[invariant_columns].astype('object')
        df[invariant_columns] = df[invariant_columns].fillna('NULL')
        df_invariant = df[invariant_columns]
        # Set feature columns.
        feature_columns = self.__get_available_features(df, self.__config[Constants.feature_columns])
        df_feature_columns = df[feature_columns]
        # Merge features and the metric column.
        df_feature_target = df_feature_columns.merge(
            pd.DataFrame(df_metric_columns[metric_column]),
            left_index=True,
            right_index=True)
        return df_feature_target, df_invariant, df_metric_columns, feature_columns

    def __validate_config(self):
        config = self.__config
        successful = True

        # Check access to the output folder.
        output_folder = config[Constants.results_dir]
        if not os.access(output_folder, os.W_OK):
            successful = False
            self.__logger.error('There is no write access to the output folder: {0}'.format(output_folder))

        # Make sure all config parameters exist.
        missing_keys = [key for key in Constants.required_config_keys if key not in config.keys()]
        if missing_keys:
            missing = ','.join(str(x) for x in missing_keys)
            successful = False
            self.__logger.error('The following config parameters are missing: {0}'.format(missing))

        # Make sure there is no intersection of metric_col, invariant_columns and feature_columns,
        # and deduplicate if they have common features. Priority is given in the following order:
        # 1) metric_col
        # 2) invariant_columns
        # 3) feature_columns
        config[Constants.feature_columns] = list(set(config[Constants.feature_columns]))
        config[Constants.invariant_columns] = list(set(config[Constants.invariant_columns]))

        if config[Constants.metric_column] in config[Constants.invariant_columns]:
            config[Constants.invariant_columns].remove(config[Constants.metric_column])
            self.__logger.warning(
                'Metric column {0} cannot be part of invariant columns.'.format(config[Constants.metric_column]))

        if config[Constants.metric_column] in config[Constants.feature_columns]:
            config[Constants.feature_columns].remove(config[Constants.metric_column])
            self.__logger.warning(
                'Metric column {0} cannot be part of feature columns.'.format(config[Constants.metric_column]))

        intersection = set(config[Constants.feature_columns]).intersection(config[Constants.invariant_columns])
        if len(intersection) > 0:
            config[Constants.feature_columns] = [feat for feat in config[Constants.feature_columns] if
                                                 feat not in intersection]
            common = ','.join(str(x) for x in intersection)
            self.__logger.warning(
                'Features {0} are set as invariant and cannot be part of the feature columns.'.format(common))

        if not successful:
            raise Exception('The config-file validation has failed!')
        return

    def __validate_data_set(self, data: pd.DataFrame):
        # Check:
        # 1) There are no duplicate columns.
        # 2) No reserved prefix/suffix is used in a column name.
        # 3) No reserved value is used; e.g. 'other'.

        successful = True

        feature_columns_set = set(data.columns)

        if len(feature_columns_set) != len(data.columns):
            successful = False
            self.__logger.error('Dataset has duplicate features.')

        if self.__config[Constants.add_is_null_column]:
            for column in feature_columns_set:
                is_null_name = Utils.get_is_null_column_name(column)
                if is_null_name in feature_columns_set:
                    successful = False
                    self.__logger.error('{0} suffix is reserved for a computed is_null column for feature {1} '.format(
                        Utils.get_is_null_column_name(''), column))

        for feature in self.__config[Constants.feature_columns]:
            # Check the column values (not the index) for the reserved 'other' label.
            if Constants.other_feature_cluster_name in data[feature].values:
                successful = False
                self.__logger.error('Value {0} is a reserved name and it appears as a value in feature {1}'.format(
                    Constants.other_feature_cluster_name, feature))

        if not successful:
            raise Exception('The data-set validation has failed!')
        return

    def __validate_column_types(self, control: pd.DataFrame, treatment: pd.DataFrame):
        # 1) Validate that reserved column names are not used in the control/treatment data.
        # 2) Validate that the target metric is either 0/1 or True/False.
        # 3) There is no column of type DateTime or TimeDelta.

        successful = True
        reserved_column_names: set = {Constants.group_column_name}
        feature_columns_list = set(control.columns).union(set(treatment.columns))
        reserved_in_use: set = reserved_column_names.intersection(set(feature_columns_list))

        if len(reserved_in_use) > 0:
            successful = False
            self.__logger.error('Dataset has features called {0}. These are reserved keywords.'.format(
                ','.join(list(reserved_in_use))))

        metric_values_c = control[self.__config[Constants.metric_column]].unique().astype(int)
        metric_values_t = treatment[self.__config[Constants.metric_column]].unique().astype(int)
        if (len([value for value in metric_values_c if value not in [1, 0]]) > 0) or (
                len([value for value in metric_values_t if value not in [1, 0]]) > 0):
            successful = False
            self.__logger.error('We currently only support a binary target metric.')

        features = set(self.__config[Constants.feature_columns]).union(set(self.__config[Constants.invariant_columns]))
        for feature in features:
            if (control[feature].dtype == np.datetime64) or (treatment[feature].dtype == np.datetime64) or \
                    (control[feature].dtype == np.timedelta64) or (treatment[feature].dtype == np.timedelta64):
                if feature in self.__config[Constants.feature_columns]:
                    self.__config[Constants.feature_columns].remove(feature)
                if feature in self.__config[Constants.invariant_columns]:
                    self.__config[Constants.invariant_columns].remove(feature)
                message = 'Date/Time features are not supported. We have removed column {0} from the analysis'.format(
                    feature)
                self.__logger.warning(message)

        if not successful:
            raise Exception('The column-type validation has failed!')
        return

    def __get_available_features(self, df: pd.DataFrame, feature_set: list) -> list:
        df_cols = set(df.columns)
        feature_cols = set()
        for feature in feature_set:
            if feature in df_cols:
                feature_cols.add(feature)
            else:
                self.__logger.warning('Feature {0} is missing in the data-set.'.format(feature))

        return list(feature_cols)
@@ -0,0 +1,28 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import pandas as pd

import mct.Constants as Constants


def get_is_null_column_name(col):
    return col + "_is_null"


def add_group_columns(control: pd.DataFrame, treatment: pd.DataFrame,
                      group_column_name: str = Constants.group_column_name, force: bool = False):
    if force or (group_column_name not in control.columns):
        control[group_column_name] = Constants.control_group
    if force or (group_column_name not in treatment.columns):
        treatment[group_column_name] = Constants.treatment_group
    return


def merge_control_treatment(control: pd.DataFrame,
                            treatment: pd.DataFrame,
                            group_column_name: str = Constants.group_column_name) -> pd.DataFrame:
    add_group_columns(control, treatment, group_column_name=group_column_name, force=True)
    df = control.append(treatment)
    df.reset_index(drop=True, inplace=True)
    return df
@@ -0,0 +1,78 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import json
import logging
import os

import numpy as np
import pandas as pd

import mct.Constants as Constants


class Visualizer(object):
    """
    Class to create a visualization/report of the result of the comparison.
    """

    def __init__(self, config: json):
        self.config = config
        self.__logger = logging.getLogger("mct")
        return

    def create_metric_delta_report(self, metric_delta: pd.DataFrame, result_file: str):
        # Sort and round metric_delta.
        metric_delta.sort_values(
            by=[Constants.mean_difference, Constants.mean_control, Constants.mean_treatment],
            inplace=True)
        metric_delta[Constants.mean_difference] = np.round(metric_delta[Constants.mean_difference] * 100, 2)
        metric_delta[Constants.mean_control] = np.round(metric_delta[Constants.mean_control] * 100, 2)
        metric_delta[Constants.mean_treatment] = np.round(metric_delta[Constants.mean_treatment] * 100, 2)
        metric_delta[Constants.p_value] = np.round(metric_delta[Constants.p_value], 4)
        # Output metric_delta as HTML.
        result_file = os.path.join(self.config[Constants.results_dir], result_file)
        metric_delta[
            [Constants.mean_control, Constants.mean_treatment, Constants.mean_difference, Constants.p_value,
             Constants.is_stat_sig]].to_html(
            result_file, index=False, justify='center', index_names=False)

    def create_bias_result_report(self, bias_results: pd.DataFrame, deviation: pd.DataFrame):
        results_dir = self.config[Constants.results_dir]

        # Sort and round bias results.
        bias_results[Constants.p_value_threshold] = np.round(bias_results[Constants.p_value_threshold], 4)
        bias_results[Constants.percentage_deviation] = np.round(bias_results[Constants.percentage_deviation], 2)
        bias_results.sort_values(by=[Constants.percentage_deviation, Constants.feature], ascending=False, inplace=True)

        bias_file = os.path.join(results_dir, "bias_results.html")
        bias_result_columns = [Constants.feature, Constants.num_of_bins, Constants.p_value_threshold,
                               Constants.percentage_deviation,
                               Constants.resample]
        bias_results[bias_result_columns].to_html(bias_file, index=False, justify='center', index_names=False)

        # Sort and round deviations.
        deviation.sort_values(by=[Constants.control_group, Constants.feature, Constants.bin_column], ascending=False,
                              inplace=True)
        deviation_file = os.path.join(results_dir, "bias_deviations.html")
        deviation_result_columns = [Constants.feature, Constants.bin_column, Constants.control_percent,
                                    Constants.treatment_percent]
        deviation[Constants.control_percent] = np.round(deviation[Constants.control_percent], 2)
        deviation[Constants.treatment_percent] = np.round(deviation[Constants.treatment_percent], 2)
        deviation[deviation_result_columns].to_html(deviation_file, index=False, justify='center', index_names=False)

    def create_feature_rank_report(self, ranked_feature: pd.DataFrame):
        feature_ranking_file_csv = os.path.join(self.config[Constants.results_dir], "feature_ranking.csv")
        sorted_feature = ranked_feature.sort_values(
            by=[Constants.hazard_score, Constants.percent_delta, Constants.count_delta, Constants.feature,
                Constants.expected_failures],
            ascending=False, inplace=False)

        sorted_feature.reset_index(inplace=True, drop=True)
        sorted_feature.to_csv(feature_ranking_file_csv)
@@ -0,0 +1,2 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
@@ -0,0 +1,9 @@
numpy==1.16.3
scipy==1.2.1
scikit-learn==0.21.1
pandas==0.25.3
IPython==7.5.0
matplotlib==3.0.3
statsmodels==0.9.0
jinja2==2.10.1
patsy==0.5.1
@@ -0,0 +1,2 @@
[wheel]
universal = 1
@@ -0,0 +1,46 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

#!/usr/bin/env python
# -*- coding: utf-8 -*-


from setuptools import find_packages
from setuptools import setup

with open('README.rst') as readme_file:
    readme = readme_file.read()

with open('HISTORY.rst') as history_file:
    history = history_file.read().replace('.. :changelog:', '')

requirements = [
    # TODO: put package requirements here
]

test_requirements = [
    # TODO: put package test requirements here
]

setup(
    name='mct',
    version='1.0.0',
    description="Tools to compare metrics between datasets, accounting for population differences "
                "and invariant features.",
    long_description=readme + '\n\n' + history,
    author="Jamie Pool, Ashkan Aazami, Ebrahim Beyrami, Jay Gupchup, Martin Ellis",
    author_email='',
    url='https://github.com/microsoft/MS-MCT',
    packages=find_packages(),
    package_dir={'mct': 'mct'},
    include_package_data=True,
    install_requires=requirements,
    zip_safe=False,
    keywords=['mct'],
    classifiers=[
        'Natural Language :: English',
        'Programming Language :: Python :: 3.7',
    ],
    test_suite='tests',
    tests_require=test_requirements
)