Mirror of https://github.com/microsoft/SparseSC.git
renaming penalty parameters
This commit is contained in:
Parent: a34aeef83f
Commit: 0302907e96
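This commit renames the two penalty parameters across every public entry point: `LAMBDA` (the L1 penalty that shrinks the diagonal of the V matrix) becomes `v_pen`, and `L2_PEN_W` (the L2 penalty on the synthetic-control weights) becomes `w_pen`. A minimal before/after sketch of a caller; the data and penalty values here are illustrative, not from the repository:

import numpy as np
import SparseSC as SC

X = np.random.rand(20, 4)   # covariates for 20 control units (illustrative)
Y = np.random.rand(20, 8)   # outcomes over 8 periods (illustrative)

# before this commit:
# scores = SC.CV_score(X=X, Y=Y, LAMBDA=np.array([0.1, 1.0]), L2_PEN_W=0.01)

# after this commit (same semantics, new names):
scores = SC.CV_score(X=X, Y=Y, v_pen=np.array([0.1, 1.0]), w_pen=0.01)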
@@ -18,7 +18,7 @@ def score_train_test(X,
                      grad_splits=None,
                      **kwargs):
     """ Presents a unified api for ct_v_matrix and loo_v_matrix
-    and returns the v_mat, l2_pen_w (possibly calculated, possibly a parameter), and the score
+    and returns the v_mat, w_pen (possibly calculated, possibly a parameter), and the score

     :param X: Matrix of covariates for untreated units
     :type X: coercible to :class:`numpy.matrix`

@@ -83,10 +83,10 @@ def score_train_test(X,
         raise ValueError("X_treat and Y_treat have different number of rows (%s and %s)" %
                          (X.shape[0], Y.shape[0],))

-        # FIT THE V-MATRIX AND POSSIBLY CALCULATE THE L2_PEN_W
+        # FIT THE V-MATRIX AND POSSIBLY CALCULATE THE w_pen
         # note that the weights, score, and loss function value returned here
         # are for the in-sample predictions
-        _, v_mat, _, _, l2_pen_w, _ = \
+        _, v_mat, _, _, w_pen, _ = \
             ct_v_matrix(X = np.vstack((X,X_treat[train, :])),
                         Y = np.vstack((Y,Y_treat[train, :])),
                         treated_units = [X.shape[0] + i for i in range(len(train))],

@@ -97,7 +97,7 @@ def score_train_test(X,
                      Y = np.vstack((Y,Y_treat[test, :])),
                      treated_units = [X.shape[0] + i for i in range(len(test))],
                      V = v_mat,
-                     L2_PEN_W = l2_pen_w)
+                     w_pen = w_pen)

     else: # X_treat *is* None
         # >> K-fold validation on the only control units; assuming that Y

@@ -118,10 +118,10 @@ def score_train_test(X,

                 grad_splits = [ (match(train,_X),match(train,_Y) ) for _X,_Y in grad_splits]

-            # FIT THE V-MATRIX AND POSSIBLY CALCULATE THE L2_PEN_W
+            # FIT THE V-MATRIX AND POSSIBLY CALCULATE THE w_pen
             # note that the weights, score, and loss function value returned
             # here are for the in-sample predictions
-            _, v_mat, _, _, l2_pen_w, _ = \
+            _, v_mat, _, _, w_pen, _ = \
                 fold_v_matrix(X = X[train, :],
                               Y = Y[train, :],
                               # treated_units = [X.shape[0] + i for i in range(len(train))],

@@ -132,15 +132,15 @@ def score_train_test(X,
             s = ct_score(X = X, Y = Y, # formerly: fold_score
                          treated_units = test,
                          V = v_mat,
-                         L2_PEN_W = l2_pen_w)
+                         w_pen = w_pen)

         else:

-            # FIT THE V-MATRIX AND POSSIBLY CALCULATE THE L2_PEN_W
+            # FIT THE V-MATRIX AND POSSIBLY CALCULATE THE w_pen
             # note that the weights, score, and loss function value returned
             # here are for the in-sample predictions
             try:
-                _, v_mat, _, _, l2_pen_w, _ = \
+                _, v_mat, _, _, w_pen, _ = \
                     loo_v_matrix(X = X[train, :],
                                  Y = Y[train, :],
                                  # treated_units = [X.shape[0] + i for i in range(len(train))],

@@ -153,31 +153,31 @@ def score_train_test(X,
             s = ct_score(X = X, Y = Y,
                          treated_units = test,
                          V = v_mat,
-                         L2_PEN_W = l2_pen_w)
+                         w_pen = w_pen)

-    return v_mat, l2_pen_w, s
+    return v_mat, w_pen, s


-def score_train_test_sorted_lambdas(LAMBDA,
+def score_train_test_sorted_lambdas(v_pen,
                                     start=None,
                                     cache=False,
                                     progress=False,
                                     FoldNumber=None,
                                     **kwargs):
     """ a wrapper which calls score_train_test() for each element of an
-    array of `LAMBDA`'s, optionally caching the optimized v_mat and using it
+    array of `v_pen`'s, optionally caching the optimized v_mat and using it
     as the start position for the next iteration.
     """

     # DEFAULTS
-    values = [None]*len(LAMBDA)
+    values = [None]*len(v_pen)

     if progress > 0:
         import time
         t0 = time.time()

-    for i,Lam in enumerate(LAMBDA):
-        v_mat, _, _ = values[i] = score_train_test( LAMBDA = Lam, start = start, **kwargs)
+    for i,Lam in enumerate(v_pen):
+        v_mat, _, _ = values[i] = score_train_test( v_pen = Lam, start = start, **kwargs)

         if cache:
             start = np.diag(v_mat)

@@ -185,21 +185,21 @@ def score_train_test_sorted_lambdas(LAMBDA,
             t1 = time.time()
             if FoldNumber is None:
                 print("lambda: %0.4f, value %s of %s, time elapsed: %0.4f sec." %
-                      (Lam, i+1, len(LAMBDA), t1 - t0, ))
+                      (Lam, i+1, len(v_pen), t1 - t0, ))
                 #print("iteration %s of %s time: %0.4f ,lambda: %0.4f, diags: %s" %
-                #      (i+1, len(LAMBDA), t1 - t0, Lam, np.diag(v_mat),))
+                #      (i+1, len(v_pen), t1 - t0, Lam, np.diag(v_mat),))
             else:
                 print("Fold %s,lambda: %0.4f, value %s of %s, time elapsed: %0.4f sec." %
-                      (FoldNumber, Lam, i+1, len(LAMBDA), t1 - t0, ))
+                      (FoldNumber, Lam, i+1, len(v_pen), t1 - t0, ))
                 #print("Fold %s, iteration %s of %s, time: %0.4f ,lambda: %0.4f, diags: %s" %
-                #      (FoldNumber, i+1, len(LAMBDA), t1 - t0, Lam, np.diag(v_mat),))
+                #      (FoldNumber, i+1, len(v_pen), t1 - t0, Lam, np.diag(v_mat),))
             t0 = time.time()

     return list(zip(*values))


 def CV_score(X,Y,
-             LAMBDA,
+             v_pen,
              X_treat=None,
              Y_treat=None,
              splits=5,

@@ -232,7 +232,7 @@ def CV_score(X,Y,
                          (X.shape[0], Y.shape[0],))

     try:
-        _LAMBDA = iter(LAMBDA)
+        _v_pen = iter(v_pen)
     except TypeError:
         # Lambda is a single value
         multi_lambda = False

@@ -293,7 +293,7 @@ def CV_score(X,Y,
         promises = [ _worker_pool.submit(__score_train_test__,
                                          X = X,
                                          Y = Y,
-                                         LAMBDA = LAMBDA,
+                                         v_pen = v_pen,
                                          X_treat = X_treat,
                                          Y_treat = Y_treat,
                                          train = train,

@@ -313,7 +313,7 @@ def CV_score(X,Y,
                                  Y = Y,
                                  X_treat = X_treat,
                                  Y_treat = Y_treat,
-                                 LAMBDA = LAMBDA,
+                                 v_pen = v_pen,
                                  train = train,
                                  test = test,
                                  FoldNumber = fold,

@@ -355,7 +355,7 @@ def CV_score(X,Y,
         promises = [ _worker_pool.submit(__score_train_test__,
                                          X = X,
                                          Y = Y,
-                                         LAMBDA = LAMBDA,
+                                         v_pen = v_pen,
                                          train = train,
                                          test = test,
                                          FoldNumber = fold,

@@ -371,7 +371,7 @@ def CV_score(X,Y,
     else:
         results = [ __score_train_test__(X = X,
                                          Y = Y,
-                                         LAMBDA = LAMBDA,
+                                         v_pen = v_pen,
                                          train = train,
                                          test = test,
                                          FoldNumber = fold,
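For orientation: after these hunks, `score_train_test` returns `(v_mat, w_pen, score)` for a single train/test split, and `CV_score` fans it out over folds. A sketch of one manual fold under the renamed keywords; the module path `SparseSC.cross_validation` and the fold indices are assumptions for illustration, and `v_pen`/`w_pen` are forwarded through `**kwargs` to the v-matrix functions:

import numpy as np
from SparseSC.cross_validation import score_train_test  # assumed module path

X = np.random.rand(12, 3)                      # 12 control units, 3 covariates
Y = np.random.rand(12, 5)                      # outcomes over 5 periods
train, test = np.arange(8), np.arange(8, 12)   # one illustrative fold

v_mat, w_pen, score = score_train_test(X, Y, train=train, test=test,
                                       v_pen=1.0, w_pen=0.01)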
@@ -199,13 +199,13 @@ def fit(X,Y,
         if covariate_penalties is None:
             if grid is None:
                 grid = np.exp(np.linspace(np.log(Lambda_min),np.log(Lambda_max),grid_points))
-            # GET THE MAXIMUM LAMBDAS: quick ~ ( seconds to tens of seconds )
-            LAMBDA_max = get_max_lambda(Xtrain,
+            # GET THE MAXIMUM v_penS: quick ~ ( seconds to tens of seconds )
+            v_pen_max = get_max_lambda(Xtrain,
                                        Ytrain,
-                                       L2_PEN_W = weight_penalty,
+                                       w_pen = weight_penalty,
                                        grad_splits = gradient_folds,
                                        verbose=verbose)
-            covariate_penalties = grid * LAMBDA_max
+            covariate_penalties = grid * v_pen_max


         if model_type == "retrospective":

@@ -219,9 +219,9 @@ def fit(X,Y,
             scores = CV_score( X = Xtrain,
                                Y = Ytrain,
                                splits = cv_folds,
-                               LAMBDA = covariate_penalties,
+                               v_pen = covariate_penalties,
                                progress = progress,
-                               L2_PEN_W = weight_penalty,
+                               w_pen = weight_penalty,
                                grad_splits = gradient_folds,
                                random_state = gradient_seed, # TODO: Cleanup Task 1
                                quiet = not progress,

@@ -236,7 +236,7 @@ def fit(X,Y,

             best_V = tensor(X = Xtrain,
                             Y = Ytrain,
-                            LAMBDA = best_V_lambda,
+                            v_pen = best_V_lambda,
                             grad_splits = gradient_folds,
                             random_state = gradient_seed, # TODO: Cleanup Task 1
                             **kwargs)

@@ -277,9 +277,9 @@ def fit(X,Y,
             scores = CV_score( X = X,
                                Y = Y,
                                splits = cv_folds,
-                               LAMBDA = covariate_penalties,
+                               v_pen = covariate_penalties,
                                progress = progress,
-                               L2_PEN_W = weight_penalty,
+                               w_pen = weight_penalty,
                                grad_splits = gradient_folds,
                                random_state = gradient_seed, # TODO: Cleanup Task 1
                                quiet = not progress,

@@ -294,7 +294,7 @@ def fit(X,Y,

             best_V = tensor(X = X,
                             Y = Y,
-                            LAMBDA = best_V_lambda,
+                            v_pen = best_V_lambda,
                             grad_splits = gradient_folds,
                             random_state = gradient_seed, # TODO: Cleanup Task 1
                             **kwargs)

@@ -315,9 +315,9 @@ def fit(X,Y,
                                X_treat = Xtest,
                                Y_treat = Ytest,
                                splits = cv_folds,
-                               LAMBDA = covariate_penalties,
+                               v_pen = covariate_penalties,
                                progress = progress,
-                               L2_PEN_W = weight_penalty,
+                               w_pen = weight_penalty,
                                quiet = not progress,
                                **kwargs)


@@ -332,7 +332,7 @@ def fit(X,Y,
                             Y = Ytrain,
                             X_treat = Xtest,
                             Y_treat = Ytest,
-                            LAMBDA = best_V_lambda,
+                            v_pen = best_V_lambda,
                             **kwargs)



@@ -350,11 +350,11 @@ def fit(X,Y,
             sc_weights[treated_units,:] = weights(Xtrain,
                                                   Xtest,
                                                   V = best_V,
-                                                  L2_PEN_W = weight_penalty,
+                                                  w_pen = weight_penalty,
                                                   custom_donor_pool = custom_donor_pool_t)
             sc_weights[control_units,:] = weights(Xtrain,
                                                   V = best_V,
-                                                  L2_PEN_W = weight_penalty,
+                                                  w_pen = weight_penalty,
                                                   custom_donor_pool = custom_donor_pool_c)
         else:


@@ -369,13 +369,13 @@ def fit(X,Y,
         if covariate_penalties is None:
             if grid is None:
                 grid = np.exp(np.linspace(np.log(Lambda_min),np.log(Lambda_max),grid_points))
-            # GET THE MAXIMUM LAMBDAS: quick ~ ( seconds to tens of seconds )
-            LAMBDA_max = get_max_lambda(X,
+            # GET THE MAXIMUM v_penS: quick ~ ( seconds to tens of seconds )
+            v_pen_max = get_max_lambda(X,
                                        Y,
-                                       L2_PEN_W = weight_penalty,
+                                       w_pen = weight_penalty,
                                        grad_splits = gradient_folds,
                                        verbose=verbose)
-            covariate_penalties = grid * LAMBDA_max
+            covariate_penalties = grid * v_pen_max

         # Get the L2 penalty guestimate: very quick ( milliseconds )
         if weight_penalty is None:

@@ -389,9 +389,9 @@ def fit(X,Y,
         scores = CV_score(X = X,
                           Y = Y,
                           splits = cv_folds,
-                          LAMBDA = covariate_penalties,
+                          v_pen = covariate_penalties,
                           progress = progress,
-                          L2_PEN_W = weight_penalty,
+                          w_pen = weight_penalty,
                           grad_splits = gradient_folds,
                           random_state = gradient_seed, # TODO: Cleanup Task 1
                           quiet = not progress,

@@ -406,7 +406,7 @@ def fit(X,Y,

         best_V = tensor(X = X,
                         Y = Y,
-                        LAMBDA = best_V_lambda,
+                        v_pen = best_V_lambda,
                         grad_splits = gradient_folds,
                         random_state = gradient_seed, # TODO: Cleanup Task 1
                         **kwargs)

@@ -414,7 +414,7 @@ def fit(X,Y,
     # GET THE BEST SET OF WEIGHTS
     sc_weights = weights(X,
                          V = best_V,
-                         L2_PEN_W = weight_penalty,
+                         w_pen = weight_penalty,
                          custom_donor_pool = custom_donor_pool)

     return SparseSCFit(X,
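The `fit` flow above builds its candidate `v_pen` values as a log-spaced grid scaled by the maximum useful penalty (`covariate_penalties = grid * v_pen_max`). A self-contained sketch of just that arithmetic, with illustrative bounds standing in for `Lambda_min`, `Lambda_max`, and the result of `get_max_lambda`:

import numpy as np

Lambda_min, Lambda_max, grid_points = 1e-6, 1e-2, 5   # illustrative bounds
grid = np.exp(np.linspace(np.log(Lambda_min), np.log(Lambda_max), grid_points))
# grid == [1e-06, 1e-05, 1e-04, 1e-03, 1e-02]

v_pen_max = 42.0                        # stand-in for get_max_lambda(...)
covariate_penalties = grid * v_pen_max  # candidate v_pen values for CV_score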
@@ -9,11 +9,11 @@ warnings.filterwarnings('ignore')

 def ct_v_matrix(X,
                 Y,
-                LAMBDA = 0,
+                v_pen = 0,
                 treated_units = None,
                 control_units = None,
                 start = None,
-                L2_PEN_W = None,
+                w_pen = None,
                 method = cdl_search,
                 max_lambda = False, # this is terrible at least without documentation...
                 verbose = False,

@@ -29,8 +29,8 @@ def ct_v_matrix(X,
     :param Y: Matrix of Outcomes
     :type Y: coercible to :class:`numpy.matrix`

-    :param LAMBDA: penalty parameter used to shrink L1 norm of v/v.max() toward zero
-    :type LAMBDA: float
+    :param v_pen: penalty parameter used to shrink L1 norm of v/v.max() toward zero
+    :type v_pen: float

     :param treated_units: a list containing the position (rows) of the treated units within X and Y
     :type treated_units: int[] or numpy.ndarray

@@ -41,9 +41,9 @@ def ct_v_matrix(X,
     :param start: initial values for the diagonals of the tensor matrix
     :type start: float[] or numpy.ndarray

-    :param L2_PEN_W: L2 penalty on the magnitude of the deviance of the weight
+    :param w_pen: L2 penalty on the magnitude of the deviance of the weight
         vector from null. Optional.
-    :type L2_PEN_W: float
+    :type w_pen: float

     :param method: The name of a method to be used by scipy.optimize.minimize,
         or a callable with the same API as scipy.optimize.minimize

@@ -95,14 +95,14 @@ def ct_v_matrix(X,
     if X.shape[0] != Y.shape[0]:
         raise ValueError("X and Y have different number of rows (%s and %s)"
                          % (X.shape[0], Y.shape[0],))
-    if not isinstance(LAMBDA, (float, int)):
-        raise TypeError( "LAMBDA is not a number")
-    if L2_PEN_W is None:
-        L2_PEN_W = mean(var(X, axis = 0))
+    if not isinstance(v_pen, (float, int)):
+        raise TypeError( "v_pen is not a number")
+    if w_pen is None:
+        w_pen = mean(var(X, axis = 0))
     else:
-        L2_PEN_W = float(L2_PEN_W)
-    if not isinstance(L2_PEN_W, (float, int)):
-        raise TypeError( "L2_PEN_W is not a number")
+        w_pen = float(w_pen)
+    if not isinstance(w_pen, (float, int)):
+        raise TypeError( "w_pen is not a number")

     # CONSTANTS
     N0, N1, K = len(control_units), len(treated_units), X.shape[1]

@@ -123,7 +123,7 @@ def ct_v_matrix(X,
         Ey = (Y_treated - weights.T.dot(Y_control)).getA()
         # note that (...).copy() assures that x.flags.writeable is True:
         # also einsum is faster than the equivalent (Ey **2).sum()
-        return (np.einsum('ij,ij->',Ey,Ey) + LAMBDA * absolute(V).sum()).copy()
+        return (np.einsum('ij,ij->',Ey,Ey) + v_pen * absolute(V).sum()).copy()

     def _grad(V):
         """ Calculates just the diagonal of dGamma0_dV

@@ -146,25 +146,25 @@ def ct_v_matrix(X,
                 dPI_dV = linalg.solve(A,(dB - dA.dot(AinvB)))
             except linalg.LinAlgError as exc:
                 print("Unique weights not possible.")
-                if L2_PEN_W==0:
-                    print("Try specifying a very small L2_PEN_W rather than 0.")
+                if w_pen==0:
+                    print("Try specifying a very small w_pen rather than 0.")
                 raise exc
             #dPI_dV = Ai.dot(dB - dA.dot(AinvB))
             # faster than the equivalent (Ey * Y_control.T.dot(dPI_dV).T.getA()).sum()
             dGamma0_dV_term2[k] = np.einsum("ij,kj,ki->",Ey, Y_control, dPI_dV)
-        return LAMBDA + 2 * dGamma0_dV_term2
+        return v_pen + 2 * dGamma0_dV_term2

-    L2_PEN_W_mat = 2 * L2_PEN_W * diag(ones(X_control.shape[0]))
+    w_pen_mat = 2 * w_pen * diag(ones(X_control.shape[0]))
     def _weights(V):
         weights = zeros((N0, N1))
-        A = X_control.dot(2*V).dot(X_control.T) + L2_PEN_W_mat # 5
-        B = X_treated.dot(2*V).dot(X_control.T).T + 2 * L2_PEN_W / X_control.shape[0] # 6
+        A = X_control.dot(2*V).dot(X_control.T) + w_pen_mat # 5
+        B = X_treated.dot(2*V).dot(X_control.T).T + 2 * w_pen / X_control.shape[0] # 6
         try:
             b = linalg.solve(A,B)
         except linalg.LinAlgError as exc:
             print("Unique weights not possible.")
-            if L2_PEN_W==0:
-                print("Try specifying a very small L2_PEN_W rather than 0.")
+            if w_pen==0:
+                print("Try specifying a very small w_pen rather than 0.")
             raise exc
         return weights, A, B,b


@@ -187,11 +187,11 @@ def ct_v_matrix(X,
     ts_loss = opt.fun
     ts_score = linalg.norm(errors) / sqrt(prod(errors.shape))

-    return weights, v_mat, ts_score, ts_loss, L2_PEN_W, opt
+    return weights, v_mat, ts_score, ts_loss, w_pen, opt

 def ct_weights(X,
                V,
-               L2_PEN_W,
+               w_pen,
                treated_units = None,
                control_units = None,
                custom_donor_pool = None):

@@ -205,26 +205,26 @@ def ct_weights(X,
     if control_units is None:
         control_units = list(set(range(X.shape[0])) - set(treated_units))

-    def _calc_W_ct(X_treated, X_control, V, L2_PEN_W):
-        A = X_control.dot(2*V).dot(X_control.T) + 2 * L2_PEN_W * diag(ones(X_control.shape[0])) # 5
-        B = X_treated.dot(2*V).dot(X_control.T).T + 2 * L2_PEN_W / X_control.shape[0]# 6
+    def _calc_W_ct(X_treated, X_control, V, w_pen):
+        A = X_control.dot(2*V).dot(X_control.T) + 2 * w_pen * diag(ones(X_control.shape[0])) # 5
+        B = X_treated.dot(2*V).dot(X_control.T).T + 2 * w_pen / X_control.shape[0]# 6
         try:
             weights = linalg.solve(A,B).T
         except linalg.LinAlgError as exc:
             print("Unique weights not possible.")
-            if L2_PEN_W==0:
-                print("Try specifying a very small L2_PEN_W rather than 0.")
+            if w_pen==0:
+                print("Try specifying a very small w_pen rather than 0.")
             raise exc
         return weights


     if custom_donor_pool is None:
-        weights = _calc_W_ct(X[treated_units,:], X[control_units,:], V, L2_PEN_W)
+        weights = _calc_W_ct(X[treated_units,:], X[control_units,:], V, w_pen)
     else:
         weights = np.zeros((len(treated_units),len(control_units)))
         for i, treated_unit in enumerate(treated_units):
             donors = np.where(custom_donor_pool[treated_unit,:])
-            weights[i,donors] = _calc_W_ct(X[treated_unit,:], X[donors,:], V, L2_PEN_W)
+            weights[i,donors] = _calc_W_ct(X[treated_unit,:], X[donors,:], V, w_pen)


     return weights

@@ -232,8 +232,8 @@ def ct_weights(X,
 def ct_score(Y,
              X,
              V,
-             L2_PEN_W,
-             LAMBDA = 0,
+             w_pen,
+             v_pen = 0,
              treated_units = None,
              control_units = None,
              **kwargs):

@@ -248,12 +248,12 @@ def ct_score(Y,
         control_units = list(set(range(X.shape[0])) - set(treated_units))
     weights = ct_weights(X = X,
                          V = V,
-                         L2_PEN_W = L2_PEN_W,
+                         w_pen = w_pen,
                          treated_units = treated_units,
                          control_units = control_units,
                          **kwargs)
     Y_tr = Y[treated_units, :]
     Y_c = Y[control_units, :]
     Ey = (Y_tr - weights.dot(Y_c)).getA()
-    return np.einsum('ij,ij->',Ey,Ey) + LAMBDA * V.sum() # (Ey **2).sum() -> einsum
+    return np.einsum('ij,ij->',Ey,Ey) + v_pen * V.sum() # (Ey **2).sum() -> einsum
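Both renamed penalties appear in `ct_v_matrix`'s internals: `v_pen` multiplies the L1 norm of V in the `_score` objective, while `w_pen` pads the diagonal of the ridge system solved in `_weights`. The comments in the code rely on the identity that `np.einsum('ij,ij->', Ey, Ey)` equals `(Ey ** 2).sum()`; a standalone check of that claim with made-up values:

import numpy as np

Ey = np.random.rand(4, 6) - 0.5        # stand-in prediction errors
V = np.diag([0.3, 0.0, 1.2])           # stand-in diagonal V matrix
v_pen = 0.1

fast = np.einsum('ij,ij->', Ey, Ey) + v_pen * np.absolute(V).sum()
slow = (Ey ** 2).sum() + v_pen * np.abs(V).sum()
assert np.isclose(fast, slow)          # the einsum shortcut matches the naive sum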
@@ -11,12 +11,12 @@ warnings.filterwarnings('ignore')

 def fold_v_matrix(X,
                   Y,
-                  LAMBDA = 0,
+                  v_pen = 0,
                   treated_units = None,
                   control_units = None,
                   non_neg_weights = False,
                   start = None,
-                  L2_PEN_W = None,
+                  w_pen = None,
                   method = cdl_search,
                   max_lambda = False, # this is terrible at least without documentation...
                   grad_splits = 5,

@@ -34,8 +34,8 @@ def fold_v_matrix(X,
     :param Y: Matrix of Outcomes
     :type Y: coercible to :class:`numpy.matrix`

-    :param LAMBDA: penalty parameter used to shrink L1 norm of v/v.max() toward zero
-    :type LAMBDA: float
+    :param v_pen: penalty parameter used to shrink L1 norm of v/v.max() toward zero
+    :type v_pen: float

     :param treated_units: a list containing the position (rows) of the treated units within X and Y
     :type treated_units: int[] or numpy.ndarray

@@ -46,9 +46,9 @@ def fold_v_matrix(X,
     :param start: initial values for the diagonals of the tensor matrix
     :type start: float[] or numpy.ndarray

-    :param L2_PEN_W: L2 penalty on the magnitude of the deviance of the weight
+    :param w_pen: L2 penalty on the magnitude of the deviance of the weight
         vector from null. Optional.
-    :type L2_PEN_W: float
+    :type w_pen: float

     :param method: The name of a method to be used by scipy.optimize.minimize,
         or a callable with the same API as scipy.optimize.minimize

@@ -119,14 +119,14 @@ def fold_v_matrix(X,
     if X.shape[0] != Y.shape[0]:
         raise ValueError("X and Y have different number of rows (%s and %s)" %
                          (X.shape[0], Y.shape[0],))
-    if not isinstance(LAMBDA, (float, int)):
-        raise TypeError( "LAMBDA is not a number")
-    if L2_PEN_W is None:
-        L2_PEN_W = mean(var(X, axis = 0))
+    if not isinstance(v_pen, (float, int)):
+        raise TypeError( "v_pen is not a number")
+    if w_pen is None:
+        w_pen = mean(var(X, axis = 0))
     else:
-        L2_PEN_W = float(L2_PEN_W)
-    if not isinstance(L2_PEN_W, (float, int)):
-        raise TypeError( "L2_PEN_W is not a number")
+        w_pen = float(w_pen)
+    if not isinstance(w_pen, (float, int)):
+        raise TypeError( "w_pen is not a number")
     assert not non_neg_weights, "Bounds not implemented"

     splits = grad_splits # for readability...

@@ -185,7 +185,7 @@ def fold_v_matrix(X,
         Ey = (Y_treated - weights.T.dot(Y_control)).getA()
         # (...).copy() assures that x.flags.writeable is True
         # also einsum is faster than the equivalent (Ey **2).sum()
-        return (np.einsum('ij,ij->',Ey,Ey) + LAMBDA * absolute(V).sum()).copy()
+        return (np.einsum('ij,ij->',Ey,Ey) + v_pen * absolute(V).sum()).copy()

     def _grad(V):
         """ Calculates just the diagonal of dGamma0_dV

@@ -211,19 +211,19 @@ def fold_v_matrix(X,
                 b = linalg.solve(A[in_controls2[i]],dB - dA.dot(b_i[i]))
             except linalg.LinAlgError as exc:
                 print("Unique weights not possible.")
-                if L2_PEN_W==0:
-                    print("Try specifying a very small L2_PEN_W rather than 0.")
+                if w_pen==0:
+                    print("Try specifying a very small w_pen rather than 0.")
                 raise exc
             dPI_dV[np.ix_(in_controls[i], treated_units[test])] = b
         # einsum is faster than the equivalent (Ey * Y_control.T.dot(dPI_dV).T.getA()).sum()
         dGamma0_dV_term2[k] = 2 * np.einsum("ij,kj,ki->",
                                             (weights.T.dot(Y_control) - Y_treated),
                                             Y_control, dPI_dV)
-        return LAMBDA + dGamma0_dV_term2
+        return v_pen + dGamma0_dV_term2

     def _weights(V):
         weights = zeros((N0, N1))
-        A = X.dot(V + V.T).dot(X.T) + 2 * L2_PEN_W * diag(ones(X.shape[0])) # 5
+        A = X.dot(V + V.T).dot(X.T) + 2 * w_pen * diag(ones(X.shape[0])) # 5
         B = X.dot(V + V.T).dot(X.T).T # 6
         for i, (_,test) in enumerate(splits):
             if verbose >=2: # for large sample sizes, linalg.solve is a huge bottle neck,

@@ -232,11 +232,11 @@ def fold_v_matrix(X,
             try:
                 b = b_i[i] = linalg.solve(A[in_controls2[i]],
                                           B[np.ix_(in_controls[i], treated_units[test])]
-                                          + 2 * L2_PEN_W / len(in_controls[i]) )
+                                          + 2 * w_pen / len(in_controls[i]) )
             except linalg.LinAlgError as exc:
                 print("Unique weights not possible.")
-                if L2_PEN_W==0:
-                    print("Try specifying a very small L2_PEN_W rather than 0.")
+                if w_pen==0:
+                    print("Try specifying a very small w_pen rather than 0.")
                 raise exc
             weights[np.ix_(out_controls[i], test)] = b
         return weights, A, B

@@ -259,13 +259,13 @@ def fold_v_matrix(X,
     ts_loss = opt.fun
     ts_score = linalg.norm(errors) / sqrt(prod(errors.shape))

-    return weights, v_mat, ts_score, ts_loss, L2_PEN_W, opt
+    return weights, v_mat, ts_score, ts_loss, w_pen, opt



 def fold_weights(X,
                  V,
-                 L2_PEN_W = None,
+                 w_pen = None,
                  treated_units = None,
                  control_units = None,
                  grad_splits = 5,

@@ -273,8 +273,8 @@ def fold_weights(X,
                  verbose=False):
     """ fit the weights using the k-fold gradient approach
     """
-    if L2_PEN_W is None:
-        L2_PEN_W = mean(var(X, axis = 0))
+    if w_pen is None:
+        w_pen = mean(var(X, axis = 0))
     if treated_units is None:
         if control_units is None:
             # both not provided, include all samples as both treat and control unit.

@@ -310,7 +310,7 @@ def fold_weights(X,
     out_controls = [ctrl_rng[np.logical_not(np.isin(control_units, treated_units[test]))] for _,test in splits] #pylint: disable=line-too-long

     weights = zeros((N0, N1))
-    A = X.dot(V + V.T).dot(X.T) + 2 * L2_PEN_W * diag(ones(X.shape[0])) # 5
+    A = X.dot(V + V.T).dot(X.T) + 2 * w_pen * diag(ones(X.shape[0])) # 5
     B = X.dot(V + V.T).dot(X.T).T # 6

     for i, (_,test) in enumerate(splits):

@@ -318,11 +318,11 @@ def fold_weights(X,
             print("Calculating weights, linalg.solve() call %s of %s" % (i,len(splits),)) #pylint: disable=line-too-long
         try:
             b = linalg.solve(A[in_controls2[i]],
-                             B[np.ix_(in_controls[i], treated_units[test])] + 2 * L2_PEN_W / len(in_controls[i])) #pylint: disable=line-too-long
+                             B[np.ix_(in_controls[i], treated_units[test])] + 2 * w_pen / len(in_controls[i])) #pylint: disable=line-too-long
         except linalg.LinAlgError as exc:
             print("Unique weights not possible.")
-            if L2_PEN_W==0:
-                print("Try specifying a very small L2_PEN_W rather than 0.")
+            if w_pen==0:
+                print("Try specifying a very small w_pen rather than 0.")
             raise exc

         indx2 = np.ix_(out_controls[i], test)

@@ -333,8 +333,8 @@ def fold_weights(X,
 def fold_score(Y,
                X,
                V,
-               L2_PEN_W,
-               LAMBDA = 0,
+               w_pen,
+               v_pen = 0,
               treated_units = None,
               control_units = None,
               **kwargs):

@@ -354,11 +354,11 @@ def fold_score(Y,
        control_units = list(set(range(X.shape[0])) - set(treated_units))
    weights = fold_weights(X = X,
                           V = V,
-                          L2_PEN_W = L2_PEN_W,
+                          w_pen = w_pen,
                           treated_units = treated_units,
                           control_units = control_units,
                           **kwargs)
    Y_tr = Y[treated_units, :]
    Y_c = Y[control_units, :]
    Ey = (Y_tr - weights.dot(Y_c)).getA()
-   return np.einsum('ij,ij->',Ey,Ey) + LAMBDA * V.sum() # (Ey **2).sum() -> einsum
+   return np.einsum('ij,ij->',Ey,Ey) + v_pen * V.sum() # (Ey **2).sum() -> einsum
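`fold_v_matrix` iterates `for i, (_, test) in enumerate(splits)`, so `grad_splits` can be supplied as explicit (train, test) index pairs rather than only a fold count. A sketch of building such pairs by hand; the sizes are illustrative:

import numpy as np

n, k = 12, 3                          # 12 control units, 3 gradient folds
idx = np.arange(n)
folds = np.array_split(idx, k)

# (train, test) pairs in the shape fold_v_matrix iterates over
grad_splits = [(np.setdiff1d(idx, test), test) for test in folds]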
@@ -30,12 +30,12 @@ def complete_treated_control_list(N, treated_units = None, control_units = None)

 def loo_v_matrix(X,
                  Y,
-                 LAMBDA = 0,
+                 v_pen = 0,
                  treated_units = None,
                  control_units = None,
                  non_neg_weights = False,
                  start = None,
-                 L2_PEN_W = None,
+                 w_pen = None,
                  method = cdl_search,
                  max_lambda = False, # this is terrible at least without documentation...
                  solve_method = "standard",

@@ -52,8 +52,8 @@ def loo_v_matrix(X,
     :param Y: Matrix of Outcomes
     :type Y: coercible to :class:`numpy.matrix`

-    :param LAMBDA: penalty parameter used to shrink L1 norm of v/v.max() toward zero
-    :type LAMBDA: float
+    :param v_pen: penalty parameter used to shrink L1 norm of v/v.max() toward zero
+    :type v_pen: float

     :param treated_units: a list containing the position (rows) of the treated units within X and Y
     :type treated_units: int[] or numpy.ndarray

@@ -64,9 +64,9 @@ def loo_v_matrix(X,
     :param start: initial values for the diagonals of the tensor matrix
     :type start: float[] or numpy.ndarray

-    :param L2_PEN_W: L2 penalty on the magnitude of the deviance of the weight
+    :param w_pen: L2 penalty on the magnitude of the deviance of the weight
         vector from null. Optional.
-    :type L2_PEN_W: float
+    :type w_pen: float

     :param method: The name of a method to be used by scipy.optimize.minimize,
         or a callable with the same API as scipy.optimize.minimize

@@ -122,14 +122,14 @@ def loo_v_matrix(X,
     if X.shape[0] != Y.shape[0]:
         raise ValueError("X and Y have different number of rows (%s and %s)" %
                          (X.shape[0], Y.shape[0],))
-    if not isinstance(LAMBDA, (float, int)):
-        raise TypeError( "LAMBDA is not a number")
-    if L2_PEN_W is None:
-        L2_PEN_W = mean(var(X, axis = 0))
+    if not isinstance(v_pen, (float, int)):
+        raise TypeError( "v_pen is not a number")
+    if w_pen is None:
+        w_pen = mean(var(X, axis = 0))
     else:
-        L2_PEN_W = float(L2_PEN_W)
-    if not isinstance(L2_PEN_W, (float, int)):
-        raise TypeError( "L2_PEN_W is not a number")
+        w_pen = float(w_pen)
+    if not isinstance(w_pen, (float, int)):
+        raise TypeError( "w_pen is not a number")
     assert not non_neg_weights, "Bounds not implemented"

     # CONSTANTS

@@ -184,7 +184,7 @@ def loo_v_matrix(X,

         # (...).copy() assures that x.flags.writeable is True:
         # also einsum is faster than the equivalent (Ey **2).sum()
-        return (np.einsum('ij,ij->',Ey,Ey) + LAMBDA * absolute(V).sum()).copy() #
+        return (np.einsum('ij,ij->',Ey,Ey) + v_pen * absolute(V).sum()).copy() #

     def _grad(V):
         """ Calculates just the diagonal of dGamma0_dV

@@ -216,21 +216,21 @@ def loo_v_matrix(X,
                 b = linalg.solve(A[in_controls2[i]],dB - dA.dot(b_i[i]))
             except linalg.LinAlgError as exc:
                 print("Unique weights not possible.")
-                if L2_PEN_W==0:
-                    print("Try specifying a very small L2_PEN_W rather than 0.")
+                if w_pen==0:
+                    print("Try specifying a very small w_pen rather than 0.")
                 raise exc
             dPI_dV[index, i] = b.flatten() # TODO: is the Transpose an error???

         # einsum is faster than the equivalent (Ey * Y_control.T.dot(dPI_dV).T.getA()).sum()
         dGamma0_dV_term2[k] = 2 * np.einsum("ij,kj,ki->",Ey, Y_control, dPI_dV)
-        return LAMBDA + dGamma0_dV_term2
+        return v_pen + dGamma0_dV_term2

     def _weights(V):
         weights = zeros((N0, N1))
         if solve_method == "step-down":
             raise NotImplementedError("The solve_method 'step-down' is currently not implemented")
             # A = (X_control.dot(V + V.T).dot(X_control.T)
-            #      + 2 * L2_PEN_W * diag(ones(X_control.shape[0]))) # 5
+            #      + 2 * w_pen * diag(ones(X_control.shape[0]))) # 5
             # B = X_treated.dot(V + V.T).dot(X_control.T) # 6
             # Ai = A.I
             # for i, trt_unit in enumerate(treated_units):

@@ -241,7 +241,7 @@ def loo_v_matrix(X,
             #     b_i[i] = b
             #     weights[out_controls[i], i] = b.flatten()
         elif solve_method == "standard":
-            A = X.dot(V + V.T).dot(X.T) + 2 * L2_PEN_W * diag(ones(X.shape[0])) # 5
+            A = X.dot(V + V.T).dot(X.T) + 2 * w_pen * diag(ones(X.shape[0])) # 5
             B = X.dot(V + V.T).dot(X.T).T # 6
             for i, trt_unit in enumerate(treated_units):
                 if verbose >= 2: # for large sample sizes, linalg.solve is a huge bottle neck,

@@ -249,11 +249,11 @@ def loo_v_matrix(X,
                           (i,len(in_controls),))
                 try:
                     (b) = b_i[i] = linalg.solve(A[in_controls2[i]],
-                                                B[in_controls[i], trt_unit] + 2 * L2_PEN_W / len(in_controls[i])) #pylint: disable=line-too-long
+                                                B[in_controls[i], trt_unit] + 2 * w_pen / len(in_controls[i])) #pylint: disable=line-too-long
                 except linalg.LinAlgError as exc:
                     print("Unique weights not possible.")
-                    if L2_PEN_W==0:
-                        print("Try specifying a very small L2_PEN_W rather than 0.")
+                    if w_pen==0:
+                        print("Try specifying a very small w_pen rather than 0.")
                     raise exc
                 weights[out_controls[i], i] = b.flatten()
         else:

@@ -278,11 +278,11 @@ def loo_v_matrix(X,
     ts_loss = opt.fun
     ts_score = linalg.norm(errors) / sqrt(prod(errors.shape))

-    return weights, v_mat, ts_score, ts_loss, L2_PEN_W, opt
+    return weights, v_mat, ts_score, ts_loss, w_pen, opt

 def loo_weights(X,
                 V,
-                L2_PEN_W,
+                w_pen,
                 treated_units = None,
                 control_units = None,
                 solve_method = "standard",

@@ -315,7 +315,7 @@ def loo_weights(X,
     if solve_method == "step-down":
         raise NotImplementedError("The solve_method 'step-down' is currently not implemented")
         # A = (X_control.dot(V + V.T).dot(X_control.T)
-        #      + 2 * L2_PEN_W * diag(ones(X_control.shape[0]))) # 5
+        #      + 2 * w_pen * diag(ones(X_control.shape[0]))) # 5
         # B = X_treat.dot( V + V.T).dot(X_control.T) # 6
         # Ai = A.I
         # for i, trt_unit in enumerate(treated_units):

@@ -326,32 +326,32 @@ def loo_weights(X,
         #     weights[out_controls[i], i] = b.flatten()
     elif solve_method == "standard":
         if custom_donor_pool is None:
-            A = X.dot(V + V.T).dot(X.T) + 2 * L2_PEN_W * diag(ones(X.shape[0])) # 5
+            A = X.dot(V + V.T).dot(X.T) + 2 * w_pen * diag(ones(X.shape[0])) # 5
             B = X.dot(V + V.T).dot(X.T).T # 6
             for i, trt_unit in enumerate(treated_units):
                 if verbose >= 2: # for large sample sizes, linalg.solve is a huge bottle neck,
                     print("Calculating weights, linalg.solve() call %s of %s" % (i,len(treated_units),)) #pylint: disable=line-too-long
                 try:
                     (b) = linalg.solve(A[in_controls2[i]],
-                                       B[in_controls[i], trt_unit] + 2 * L2_PEN_W / len(in_controls[i]))
+                                       B[in_controls[i], trt_unit] + 2 * w_pen / len(in_controls[i]))
                 except linalg.LinAlgError as exc:
                     print("Unique weights not possible.")
-                    if L2_PEN_W==0:
-                        print("Try specifying a very small L2_PEN_W rather than 0.")
+                    if w_pen==0:
+                        print("Try specifying a very small w_pen rather than 0.")
                     raise exc

                 weights[out_controls[i], i] = b.flatten()
         else:
             for i, trt_unit in enumerate(treated_units):
                 donors = np.where(custom_donor_pool[trt_unit,:])
-                A = X[donors,:].dot(2*V).dot(X[donors,:].T) + 2 * L2_PEN_W * diag(ones(X[donors,:].shape[0])) # 5
-                B = X[trt_unit,:].dot(2*V).dot(X[donors,:].T).T + 2 * L2_PEN_W / X[donors,:].shape[0]# 6
+                A = X[donors,:].dot(2*V).dot(X[donors,:].T) + 2 * w_pen * diag(ones(X[donors,:].shape[0])) # 5
+                B = X[trt_unit,:].dot(2*V).dot(X[donors,:].T).T + 2 * w_pen / X[donors,:].shape[0]# 6
                 try:
                     weights[donors,i] = linalg.solve(A,B)
                 except linalg.LinAlgError as exc:
                     print("Unique weights not possible.")
-                    if L2_PEN_W==0:
-                        print("Try specifying a very small L2_PEN_W rather than 0.")
+                    if w_pen==0:
+                        print("Try specifying a very small w_pen rather than 0.")
                     raise exc
     else:
         raise ValueError("Unknown Solve Method: " + solve_method)

@@ -361,8 +361,8 @@ def loo_weights(X,
 def loo_score(Y,
               X,
               V,
-              L2_PEN_W,
-              LAMBDA = 0,
+              w_pen,
+              v_pen = 0,
               treated_units = None,
               control_units = None,
               **kwargs):

@@ -373,12 +373,12 @@ def loo_score(Y,
                                                                 control_units)
     weights = loo_weights(X = X,
                           V = V,
-                          L2_PEN_W = L2_PEN_W,
+                          w_pen = w_pen,
                           treated_units = treated_units,
                           control_units = control_units,
                           **kwargs)
     Y_tr = Y[treated_units, :]
     Y_c = Y[control_units, :]
     Ey = (Y_tr - weights.dot(Y_c)).getA()
-    return np.einsum('ij,ij->',Ey,Ey) + LAMBDA * V.sum() # (Ey **2).sum() -> einsum
+    return np.einsum('ij,ij->',Ey,Ey) + v_pen * V.sum() # (Ey **2).sum() -> einsum
@@ -15,7 +15,7 @@ def L2_pen_guestimate(X):
     """
     return np.mean(np.var(X, axis = 0))

-def get_max_lambda(X,Y,L2_PEN_W=None,X_treat=None,Y_treat=None,**kwargs):
+def get_max_lambda(X,Y,w_pen=None,X_treat=None,Y_treat=None,**kwargs):
     """ returns the maximum value of the L1 penalty for which the elements of
     tensor matrix (V) are not all zero.


@@ -42,8 +42,8 @@ def get_max_lambda(X,Y,L2_PEN_W=None,X_treat=None,Y_treat=None,**kwargs):
     if X.shape[0] != Y.shape[0]:
         raise ValueError("X and Y have different number of rows (%s and %s)" %
                          (X.shape[0], Y.shape[0],))
-    if L2_PEN_W is None:
-        L2_PEN_W = np.mean(np.var(X, axis = 0))
+    if w_pen is None:
+        w_pen = np.mean(np.var(X, axis = 0))

     if X_treat is not None:


@@ -64,12 +64,12 @@ def get_max_lambda(X,Y,L2_PEN_W=None,X_treat=None,Y_treat=None,**kwargs):
         treated_units = np.arange(X.shape[0],X.shape[0] + X_treat.shape[0])

         try:
-            _LAMBDA = iter(L2_PEN_W)
+            _v_pen = iter(w_pen)
         except TypeError:
-            # L2_PEN_W is a single value
+            # w_pen is a single value
             return ct_v_matrix(X = np.vstack((X,X_treat)),
                                Y = np.vstack((Y,Y_treat)),
-                               L2_PEN_W = L2_PEN_W,
+                               w_pen = w_pen,
                                control_units = control_units,
                                treated_units = treated_units,
                                max_lambda = True,

@@ -77,35 +77,35 @@ def get_max_lambda(X,Y,L2_PEN_W=None,X_treat=None,Y_treat=None,**kwargs):
                                **kwargs)

         else:
-            # L2_PEN_W is an iterable of values
+            # w_pen is an iterable of values
             return [ ct_v_matrix(X = np.vstack((X,X_treat)),
                                  Y = np.vstack((Y,Y_treat)),
                                  control_units = control_units,
                                  treated_units = treated_units,
                                  max_lambda = True,
                                  gradient_message = _GRADIENT_MESSAGE,
-                                 L2_PEN_W = l2_pen,
+                                 w_pen = l2_pen,
                                  **kwargs)
-                     for l2_pen in L2_PEN_W ]
+                     for l2_pen in w_pen ]

     else:

         try:
-            _LAMBDA = iter(L2_PEN_W)
+            _v_pen = iter(w_pen)
         except TypeError:
             if "grad_splits" in kwargs:
-                # L2_PEN_W is a single value
+                # w_pen is a single value
                 return fold_v_matrix(X = X,
                                      Y = Y,
-                                     L2_PEN_W = L2_PEN_W,
+                                     w_pen = w_pen,
                                      max_lambda = True,
                                      gradient_message = _GRADIENT_MESSAGE,
                                      **kwargs)
-            # L2_PEN_W is a single value
+            # w_pen is a single value
             try:
                 return loo_v_matrix(X = X,
                                     Y = Y,
-                                    L2_PEN_W = L2_PEN_W,
+                                    w_pen = w_pen,
                                     max_lambda = True,
                                     gradient_message = _GRADIENT_MESSAGE,
                                     **kwargs)

@@ -114,23 +114,23 @@ def get_max_lambda(X,Y,L2_PEN_W=None,X_treat=None,Y_treat=None,**kwargs):
         else:
             if "grad_splits" in kwargs:

-                # L2_PEN_W is an iterable of values
+                # w_pen is an iterable of values
                 return [ fold_v_matrix(X = X,
                                        Y = Y,
-                                       L2_PEN_W = l2_pen,
+                                       w_pen = l2_pen,
                                        max_lambda = True,
                                        gradient_message = _GRADIENT_MESSAGE,
                                        **kwargs)
-                         for l2_pen in L2_PEN_W ]
+                         for l2_pen in w_pen ]

-            # L2_PEN_W is an iterable of values
+            # w_pen is an iterable of values
             try:
                 return [ loo_v_matrix(X = X,
                                       Y = Y,
-                                      L2_PEN_W = l2_pen,
+                                      w_pen = l2_pen,
                                       max_lambda = True,
                                       gradient_message = _GRADIENT_MESSAGE,
                                       **kwargs)
-                         for l2_pen in L2_PEN_W ]
+                         for l2_pen in w_pen ]
             except MemoryError:
                 raise RuntimeError("MemoryError encountered. Try setting `grad_splits` parameter to reduce memory requirements.") #pylint: disable=line-too-long
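`get_max_lambda` decides between one solve and a list comprehension of solves by probing `w_pen` with `iter()` and catching `TypeError`. A distilled, runnable version of that dispatch pattern (the function name here is hypothetical):

def dispatch_on_w_pen(w_pen):
    # distilled version of get_max_lambda's scalar-vs-iterable branch
    try:
        iter(w_pen)                  # raises TypeError for a bare float/int
    except TypeError:
        return "single value"        # -> one v-matrix solve with this w_pen
    return "iterable of values"      # -> one solve per candidate w_pen

assert dispatch_on_w_pen(0.01) == "single value"
assert dispatch_on_w_pen([0.01, 0.1]) == "iterable of values"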
@@ -49,7 +49,7 @@ def tensor(X, Y, X_treat=None, Y_treat=None, grad_splits=None, **kwargs):
             raise ValueError("X_treat and Y_treat have different number of rows (%s and %s)" %
                              (X_treat.shape[0], Y_treat.shape[0],))

-        # FIT THE V-MATRIX AND POSSIBLY CALCULATE THE L2_PEN_W
+        # FIT THE V-MATRIX AND POSSIBLY CALCULATE THE w_pen
         # note that the weights, score, and loss function value returned here
         # are for the in-sample predictions
         _, v_mat, _, _, _, _ = \
@@ -29,7 +29,7 @@ def weights(X, X_treat=None, grad_splits = None, custom_donor_pool = None, **kwa
         if X_treat.shape[1] == 0:
             raise ValueError("X_treat.shape[1] == 0")

-        # FIT THE V-MATRIX AND POSSIBLY CALCULATE THE L2_PEN_W
+        # FIT THE V-MATRIX AND POSSIBLY CALCULATE THE w_pen
         # note that the weights, score, and loss function value returned here
         # are for the in-sample predictions
         return ct_weights(X = np.vstack((X,X_treat)),
@@ -82,12 +82,12 @@ if __name__ == "__main__":

     L1_max_loo_grid = SC.get_max_lambda(X_and_Y_pre,
                                         Y_post,
-                                        L2_PEN_W = L2_pen_start_loo * L2_grid)
+                                        w_pen = L2_pen_start_loo * L2_grid)
     L1_max_ct_grid = SC.get_max_lambda(X_control,
                                        Y_pre_control,
                                        X_treat=X_treated,
                                        Y_treat=Y_pre_treated,
-                                       L2_PEN_W = L2_pen_start_ct * L2_grid)
+                                       w_pen = L2_pen_start_ct * L2_grid)
     assert ( L1_max_loo / L1_max_loo_grid == 1/L2_grid).all()
     assert ( L1_max_ct / L1_max_ct_grid == 1/L2_grid).all()


@@ -132,11 +132,11 @@ if __name__ == "__main__":
                            X_treat = X_treated,
                            Y_treat = Y_pre_treated,

-                           # if LAMBDA is a single value, we get a single score, If it's an array of values, we get an array of scores.
-                           LAMBDA = grid * L1_max_ct,
-                           L2_PEN_W = L2_pen_start_ct,
+                           # if v_pen is a single value, we get a single score, If it's an array of values, we get an array of scores.
+                           v_pen = grid * L1_max_ct,
+                           w_pen = L2_pen_start_ct,

-                           # CACHE THE V MATRIX BETWEEN LAMBDA PARAMETERS (generally faster, but path dependent)
+                           # CACHE THE V MATRIX BETWEEN v_pen PARAMETERS (generally faster, but path dependent)
                            cache = False, # False by Default

                            # Run each of the Cross-validation folds in parallel? Often slower

@@ -160,13 +160,13 @@ if __name__ == "__main__":
                            grad_splits = 5,
                            random_state = 10101, # random_state for the splitting during k-fold gradient descent

-                           # L1 Penalty. if LAMBDA is a single value (value), we get a single score, If it's an array of values, we get an array of scores.
-                           LAMBDA = grid * L1_max_loo,
+                           # L1 Penalty. if v_pen is a single value (value), we get a single score, If it's an array of values, we get an array of scores.
+                           v_pen = grid * L1_max_loo,

                            # L2 Penalty (float)
-                           L2_PEN_W = L2_pen_start_loo,
+                           w_pen = L2_pen_start_loo,

-                           # CACHE THE V MATRIX BETWEEN LAMBDA PARAMETERS (generally faster, but path dependent)
+                           # CACHE THE V MATRIX BETWEEN v_pen PARAMETERS (generally faster, but path dependent)
                            #cache = True, # False by Default

                            # Run each of the Cross-validation folds in parallel? Often slower

@@ -191,13 +191,13 @@ if __name__ == "__main__":
                            # with `grad_splits = None` (the default behavior) we get leave-one-out gradient descent.
                            grad_splits = None,

-                           # L1 Penalty. if LAMBDA is a single value (value), we get a single score, If it's an array of values, we get an array of scores.
-                           LAMBDA = grid * L1_max_loo,
+                           # L1 Penalty. if v_pen is a single value (value), we get a single score, If it's an array of values, we get an array of scores.
+                           v_pen = grid * L1_max_loo,

                            # L2 Penalty (float)
-                           L2_PEN_W = L2_pen_start_loo,
+                           w_pen = L2_pen_start_loo,

-                           # CACHE THE V MATRIX BETWEEN LAMBDA PARAMETERS (generally faster, but path dependent)
+                           # CACHE THE V MATRIX BETWEEN v_pen PARAMETERS (generally faster, but path dependent)
                            #cache = True, # False by Default

                            # Run each of the Cross-validation folds in parallel? Often slower

@@ -229,13 +229,13 @@ if __name__ == "__main__":
                         Y = Y_pre_control,
                         X_treat = X_treated,
                         Y_treat = Y_pre_treated,
-                        LAMBDA = best_L1_penalty_ct,
-                        L2_PEN_W = L2_pen_start_ct)
+                        v_pen = best_L1_penalty_ct,
+                        w_pen = L2_pen_start_ct)

     SC_weights_ct = SC.weights(X = X_control,
                                X_treat = X_treated,
                                V = V_ct,
-                               L2_PEN_W = L2_pen_start_ct)
+                               w_pen = L2_pen_start_ct)

     Y_post_treated_synthetic_conrols_ct = SC_weights_ct.dot(Y_post_control)
     ct_prediction_error = Y_post_treated_synthetic_conrols_ct - Y_post_treated

@@ -254,12 +254,12 @@ if __name__ == "__main__":
     V_loo = SC.tensor(
         X = X_and_Y_pre [np.arange(100)], # limit the amount of time...
         Y = Y_post [np.arange(100)], # limit the amount of time...
-        LAMBDA = best_L1_penalty_loo,
-        L2_PEN_W = L2_pen_start_loo)
+        v_pen = best_L1_penalty_loo,
+        w_pen = L2_pen_start_loo)

     SC_weights_loo = SC.weights(X = X_control,
                                 V = V_loo,
-                                L2_PEN_W = L2_pen_start_loo)
+                                w_pen = L2_pen_start_loo)

     # in progress...
     import pdb; pdb.set_trace()

@@ -304,9 +304,9 @@ if __name__ == "__main__":
                               Y = Y_pre_control,
                               X_treat = X_treated,
                               Y_treat = Y_pre_treated,
-                              # if LAMBDA is a single value, we get a single score, If it's an array of values, we get an array of scores.
-                              LAMBDA = best_L1_penalty_ct * np.exp(x[0]),
-                              L2_PEN_W = L2_pen_start_ct / np.exp(x[0]),
+                              # if v_pen is a single value, we get a single score, If it's an array of values, we get an array of scores.
+                              v_pen = best_L1_penalty_ct * np.exp(x[0]),
+                              w_pen = L2_pen_start_ct / np.exp(x[0]),
                               # suppress the analysis type message
                               quiet = True)
         t2 = time.time();

@@ -351,9 +351,9 @@ if __name__ == "__main__":
                               Y = Y_pre_control,
                               X_treat = X_treated,
                               Y_treat = Y_pre_treated,
-                              # if LAMBDA is a single value, we get a single score, If it's an array of values, we get an array of scores.
-                              LAMBDA = best_L1_penalty_ct,
-                              L2_PEN_W = L2_pen_start_ct * np.exp(x[0]),
+                              # if v_pen is a single value, we get a single score, If it's an array of values, we get an array of scores.
+                              v_pen = best_L1_penalty_ct,
+                              w_pen = L2_pen_start_ct * np.exp(x[0]),
                               # suppress the analysis type message
                               quiet = True)
@@ -73,13 +73,13 @@ def fit_poc(X,Y,
         Ytest = Y[test,:]

         # Get the L2 penalty guestimate: very quick ( milliseconds )
-        L2_PEN_W = SC.L2_pen_guestimate(Xtrain)
+        w_pen = SC.L2_pen_guestimate(Xtrain)

-        # GET THE MAXIMUM LAMBDAS: quick ~ ( seconds to tens of seconds )
-        LAMBDA_max = SC.get_max_lambda(
+        # GET THE MAXIMUM v_penS: quick ~ ( seconds to tens of seconds )
+        v_pen_max = SC.get_max_lambda(
             Xtrain,
             Ytrain,
-            L2_PEN_W = L2_PEN_W,
+            w_pen = w_pen,
             grad_splits=gradient_folds,
             learning_rate = 0.2, # initial learning rate
             verbose=1)

@@ -92,14 +92,14 @@ def fit_poc(X,Y,
         scores = SC.CV_score( X = Xtrain,
                               Y = Ytrain,
                               splits = cv_folds,
-                              LAMBDA = grid * LAMBDA_max,
+                              v_pen = grid * v_pen_max,
                               progress = True,
-                              L2_PEN_W = L2_PEN_W,
+                              w_pen = w_pen,
                               grad_splits = gradient_folds)

         # GET THE INDEX OF THE BEST SCORE
         best_i = np.argmin(scores)
-        best_lambda = (grid * LAMBDA_max)[best_i]
+        best_lambda = (grid * v_pen_max)[best_i]

         # --------------------------------------------------
         # Phase 2: extract V and weights: slow ( tens of seconds to minutes )

@@ -107,7 +107,7 @@ def fit_poc(X,Y,

         best_V = SC.tensor(X = Xtrain,
                            Y = Ytrain,
-                           LAMBDA = best_lambda,
+                           v_pen = best_lambda,
                            grad_splits = gradient_folds,
                            learning_rate = 0.2)


@@ -115,7 +115,7 @@ def fit_poc(X,Y,
         out_of_sample_weights = SC.weights(Xtrain,
                                            Xtest,
                                            V = best_V,
-                                           L2_PEN_W = L2_PEN_W)
+                                           w_pen = w_pen)

         Y_SC_test = out_of_sample_weights.dot(Ytrain)

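The proof-of-concept ends by applying the fitted donor weights out of sample: each held-out unit's synthetic outcome is a weighted combination of the training units' observed outcomes (`Y_SC_test = out_of_sample_weights.dot(Ytrain)`). A toy numeric version of that final step, with made-up weights:

import numpy as np

Ytrain = np.array([[1.0, 2.0],
                   [3.0, 4.0],
                   [5.0, 6.0]])                     # 3 donor units x 2 periods
out_of_sample_weights = np.array([[0.5, 0.5, 0.0],
                                  [0.0, 0.2, 0.8]]) # 2 held-out units x 3 donors

Y_SC_test = out_of_sample_weights.dot(Ytrain)
# row 0: 0.5*[1,2] + 0.5*[3,4] -> [2.0, 3.0]
# row 1: 0.2*[3,4] + 0.8*[5,6] -> [4.6, 5.6]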