Bug fix for Rank and WMA operators (#1228)

* bug fix: 1) 100 should be used to scale down percentileofscore return to 0-1, not length of array; 2) for (linear) weighted MA(n), weight should be n, n-1, ..., 1 instead of n-1, ..., 0 * use native pandas fucntion for rank * remove useless import * require pandas 1.4+ * rank for py37+pandas 1.3.5 compatibility * lint improvement * lint black fix * use hasattr instead of version to check whether rolling.rank is implemented
2022-11-13 19:03:23 +08:00 · 2022-11-13 19:03:23 +08:00 · 4001a5d157
--- a/qlib/data/ops.py
+++ b/qlib/data/ops.py
@ -34,8 +34,6 @@ np.seterr(invalid="ignore")


 #################### Element-Wise Operator ####################
-
-
 class ElemOperator(ExpressionOps):
    """Element-wise Operator

@ -216,9 +214,7 @@ class Not(NpElemOperator):

    Parameters
    ----------
-    feature_left : Expression
-        feature instance
-    feature_right : Expression
+    feature : Expression
        feature instance

    Returns
@ -241,8 +237,6 @@ class PairOperator(ExpressionOps):
        feature instance or numeric value
    feature_right : Expression
        feature instance or numeric value
-    func : str
-        operator function

    Returns
    ----------
@ -1155,9 +1149,13 @@ class Rank(Rolling):
    def __init__(self, feature, N):
        super(Rank, self).__init__(feature, N, "rank")

+    # for compatiblity of python 3.7, which doesn't support pandas 1.4.0+ which implements Rolling.rank
    def _load_internal(self, instrument, start_index, end_index, *args):
        series = self.feature.load(instrument, start_index, end_index, *args)
-        # TODO: implement in Cython
+
+        rolling_or_expending = series.expanding(min_periods=1) if self.N == 0 else series.rolling(self.N, min_periods=1)
+        if hasattr(rolling_or_expending, "rank"):
+            return rolling_or_expending.rank(pct=True)

        def rank(x):
            if np.isnan(x[-1]):
@ -1165,13 +1163,9 @@ class Rank(Rolling):
            x1 = x[~np.isnan(x)]
            if x1.shape[0] == 0:
                return np.nan
-            return percentileofscore(x1, x1[-1]) / len(x1)
+            return percentileofscore(x1, x1[-1]) / 100

-        if self.N == 0:
-            series = series.expanding(min_periods=1).apply(rank, raw=True)
-        else:
-            series = series.rolling(self.N, min_periods=1).apply(rank, raw=True)
-        return series
+        return rolling_or_expending.apply(rank, raw=True)


 class Count(Rolling):
@ -1341,7 +1335,7 @@ class WMA(Rolling):
        # TODO: implement in Cython

        def weighted_mean(x):
-            w = np.arange(len(x))
+            w = np.arange(len(x)) + 1
            w = w / w.sum()
            return np.nanmean(w * x)