Optimize NumPy array allocation and remove commented-out code

2020-07-06 17:13:55 +01:00 · 2020-07-06 17:13:55 +01:00 · 432b3a8b84
--- a/contrib/sarplus/python/tests/test_pyspark_sar.py
+++ b/contrib/sarplus/python/tests/test_pyspark_sar.py
@@ -149,12 +149,6 @@ def test_e2e(spark, pandas_dummy_dataset, header):
    df = spark.createDataFrame(pandas_dummy_dataset)
    sar.fit(df)

-    # assert 4*4 + 32 == sar.item_similarity.count()
-
-    # print(sar.item_similarity
-    # .toPandas()
-    # .pivot_table(index='i1', columns='i2', values='value'))
-
    test_df = spark.createDataFrame(
        pd.DataFrame({header["col_user"]: [3], header["col_item"]: [2]})
    )
--- a/reco_utils/recommender/deeprec/io/nextitnet_iterator.py
+++ b/reco_utils/recommender/deeprec/io/nextitnet_iterator.py
@@ -122,23 +122,29 @@ class NextItNetIterator(SequentialIterator):
            history_lengths = [len(item_history_batch[i]) for i in range(instance_cnt)]
            max_seq_length_batch = self.max_seq_length
            item_history_batch_all = np.zeros(
-                (instance_cnt * (batch_num_ngs + 1), max_seq_length_batch)
-            ).astype("int32")
+                (instance_cnt * (batch_num_ngs + 1), max_seq_length_batch),
+                dtype=np.int32,
+            )
            item_cate_history_batch_all = np.zeros(
-                (instance_cnt * (batch_num_ngs + 1), max_seq_length_batch)
-            ).astype("int32")
+                (instance_cnt * (batch_num_ngs + 1), max_seq_length_batch),
+                dtype=np.int32,
+            )
            time_diff_batch = np.zeros(
-                (instance_cnt * (batch_num_ngs + 1), max_seq_length_batch)
-            ).astype("float32")
+                (instance_cnt * (batch_num_ngs + 1), max_seq_length_batch),
+                dtype=np.float32,
+            )
            time_from_first_action_batch = np.zeros(
-                (instance_cnt * (batch_num_ngs + 1), max_seq_length_batch)
-            ).astype("float32")
+                (instance_cnt * (batch_num_ngs + 1), max_seq_length_batch),
+                dtype=np.float32,
+            )
            time_to_now_batch = np.zeros(
-                (instance_cnt * (batch_num_ngs + 1), max_seq_length_batch)
-            ).astype("float32")
+                (instance_cnt * (batch_num_ngs + 1), max_seq_length_batch),
+                dtype=np.float32,
+            )
            mask = np.zeros(
-                (instance_cnt * (1 + batch_num_ngs), max_seq_length_batch)
-            ).astype("float32")
+                (instance_cnt * (1 + batch_num_ngs), max_seq_length_batch),
+                dtype=np.float32,
+            )

            for i in range(instance_cnt):
                this_length = min(history_lengths[i], max_seq_length_batch)
@@ -174,7 +180,6 @@ class NextItNetIterator(SequentialIterator):
                    item_cate_list[i],
                ]
                label_list_all.append([1] * max_seq_length_batch)
-                # label_list_all.append(1)
                item_list_all.append(positive_item)
                item_cate_list_all.append(positive_item_cate)

@@ -193,7 +198,6 @@ class NextItNetIterator(SequentialIterator):
                        count_inner += 1

                    label_list_all.append([0] * max_seq_length_batch)
-                    # label_list_all.append(0)
                    item_list_all.append(negative_item_list)
                    item_cate_list_all.append(negative_item_cate_list)
                    count += 1
@@ -213,9 +217,6 @@ class NextItNetIterator(SequentialIterator):
            res["time_from_first_action"] = time_from_first_action_batch
            res["time_to_now"] = time_to_now_batch

-            # print("label_list_all.shape: ", res["labels"].shape)
-            # print("item_list_all.shape: ", res["items"].shape)
-
            return res

        else:
@@ -223,21 +224,21 @@ class NextItNetIterator(SequentialIterator):
            history_lengths = [len(item_history_batch[i]) for i in range(instance_cnt)]
            max_seq_length_batch = self.max_seq_length
            item_history_batch_all = np.zeros(
-                (instance_cnt, max_seq_length_batch)
-            ).astype("int32")
+                (instance_cnt, max_seq_length_batch), dtype=np.int32
+            )
            item_cate_history_batch_all = np.zeros(
-                (instance_cnt, max_seq_length_batch)
-            ).astype("int32")
-            time_diff_batch = np.zeros((instance_cnt, max_seq_length_batch)).astype(
-                "float32"
+                (instance_cnt, max_seq_length_batch), dtype=np.int32
+            )
+            time_diff_batch = np.zeros(
+                (instance_cnt, max_seq_length_batch), dtype=np.float32
            )
            time_from_first_action_batch = np.zeros(
-                (instance_cnt, max_seq_length_batch)
-            ).astype("float32")
-            time_to_now_batch = np.zeros((instance_cnt, max_seq_length_batch)).astype(
-                "float32"
+                (instance_cnt, max_seq_length_batch), dtype=np.float32
            )
-            mask = np.zeros((instance_cnt, max_seq_length_batch)).astype("float32")
+            time_to_now_batch = np.zeros(
+                (instance_cnt, max_seq_length_batch), dtype=np.float32
+            )
+            mask = np.zeros((instance_cnt, max_seq_length_batch), dtype=np.float32)

            for i in range(instance_cnt):
                this_length = min(history_lengths[i], max_seq_length_batch)
--- a/reco_utils/recommender/rlrmc/conjugate_gradient_ms.py
+++ b/reco_utils/recommender/rlrmc/conjugate_gradient_ms.py
@@ -204,7 +204,6 @@ class ConjugateGradientMS(Solver):
                    # if ip_diff = man.inner(newx, diff, desc_dir) = 0
                    except ZeroDivisionError:
                        beta = 1
-                    # print(ip_diff,beta,man.inner(newx, diff, desc_dir))
                elif self._beta_type == BetaTypes.HagerZhang:
                    diff = newgrad - oldgrad
                    Poldgrad = man.transp(x, newx, Pgrad)