Deep dive notebook for BiVAE model using Cornac (#1358)
* upgrade cornac version * update README files * first draft * fix equations * change photo sizes * fix equations * fix equations * add smoke test * add integration test * upgrade cornac version * fix tests * add cornac_bivae_deep_dive
This commit is contained in:
Parent
574bff99ac
Commit
b2c7ce333c
|
@ -95,6 +95,7 @@ To contributors: please add your name to the list when you submit a patch to the
|
|||
* Deep dive notebook demonstrating the use of LightGCN
|
||||
* **[Quoc-Tuan Truong](https://github.com/tqtg)**
|
||||
* BPR notebook using [Cornac](https://github.com/PreferredAI/cornac) framework
|
||||
* BiVAE notebook using [Cornac](https://github.com/PreferredAI/cornac) framework
|
||||
* **[Robert Alexander](https://github.com/roalexan)**
|
||||
* Windows test pipelines
|
||||
* **[Satyadev Ntv](https://github.com/satyadevntv)**
|
||||
|
|
|
@ -74,6 +74,7 @@ The table below lists the recommender algorithms currently available in the repo
|
|||
| Alternating Least Squares (ALS) | [PySpark](examples/00_quick_start/als_movielens.ipynb) | Collaborative Filtering | Matrix factorization algorithm for explicit or implicit feedback in large datasets, optimized by Spark MLLib for scalability and distributed computing capability |
|
||||
| Attentive Asynchronous Singular Value Decomposition (A2SVD)<sup>*</sup> | [Python CPU / Python GPU](examples/00_quick_start/sequential_recsys_amazondataset.ipynb) | Collaborative Filtering | Sequential-based algorithm that aims to capture both long- and short-term user preferences using attention mechanism |
|
||||
| Cornac/Bayesian Personalized Ranking (BPR) | [Python CPU](examples/02_model_collaborative_filtering/cornac_bpr_deep_dive.ipynb) | Collaborative Filtering | Matrix factorization algorithm for predicting item ranking with implicit feedback |
|
||||
| Cornac/Bilateral Variational Autoencoder (BiVAE) | [Python CPU / Python GPU](examples/02_model_collaborative_filtering/cornac_bivae_deep_dive.ipynb) | Collaborative Filtering | Generative model for dyadic data (e.g., user-item interactions) |
|
||||
| Convolutional Sequence Embedding Recommendation (Caser) | [Python CPU / Python GPU](examples/00_quick_start/sequential_recsys_amazondataset.ipynb) | Collaborative Filtering | Algorithm based on convolutions that aims to capture both user’s general preferences and sequential patterns |
|
||||
| Deep Knowledge-Aware Network (DKN)<sup>*</sup> | [Python CPU / Python GPU](examples/00_quick_start/dkn_MIND.ipynb) | Content-Based Filtering | Deep learning algorithm incorporating a knowledge graph and article embeddings to provide powerful news or article recommendations |
|
||||
| Extreme Deep Factorization Machine (xDeepFM)<sup>*</sup> | [Python CPU / Python GPU](examples/00_quick_start/xdeepfm_criteo.ipynb) | Hybrid | Deep learning based algorithm for implicit and explicit feedback with user/item features |
|
||||
|
|
|
@ -6,6 +6,7 @@ In this directory, notebooks are provided to give a deep dive of collaborative f
|
|||
| --- | --- | --- |
|
||||
| [als_deep_dive](als_deep_dive.ipynb) | PySpark | Deep dive on the ALS algorithm and implementation.
|
||||
| [baseline_deep_dive](baseline_deep_dive.ipynb) | --- | Deep dive on baseline performance estimation.
|
||||
| [cornac_bivae_deep_dive](cornac_bivae_deep_dive.ipynb) | Python CPU, GPU | Deep dive on the BiVAE algorithm and implementation.
|
||||
| [cornac_bpr_deep_dive](cornac_bpr_deep_dive.ipynb) | Python CPU | Deep dive on the BPR algorithm and implementation.
|
||||
| [lightgcn_deep_dive](lightgcn_deep_dive.ipynb) | Python CPU, GPU | Deep dive on the LightGCN algorithm and implementation.
|
||||
| [multi_vae_deep_dive](multi_vae_deep_dive.ipynb) | Python CPU, GPU | Deep dive on the Multinomial VAE algorithm and implementation.
|
||||
|
|
File diffs are hidden because one or more lines are too long
2
setup.py
2
setup.py
|
@ -25,7 +25,7 @@ name = environ.get("LIBRARY_NAME", "reco_utils")
|
|||
install_requires = [
|
||||
"bottleneck>=1.2.1,<2",
|
||||
"category_encoders>=1.3.0,<2",
|
||||
"cornac>=1.1.2,<2",
|
||||
"cornac>=1.11.0,<2",
|
||||
"jinja2>=2,<3",
|
||||
"lightfm>=1.15,<2",
|
||||
"lightgbm>=2.2.1,<3",
|
||||
|
|
|
@ -307,6 +307,11 @@ def notebooks():
|
|||
"02_model_collaborative_filtering",
|
||||
"cornac_bpr_deep_dive.ipynb",
|
||||
),
|
||||
"cornac_bivae_deep_dive": os.path.join(
|
||||
folder_notebooks,
|
||||
"02_model_collaborative_filtering",
|
||||
"cornac_bivae_deep_dive.ipynb",
|
||||
),
|
||||
"xlearn_fm_deep_dive": os.path.join(
|
||||
folder_notebooks, "02_model_hybrid", "fm_deep_dive.ipynb"
|
||||
),
|
||||
|
|
|
@ -3,12 +3,13 @@
|
|||
|
||||
import os
|
||||
import pytest
|
||||
|
||||
try:
|
||||
import papermill as pm
|
||||
import scrapbook as sb
|
||||
except ImportError:
|
||||
pass # disable error while collecting tests for non-notebook environments
|
||||
|
||||
|
||||
|
||||
from reco_utils.common.gpu_utils import get_number_gpus
|
||||
|
||||
|
@ -589,3 +590,30 @@ def test_dkn_quickstart_integration(notebooks, output_notebook, kernel_name):
|
|||
assert results["res"]["mean_mrr"] == pytest.approx(0.1639, rel=TOL, abs=ABS_TOL)
|
||||
assert results["res"]["ndcg@5"] == pytest.approx(0.1735, rel=TOL, abs=ABS_TOL)
|
||||
assert results["res"]["ndcg@10"] == pytest.approx(0.2301, rel=TOL, abs=ABS_TOL)
|
||||
|
||||
|
||||
@pytest.mark.gpu
|
||||
@pytest.mark.integration
|
||||
@pytest.mark.parametrize(
|
||||
"size, expected_values",
|
||||
[
|
||||
("1m", dict(map=0.081794, ndcg=0.400983, precision=0.367997, recall=0.138352)),
|
||||
# 10m works but takes too long
|
||||
],
|
||||
)
|
||||
def test_cornac_bivae_integration(
|
||||
notebooks, output_notebook, kernel_name, size, expected_values
|
||||
):
|
||||
notebook_path = notebooks["cornac_bivae_deep_dive"]
|
||||
pm.execute_notebook(
|
||||
notebook_path,
|
||||
OUTPUT_NOTEBOOK,
|
||||
kernel_name=KERNEL_NAME,
|
||||
parameters=dict(MOVIELENS_DATA_SIZE=size),
|
||||
)
|
||||
results = sb.read_notebook(OUTPUT_NOTEBOOK).scraps.dataframe.set_index("name")[
|
||||
"data"
|
||||
]
|
||||
|
||||
for key, value in expected_values.items():
|
||||
assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
|
||||
|
|
|
@ -234,3 +234,24 @@ def test_lstur_smoke(notebooks, output_notebook, kernel_name):
|
|||
0.5977, rel=TOL, abs=ABS_TOL
|
||||
)
|
||||
assert results["res_syn"]["mean_mrr"] == pytest.approx(0.2618, rel=TOL, abs=ABS_TOL)
|
||||
|
||||
|
||||
@pytest.mark.notebooks
|
||||
@pytest.mark.smoke
|
||||
@pytest.mark.gpu
|
||||
def test_cornac_bivae_smoke(notebooks, output_notebook, kernel_name):
|
||||
notebook_path = notebooks["cornac_bivae_deep_dive"]
|
||||
pm.execute_notebook(
|
||||
notebook_path,
|
||||
OUTPUT_NOTEBOOK,
|
||||
kernel_name=KERNEL_NAME,
|
||||
parameters=dict(MOVIELENS_DATA_SIZE="100k"),
|
||||
)
|
||||
results = sb.read_notebook(OUTPUT_NOTEBOOK).scraps.dataframe.set_index("name")[
|
||||
"data"
|
||||
]
|
||||
|
||||
assert results["map"] == pytest.approx(0.146552, rel=TOL, abs=ABS_TOL)
|
||||
assert results["ndcg"] == pytest.approx(0.474124, rel=TOL, abs=ABS_TOL)
|
||||
assert results["precision"] == pytest.approx(0.412527, rel=TOL, abs=ABS_TOL)
|
||||
assert results["recall"] == pytest.approx(0.225064, rel=TOL, abs=ABS_TOL)
|
||||
|
|
|
@ -56,7 +56,7 @@ CONDA_BASE = {
|
|||
"scikit-surprise": "scikit-surprise>=1.0.6",
|
||||
"swig": "swig==3.0.12",
|
||||
"lightgbm": "lightgbm==2.2.1",
|
||||
"cornac": "cornac>=1.1.2",
|
||||
"cornac": "cornac>=1.11.0",
|
||||
"papermill": "papermill>=2.2.0",
|
||||
"tqdm": "tqdm>=4.31.1",
|
||||
}
|
||||
|
|
Loading…
Reference in new issue