Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 27 additions & 11 deletions src/lenskit/sklearn/nmf.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

import numpy as np
from pydantic import AliasChoices, BaseModel, Field
from sklearn.decomposition import non_negative_factorization
from sklearn.decomposition import MiniBatchNMF, non_negative_factorization
from typing_extensions import Literal, override

from lenskit.data import Dataset, ItemList, QueryInput, RecQuery
Expand Down Expand Up @@ -44,6 +44,7 @@ class NMFConfig(BaseModel, extra="forbid"):
alpha_W: float = 0.0
alpha_H: float | Literal["same"] = "same"
l1_ratio: float = 0.0
method: Literal["full", "minibatch"] = "full"


class NMFScorer(Component[ItemList], Trainable):
Expand Down Expand Up @@ -73,16 +74,31 @@ def train(self, data: Dataset, options: TrainingOptions = TrainingOptions()):
_log.info("[%s] sparsifying and normalizing matrix", timer)
r_mat = data.interactions().matrix().scipy(layout="csr", legacy=True)

_log.info("[%s] training NMF", timer)
W, H, n_iter = non_negative_factorization(
r_mat,
beta_loss=self.config.beta_loss,
max_iter=self.config.max_iter,
n_components=self.config.n_components,
alpha_W=self.config.alpha_W,
alpha_H=self.config.alpha_H,
l1_ratio=self.config.l1_ratio,
)
_log.info("[%s] training NMF (%s)", timer, self.config.method)

if self.config.method == "full":
W, H, n_iter = non_negative_factorization(
r_mat,
beta_loss=self.config.beta_loss,
max_iter=self.config.max_iter,
n_components=self.config.n_components,
alpha_W=self.config.alpha_W,
alpha_H=self.config.alpha_H,
l1_ratio=self.config.l1_ratio,
)
else: # minibatch
model = MiniBatchNMF(
beta_loss=self.config.beta_loss,
max_iter=self.config.max_iter,
n_components=self.config.n_components,
alpha_W=self.config.alpha_W,
alpha_H=self.config.alpha_H,
l1_ratio=self.config.l1_ratio,
)
W = model.fit_transform(r_mat)
H = model.components_
n_iter = model.n_iter_

_log.info("[%s] Trained NMF in %d iterations", timer, n_iter)

self.user_components = np.require(W, dtype=np.float32)
Expand Down
34 changes: 13 additions & 21 deletions tests/sklearn/test_nmf.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,48 +31,40 @@ class TestNMF(BasicComponentTests, ScorerTests):
expected_ndcg = 0.22


def test_nmf_basic_build():
algo = nmf.NMFScorer(n_components=2)
@mark.parametrize("method", ["full", "minibatch"])
def test_nmf_basic_build(method):
algo = nmf.NMFScorer(n_components=2, method=method)
algo.train(simple_ds)

assert algo.user_components.shape == (3, 2)


def test_nmf_predict_basic():
_log.info("NMF input data:\n%s", simple_df)
algo = nmf.NMFScorer(n_components=2)
@mark.parametrize("method", ["full", "minibatch"])
def test_nmf_predict_basic(method):
algo = nmf.NMFScorer(n_components=2, method=method)
algo.train(simple_ds)
_log.info("user matrix:\n%s", str(algo.user_components))
_log.info("item matrix:\n%s", str(algo.item_components))

preds = algo(10, ItemList([3]))
assert len(preds) == 1
preds = preds.scores("pandas", index="ids")
assert preds is not None
assert preds.index[0] == 3
assert preds.loc[3] >= 0
assert preds.loc[3] <= 5
assert 0 <= preds.loc[3] <= 5


def test_nmf_predict_bad_item():
algo = nmf.NMFScorer(n_components=2)
@mark.parametrize("method", ["full", "minibatch"])
def test_nmf_predict_bad_item(method):
algo = nmf.NMFScorer(n_components=2, method=method)
algo.train(simple_ds)

preds = algo(10, ItemList([4]))
assert len(preds) == 1
preds = preds.scores("pandas", index="ids")
assert preds is not None
assert preds.index[0] == 4
assert np.isnan(preds.loc[4])


def test_nmf_predict_bad_user():
algo = nmf.NMFScorer(n_components=2)
@mark.parametrize("method", ["full", "minibatch"])
def test_nmf_predict_bad_user(method):
algo = nmf.NMFScorer(n_components=2, method=method)
algo.train(simple_ds)

preds = algo(50, ItemList([3]))
assert len(preds) == 1
preds = preds.scores("pandas", index="ids")
assert preds is not None
assert preds.index[0] == 3
assert np.isnan(preds.loc[3])
14 changes: 7 additions & 7 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading