使用矩阵分解,根据用户给短视频的评分数据,做一个千人千面的个性化推荐系统
- 需要安装推荐系统库surprise,使用如下命令安装: pip install scikit-surprise
import numpy as np
import surprise
class MatrixFactorization(surprise.AlgoBase):
    """Rating predictor based on matrix factorization trained with SGD.

    Every user and every item is represented by a vector of ``n_factors``
    latent factors; a rating is estimated as the dot product of the user
    vector and the item vector. The factors are learned by stochastic
    gradient descent on the squared error with L2 regularization.
    """

    def __init__(self, learning_rate, n_epochs, n_factors, lmd):
        """Store the training hyper-parameters.

        Args:
            learning_rate: SGD step size.
            n_epochs: Number of full passes over the training ratings.
            n_factors: Number of latent factors per user / item.
            lmd: L2 regularization strength.
        """
        # Let AlgoBase set up its own state, as Surprise's guide for custom
        # algorithms recommends.
        super().__init__()
        self.lr = learning_rate
        self.n_epochs = n_epochs
        self.n_factors = n_factors
        self.lmd = lmd

    def fit(self, trainset):
        """Learn the user and item factor matrices from ``trainset``.

        Args:
            trainset: A Surprise trainset; it must provide ``n_users``,
                ``n_items`` and ``all_ratings()``.

        Returns:
            This fitted algorithm instance.
        """
        # AlgoBase.fit stores the trainset on ``self.trainset``.
        super().fit(trainset)
        print('fitting data with SGD...')
        # Small random initial factors, one row per (inner) user / item id.
        u = np.random.normal(0, .1, (trainset.n_users, self.n_factors))
        p = np.random.normal(0, .1, (trainset.n_items, self.n_factors))
        for _ in range(self.n_epochs):
            for i, j, r_ij in trainset.all_ratings():
                err = r_ij - np.dot(u[i], p[j])
                # Compute both descent directions from the *current* factors
                # before touching either row, so the item update does not
                # see the already-updated user vector.
                step_u = err * p[j] - self.lmd * u[i]
                step_p = err * u[i] - self.lmd * p[j]
                u[i] += self.lr * step_u
                p[j] += self.lr * step_p
        self.u, self.p = u, p
        self.trainset = trainset
        return self

    def estimate(self, i, j):
        """Estimate the rating of user ``i`` for item ``j``.

        Args:
            i: User id as passed in by Surprise's prediction machinery.
            j: Item id as passed in by Surprise's prediction machinery.

        Returns:
            The dot product of the learned user and item factors when both
            ids are known to the trainset, otherwise the trainset's global
            mean rating.
        """
        if self.trainset.knows_user(i) and self.trainset.knows_item(j):
            return np.dot(self.u[i], self.p[j])
        # Unknown user or item: fall back to the global mean rating.
        return self.trainset.global_mean
from surprise import BaselineOnly
from surprise import Dataset
from surprise import Reader
from surprise import accuracy
from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split
import os
# Load the tab-separated ratings file; each line holds
# "user item rating timestamp" and ratings are on a 1-5 scale.
file_path = os.path.expanduser('./data/u.data')
reader = Reader(
    line_format='user item rating timestamp',
    sep='\t',
    rating_scale=(1, 5),
)
data = Dataset.load_from_file(file_path, reader=reader)

# Keep 20% of the ratings aside as the test set.
trainset, testset = train_test_split(data, test_size=0.2)

# Train the custom matrix-factorization model and report its mean absolute
# error on the held-out ratings.
algo = MatrixFactorization(
    learning_rate=.005,
    n_epochs=60,
    n_factors=2,
    lmd=0.2,
)
algo.fit(trainset)
predictions = algo.test(testset)
accuracy.mae(predictions)
对比试验
# Baselines for comparison: fit Surprise's built-in KNNBasic and SVD on the
# same split and report the mean absolute error of each, in that order.
for algo_cls in (surprise.KNNBasic, surprise.SVD):
    algo = algo_cls()
    algo.fit(trainset)
    predictions = algo.test(testset)
    accuracy.mae(predictions)
|