我在尝试使用Django框架实现一个群体推荐系统,并使用LensKit的Python工具(具体来说是使用UserUser算法的Recommender对象)。然而,它只在某些情况下(对于特定用户)返回个性化推荐,但对于用户组总是能返回推荐(我创建了一个混合用户,其评分是组内成员评分的平均值,并为其请求推荐)。下面是我请求个别用户和用户组推荐的实现代码:
from rest_framework import viewsets, status
from .models import Movie, Rating, Customer, Recommendation
from .serializers import (
    MovieSerializer,
    RatingSerializer,
    UserSerializer,
    GroupSerializer,
    CustomerSerializer,
    RecommendationSerializer,
)
from rest_framework.response import Response
from rest_framework.decorators import action
from django.contrib.auth.models import User, Group
from rest_framework.authentication import TokenAuthentication
from rest_framework.permissions import IsAuthenticated, AllowAny
from pandas import Series
from lenskit.algorithms import Recommender
from lenskit.algorithms.user_knn import UserUser
import lenskit.datasets as ds

# Upper bound on the number of recommendations stored and returned per request.
MAX_RECOMMENDATIONS = 20


def _requested_genre(request):
    """Return the genre filter supplied with the request, or 'All' if none.

    The request body is checked first (the original behaviour); because the
    actions are GET endpoints, the query string is accepted as well.
    """
    if 'genre' in request.data:
        return request.data['genre']
    return request.query_params.get('genre', 'All')


def _recommend_for_ratings(ratings_by_movie):
    """Fit UserUser on the MovieLens data and recommend for an ad-hoc profile.

    ``ratings_by_movie`` maps movie id -> rating. The profile is not part of
    the training set, so it is passed explicitly with the placeholder id -1.
    Returns the DataFrame produced by ``algo.recommend`` (columns ``item`` and
    ``score``).
    """
    data = ds.MovieLens('datasets/')
    algo = Recommender.adapt(UserUser(15, min_nbrs=3))
    algo.fit(data.ratings)
    # NOTE(review): this frame can be empty for some profiles — presumably
    # when too few usable neighbours exist; confirm and consider a fallback.
    return algo.recommend(user=-1, ratings=Series(ratings_by_movie))


def _store_recommendations(name, recs, genre):
    """Persist up to MAX_RECOMMENDATIONS rows of ``recs`` under ``name``.

    Rows whose movie is unknown to the local database, or whose genres do not
    contain ``genre`` (unless it is 'All'), are skipped.
    """
    stored = 0
    for _, row in recs.iterrows():
        # 'item' comes back as a float; Movie.movieId is looked up as a string.
        movie = Movie.objects.filter(movieId=str(int(row['item']))).first()
        if movie is None:
            continue
        if genre == 'All' or genre in movie.genres:
            Recommendation.objects.create(
                name=name, movieId=movie.movieId, pred_stars=row['score']
            )
            stored += 1
            if stored >= MAX_RECOMMENDATIONS:
                break


def _serialized_recommendations(name):
    """Serialize the movies recommended to ``name``, best prediction first."""
    stored = Recommendation.objects.filter(name=name).order_by('-pred_stars')
    movies = [Movie.objects.get(movieId=rec.movieId) for rec in stored]
    return MovieSerializer(movies, many=True).data


class CustomerViewSet(viewsets.ModelViewSet):
    """CRUD for customers plus personalised recommendations for the caller."""

    queryset = Customer.objects.all()
    serializer_class = CustomerSerializer
    authentication_classes = (TokenAuthentication,)
    permission_classes = (IsAuthenticated,)

    @action(methods=['GET'], detail=False)
    def recommendations(self, request):
        """Return up to MAX_RECOMMENDATIONS movies for the authenticated user.

        An optional ``genre`` restricts the results to movies whose genres
        contain it; the default 'All' applies no filter.
        """
        genre = _requested_genre(request)
        user = request.user
        name = user.username

        # Drop recommendations produced by an earlier request before
        # generating new ones.
        Recommendation.objects.filter(name=name).delete()

        # Rating dictionary of the user: movie id -> stars.
        user_dict = {
            int(rating.movie.movieId): rating.stars
            for rating in Rating.objects.filter(user=user.id)
        }

        recs = _recommend_for_ratings(user_dict)
        _store_recommendations(name, recs, genre)
        return Response(
            _serialized_recommendations(name), status=status.HTTP_200_OK
        )


class GroupViewSet(viewsets.ModelViewSet):
    """CRUD for groups plus recommendations for a whole group."""

    queryset = Group.objects.all()
    serializer_class = GroupSerializer
    authentication_classes = (TokenAuthentication,)
    permission_classes = (IsAuthenticated,)

    @action(methods=['GET'], detail=True)
    def recommendations(self, request, pk=None):
        """Return up to MAX_RECOMMENDATIONS movies for group ``pk``.

        The group is modelled as one hybrid user whose rating for each movie
        is the mean of the members' ratings. Responds with HTTP 400 when the
        caller is not a member of the group.
        """
        genre = _requested_genre(request)

        # Only members may request recommendations for the group.
        group = Group.objects.get(id=pk)
        members = group.user_set.all()
        if request.user not in members:
            response = {'message': '您不是该组的成员'}
            return Response(response, status=status.HTTP_400_BAD_REQUEST)

        name = group.name
        # Drop recommendations produced by an earlier request before
        # generating new ones.
        Recommendation.objects.filter(name=name).delete()

        # movie id -> [sum of stars, number of members who rated it]
        totals = {}
        for member in members:
            for rating in Rating.objects.filter(user=member.id):
                entry = totals.setdefault(int(rating.movie.movieId), [0, 0])
                entry[0] += rating.stars
                entry[1] += 1
        rating_dict = {
            movie_id: total / count
            for movie_id, (total, count) in totals.items()
        }

        # Ask for recommendations for the hybrid user.
        recs = _recommend_for_ratings(rating_dict)
        _store_recommendations(name, recs, genre)
        return Response(
            _serialized_recommendations(name), status=status.HTTP_200_OK
        )
下面是一个正常(有结果)的响应示例:
[ { "id": 17521, "movieId": "318", "title": "肖申克的救赎 (1994)", "genres": "犯罪|戏剧", "link": "https://www.imdb.com/title/tt0111161/", "average_rating": 4.487138263665595, "no_ratings": 311, "poster": "/default-movie.jpg" }, { "id": 17503, "movieId": "296", "title": "低俗小说 (1994)", "genres": "喜剧|犯罪|戏剧|惊悚", "link": "https://www.imdb.com/title/tt0110912/", "average_rating": 4.256172839506172, "no_ratings": 324, "poster": "/default-movie.jpg" }, ...]
下面是一个异常(结果为空)的响应:
[]
在后一种情况下,打印Recommender返回的DataFrame显示如下:
Empty DataFrame
Columns: [item, score]
Index: []
我不确定我做错了什么。谁能帮帮我?
回答:
这个问题最可能的原因是用户-用户推荐系统无法构建足够的可行邻域来提供推荐。这是基于邻域的推荐的一个缺点。
解决方案是切换到一种总能为已有评分的用户给出推荐的算法(例如某种矩阵分解算法),和/或使用一个后备算法(如 Popular),在个性化协同过滤无法给出推荐时使用。
(另一种解决方案是为LensKit实现各种冷启动推荐器或基于内容的推荐器之一,但目前项目中没有提供这些。)