基于协同过滤和内容过滤的Steam游戏推荐模型
立即下载
资源介绍:
通过Steam游戏推荐系统,用户可以快速找到符合自己口味的游戏,节省搜索和筛选时间,提高用户体验。通过精准的推荐提高用户对Steam平台的满意度,增加用户粘性。通过向用户推荐其可能感兴趣的游戏,提高高评分游戏的曝光度和销售量,为游戏开发者和发行商带来更多的商业机会。
import pandas as pd
import numpy as np
import pyhdfs
import translators as ts
import os
import joblib
from sklearn.neighbors import NearestNeighbors
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import coo_matrix
from utils.uploads import *
# Build the game recommendation model
class GameRecommendation:
    """Hybrid game recommender: user-based KNN plus TF-IDF content similarity.

    Collaborative part: a sparse user x game matrix is built from the
    ``user_id``, ``app_id`` and ``score`` columns of ``recommendations_data``
    and indexed with a brute-force Euclidean ``NearestNeighbors`` model.

    Content part: TF-IDF matrices are fitted on the ``tags`` (unigrams) and
    ``description`` (unigrams + bigrams) columns of ``games_data``; pairwise
    cosine similarities are computed by ``train_tags`` / ``train_description``.

    Row ``i`` of the TF-IDF / cosine matrices corresponds to the ``i``-th row
    (by position) of ``games_data``.
    """

    def __init__(self, recommendations_data, games_data):
        """Encode the ratings as a sparse matrix and fit the TF-IDF vectorizers.

        Args:
            recommendations_data: DataFrame with ``user_id``, ``app_id`` and
                ``score`` columns.
            games_data: DataFrame with ``app_id``, ``title``, ``tags`` and
                ``description`` columns.
        """
        self.games_data = games_data[['app_id', 'title']].copy()
        self.knn_model = NearestNeighbors(metric='euclidean', algorithm='brute')

        # Build each categorical once; codes are the matrix coordinates and
        # categories map a coordinate back to the original id.
        user_categories = recommendations_data['user_id'].astype('category')
        item_categories = recommendations_data['app_id'].astype('category')
        self.user_ids = user_categories.cat.codes.astype('int32')
        self.item_ids = item_categories.cat.codes.astype('int32')
        self.unique_user_ids = user_categories.cat.categories.astype('int32')
        self.unique_item_ids = item_categories.cat.categories.astype('int32')
        self.user_game_matrix = coo_matrix(
            (recommendations_data['score'], (self.user_ids, self.item_ids))
        ).astype('float32')

        self.tf_tags = TfidfVectorizer(
            analyzer='word', ngram_range=(1, 1), min_df=1, stop_words='english'
        )
        self.tf_description = TfidfVectorizer(
            analyzer='word', ngram_range=(1, 2), min_df=1, stop_words='english'
        )
        # TfidfVectorizer rejects NaN documents, so missing text becomes ''.
        self.tags_matrix = self.tf_tags.fit_transform(
            games_data['tags'].fillna('')
        ).astype('float32')
        self.description_matrix = self.tf_description.fit_transform(
            games_data['description'].fillna('')
        ).astype('float32')
        self.cosine_tags = None
        self.cosine_description = None

    def train(self):
        """Fit the nearest-neighbour model on the user x game matrix."""
        self.knn_model.fit(self.user_game_matrix)

    def train_tags(self):
        """Compute the dense game x game cosine similarity of the tag TF-IDF."""
        self.cosine_tags = cosine_similarity(self.tags_matrix, self.tags_matrix)

    def train_description(self):
        """Compute the dense game x game cosine similarity of the description TF-IDF."""
        self.cosine_description = cosine_similarity(
            self.description_matrix, self.description_matrix
        )

    # Look up the app_id by title, then the game's column in user_game_matrix
    def get_app_index(self, game):
        """Return the column of ``game`` in ``user_game_matrix``.

        Args:
            game: Exact game title as stored in ``games_data['title']``.

        Returns:
            The column position, or ``-1`` when the game has a title entry but
            its ``app_id`` does not occur in the ratings data.

        Raises:
            IndexError: If no game with that title exists.
        """
        app_id = self.games_data[self.games_data['title'] == game]['app_id'].values[0]
        app_index = self.unique_item_ids.get_indexer([app_id])[0]
        return app_index

    # Find similar users
    def get_similar_users(self, games, scores, n_neighbors=6):
        """Return the ids of users whose ratings are closest to the given profile.

        Args:
            games: Game titles rated by the querying user.
            scores: Scores aligned with ``games`` (``scores[i]`` rates ``games[i]``).
            n_neighbors: Number of neighbours requested from the KNN model.

        Returns:
            List of user ids; the first neighbour returned by the model is
            dropped, so at most ``n_neighbors - 1`` ids are returned.

        Raises:
            IndexError: If a title is unknown or ``scores`` is shorter than ``games``.
        """
        item_columns = [self.get_app_index(game) for game in games]
        profile = np.zeros(self.user_game_matrix.shape[1])
        for position, column in enumerate(item_columns):
            score = scores[position]
            if column < 0:
                # The game has no column in the ratings matrix; writing to
                # index -1 would silently rate the *last* game instead.
                continue
            profile[column] = score
        _, indices = self.knn_model.kneighbors(
            profile.reshape(1, -1), n_neighbors=n_neighbors
        )
        # NOTE(review): the first neighbour is skipped as if it were the query
        # itself, but the query vector is built here and is not necessarily a
        # row of the fitted matrix -- confirm the nearest user should be dropped.
        return [self.unique_user_ids[i] for i in indices.flatten()[1:]]

    # Collect the games of the similar users
    def get_users_games(self, similar_users):
        """Return the titles of all games with a non-zero score from the given users.

        Users that are not present in the ratings data, and rated games that
        have no entry in ``games_data``, are skipped.

        Args:
            similar_users: Iterable of user ids.

        Returns:
            Set of game titles.
        """
        # Convert once: row access on a COO matrix re-converts on every call.
        ratings = self.user_game_matrix.tocsr()
        # First title per app_id, matching a "first matching row" lookup.
        catalogue = self.games_data.drop_duplicates('app_id', keep='first')
        title_by_app = dict(
            zip(catalogue['app_id'].tolist(), catalogue['title'].tolist())
        )

        users_games = set()
        for user in similar_users:
            user_row = self.unique_user_ids.get_indexer([user])[0]
            if user_row < 0:
                # Unknown user: row -1 would silently read the last user.
                continue
            for column in ratings.getrow(user_row).nonzero()[1]:
                app_id = self.unique_item_ids[column]
                if app_id in title_by_app:
                    users_games.add(title_by_app[app_id])
        return users_games

    # Find similar games
    def get_similar_games(self, games, *, n_tags=9, n_description=1, top_n=5):
        """Return the best content-based matches for the given seed games.

        For every seed, the ``n_tags`` most tag-similar games followed by the
        ``n_description`` most description-similar games form a ranked list
        scored ``len - 1 .. 0``. The lists of *all* seeds are merged, each
        title keeps its highest score, and the ``top_n`` titles are returned.
        Each seed normally ranks itself first (a non-empty document has
        cosine similarity 1 with itself), so seeds can appear in the result.

        Args:
            games: Iterable of exact game titles.
            n_tags: Candidates taken per seed from the tag similarity.
            n_description: Candidates taken per seed from the description similarity.
            top_n: Maximum number of titles returned.

        Returns:
            Set of at most ``top_n`` titles; empty if ``games`` is empty.

        Raises:
            RuntimeError: If ``train_tags`` / ``train_description`` were not run.
            IndexError: If a title is unknown.
        """
        if self.cosine_tags is None or self.cosine_description is None:
            raise RuntimeError(
                'train_tags() and train_description() must be called before '
                'get_similar_games()'
            )

        titles = self.games_data['title']
        ranked_lists = []
        for game in games:
            # Positional row, not the index label: the similarity matrices are
            # indexed by row position regardless of the DataFrame's index.
            row = np.flatnonzero((titles == game).to_numpy())[0]
            by_tags = self.cosine_tags[row].argsort()[::-1][:n_tags]
            by_description = self.cosine_description[row].argsort()[::-1][:n_description]
            candidates = [titles.iloc[i] for i in by_tags]
            candidates.extend(titles.iloc[i] for i in by_description)
            ranked_lists.append(pd.DataFrame({
                'title': candidates,
                'score': np.arange(len(candidates) - 1, -1, -1),
            }))

        if not ranked_lists:
            return set()

        recommended_games = pd.concat(ranked_lists, ignore_index=True)
        # Stable sort so that, after de-duplication, each title keeps its best
        # score and ties stay in seed order.
        recommended_games = recommended_games.sort_values(
            'score', ascending=False, kind='mergesort'
        )
        # Remove duplicate games
        recommended_games = recommended_games.drop_duplicates('title', keep='first')
        # Keep the top_n highest-scored games
        return set(recommended_games.head(top_n)['title'].values)

    # Recommend games
    def recommend(self, games, scores):
        """Recommend games for a user described by rated ``games`` / ``scores``.

        Finds similar users, collects their games and returns the games most
        similar in content to those.

        Returns:
            Set of recommended game titles.
        """
        similar_users = self.get_similar_users(games, scores)
        users_games = self.get_users_games(similar_users)
        return self.get_similar_games(users_games)

    # Recommend games, translated to Chinese
    def recommend_CN(self, games, scores):
        """Like ``recommend`` but with every title translated to ``zh-CN``.

        Returns:
            Set of translated titles.
        """
        return {
            ts.translate_text(game, to_language='zh-CN')
            for game in self.recommend(games, scores)
        }
# Create the HDFS client used to read the input data.
fs = pyhdfs.HdfsClient(hosts='192.168.88.132:9870', user_name='zjy')

# Load data
# get_recommendations_data is not defined in this file; presumably it comes
# from the `utils.uploads` star import -- verify.
recommendations_data = get_recommendations_data(fs, '/input/recommendations/')
games_data = pd.read_csv(fs.open('/uploads/games_metadata.csv'))
# Bare expressions such as the two below only display a value in a
# notebook/REPL; they have no effect when this file runs as a script.
recommendations_data.head()
games_data.head()

# Build the model and fit the collaborative and content-based parts.
model = GameRecommendation(recommendations_data, games_data)
model.train()
model.train_tags()
model.train_description()

# Example: recommendations from a single rated game.
games = ['Prince of Persia: Warrior Within™']
scores = [100]
recommend_games = model.recommend(games, scores)
recommend_games

# Example: recommendations from several rated games.
games = ['Dungeons & Dragons: Dark Alliance', 'Dungeon of the ENDLESS™', 'Labyrinth of Refrain: Coven of Dusk']
scores = [100, 20, 50]
recommend_games = model.recommend(games, scores)
recommend_games
recommend_games_CN = model.recommend_CN(games, scores)
recommend_games_CN

# Save the model, creating the folder if it does not exist.
# exist_ok=True avoids the check-then-create race of a separate
# os.path.exists() test.
os.makedirs('output/model/', exist_ok=True)
joblib.dump(model, 'output/model/model.pkl')