From 539ae24b0ba66725e930b0dd72ea120b2f838872 Mon Sep 17 00:00:00 2001 From: Tom Bray <tom.bray@student-cs.fr> Date: Thu, 9 Jun 2022 00:38:13 +0200 Subject: [PATCH] primary recommendation system done --- algo/recommendation.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/algo/recommendation.py b/algo/recommendation.py index 8dfd595..735e40f 100644 --- a/algo/recommendation.py +++ b/algo/recommendation.py @@ -1,10 +1,10 @@ from pymongo import MongoClient import pandas as pd -#import ast +import ast from sklearn.feature_extraction.text import CountVectorizer from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.metrics.pairwise import cosine_similarity -#import seaborn as sns +import seaborn as sns import numpy as np import matplotlib.pyplot as plt @@ -16,9 +16,8 @@ def dbToDf(): client = MongoClient("mongodb://group3:GJF6cQqM4RLxBfNb@cs2022.lmichelin.fr:27017/group3?ssl=true") db = client.group3 collection = db.movies_populated - cursor = collection.find() + cursor = collection.find({},{"_id":1, "original_title": 1, "genre": 1, "id":1, "overview":1, "popularity":1, "vote_count":1, "release_date":1, "cast": {"name":1, "order":1}}) df=pd.DataFrame(list(cursor)) - return df def preFiltering(df,percent=15): @@ -49,7 +48,7 @@ def similarity(df): ''' tf_idf = TfidfVectorizer(stop_words='english') tf_idf_matrix = tf_idf.fit_transform(df['overview']); - + print(tf_idf_matrix) # calculating cosine similarity between movies cosine_similarity_matrix = cosine_similarity(tf_idf_matrix, tf_idf_matrix) @@ -83,7 +82,6 @@ def recommendations_on_overview( original_title, df, number_of_recommendations): #calculates similarity scores of all movies calculated_sim = similarity(df) - similarity_scores = list(enumerate(calculated_sim[index])) similarity_scores_sorted = sorted(similarity_scores, key=lambda x: x[1], reverse=True) @@ -94,4 +92,4 @@ def recommendations_on_overview( original_title, df, number_of_recommendations): df = dbToDf() -print(recommendations_on_overview( 'Batman', df, 9)) \ No newline at end of file +print(recommendations_on_overview('Avatar', df, 9)) -- GitLab