diff --git a/algo/adreco.py b/algo/adreco.py index fc477936033d7f50597202ea5407156489b7cb05..142e3b22cac41a03f8a10fc9c89eaf9d100d608c 100644 --- a/algo/adreco.py +++ b/algo/adreco.py @@ -36,6 +36,29 @@ def movieDbToDf(): return df +def userDbToDf(): + ''' + This function converts the users collection from MongoDB into a pandas DataFrame + ''' + #load DB + client = MongoClient("mongodb://group3:GJF6cQqM4RLxBfNb@cs2022.lmichelin.fr:27017/group3?ssl=true") + db = client.group3 + collection = db.users + + #projection on useful data + cursor = collection.find({},{"_id":1, "liked_movies": 1, "update":1}) + df=pd.DataFrame(list(cursor)) + + return df + +def loadRecDB(): + + #load DB + client = MongoClient("mongodb://group3:GJF6cQqM4RLxBfNb@cs2022.lmichelin.fr:27017/group3?ssl=true") + db = client.group3 + collection = db['recommendations'] + return collection + def preFiltering(df,percent=90): ''' This function removes movies who do not have enough votes to be evaluated @@ -103,7 +126,6 @@ def index_from_id(df,id): ''' return df[df['_id']==id].index.values[0] - def recommendations(original_title, df, number_of_recommendations): #prefilter the dataframe @@ -130,6 +152,10 @@ def recommendations(original_title, df, number_of_recommendations): return df['original_title'].iloc[recommendations_indices] def formatingFeatures(df_row): + """ + This function creates a new column "features" in the df + used to calculate similarities between users_profiles and movies + """ g = [] genres = [] k=[] @@ -150,21 +176,6 @@ def formatingFeatures(df_row): return ' '.join([genres]*w_genres)+' '+' '.join([keywords]*w_keywords)+' '+' '.join([str(df_row['main_actor'])]*w_actor)+' '+' '.join([str(df_row['director'])]*w_director)+' '+' '.join([str(df_row['release_date'])]*w_release_date) -def userDbToDf(): - ''' - This function convert a movie DataBase from mongoDB into a pandas DataFrame - ''' - #load DB - client = 
MongoClient("mongodb://group3:GJF6cQqM4RLxBfNb@cs2022.lmichelin.fr:27017/group3?ssl=true") - db = client.group3 - collection = db.users - - #projection on useful data - cursor = collection.find({},{"_id":1, "liked_movies": 1, "update":1}) - df=pd.DataFrame(list(cursor)) - - return df - def user_profile( user_index, moviesdf, usersdf, vectMatrix ): """ This function creates a user profile based on the likef movies of the user @@ -199,16 +210,12 @@ def user_profile( user_index, moviesdf, usersdf, vectMatrix ): else: return [i for i in range(100)] - -def loadRecDB(): - - #load DB - client = MongoClient("mongodb://group3:GJF6cQqM4RLxBfNb@cs2022.lmichelin.fr:27017/group3?ssl=true") - db = client.group3 - collection = db['recommendations'] - return collection def updateDB(): + """ + This function updates the recommendation DB based on the likes of the users + """ + #loadDB moviesdf = movieDbToDf()