"""Jaccard similarity between words, based on their character n-grams.

Each word is broken into its character 1-, 2- and 3-grams (``decoupage``);
two words are then compared with the Jaccard index of those n-gram sets
(``Js``).

When run as a script, the first words of a GloVe ``vocab.txt`` file are
loaded into the module-level list ``mots``.  Importing the module only
defines the functions and has no side effects.
"""
import math
import os
from itertools import islice

import numpy as np

# Directory that holds the GloVe vocabulary file on the author's machine.
VOCAB_DIR = 'C:\\users\\Michel\\Documents\\apizee07\\Glove'


def charger_mots(chemin='vocab.txt', limite=100):
    """Return the first token of each of the first ``limite`` lines of a file.

    Args:
        chemin: Path of the vocabulary file, one entry per line, where the
            word is the first space-separated field.
        limite: Maximum number of lines to read.

    Returns:
        A list with one word per line read (an empty string for a blank line).
    """
    # The context manager closes the file even if decoding fails, and islice
    # stops after ``limite`` lines instead of reading the whole file.
    with open(chemin, encoding="utf8") as f:
        return [ligne.rstrip().split(' ')[0] for ligne in islice(f, limite)]


## Jaccard Similarities

def decoupage(mot):
    """Split a word into its character 1-grams, 2-grams and 3-grams.

    Args:
        mot: The word to split (a string, or any iterable of strings).

    Returns:
        A list holding every 1-gram, then every 2-gram, then every 3-gram,
        each group in order of appearance.  Repeated n-grams are kept, and a
        word shorter than ``n`` characters contributes no n-gram of size ``n``.
    """
    lettres = list(mot)
    return [
        "".join(lettres[i:i + taille])
        for taille in (1, 2, 3)
        for i in range(len(lettres) - taille + 1)
    ]


def intersection(l1, l2):
    """Return the distinct elements of ``l1`` that also appear in ``l2``.

    The result follows the order of first appearance in ``l1`` and contains
    no duplicates, so its length is the size of the set intersection.
    Neither argument is modified.
    """
    inter = []
    for element in l1:
        # Skip elements already collected so duplicates in l1 count once.
        if element in l2 and element not in inter:
            inter.append(element)
    return inter


def union(l1, l2):
    """Return the distinct elements appearing in ``l2`` or ``l1``.

    Elements of ``l2`` come first, followed by the elements found only in
    ``l1``, each in order of first appearance and without duplicates, so the
    length of the result is the size of the set union.  Neither argument is
    modified: the result is always a new list.
    """
    uni = []
    for source in (l2, l1):
        for element in source:
            if element not in uni:
                uni.append(element)
    return uni


def Js(mot1, mot2):
    """Return the Jaccard similarity of two words over their n-grams.

    The similarity is ``|A ∩ B| / |A ∪ B|`` where ``A`` and ``B`` are the
    sets of character 1-, 2- and 3-grams of each word, so it always lies in
    ``[0, 1]``.

    Args:
        mot1: First word.
        mot2: Second word.

    Returns:
        A float in ``[0, 1]``; ``1.0`` when both words are empty (two empty
        sets are considered identical, which also avoids a division by zero).
    """
    d1 = decoupage(mot1)
    d2 = decoupage(mot2)
    tous = union(d1, d2)
    if not tous:
        return 1.0
    return len(intersection(d1, d2)) / len(tous)


if __name__ == "__main__":
    # Script mode: load the first 100 vocabulary words into ``mots``.
    os.chdir(VOCAB_DIR)
    mots = charger_mots('vocab.txt', 100)