Merge branch 'GatherBcastAdditionOfN' into 'GatherBcast'

Gather bcast addition of n. See merge request 2019marechals/st7-intel!3

Merge branch 'GatherBcastAdditionOfN' into 'GatherBcast'
c94ea4e7 · Paul Saurou · 49bdac35 · ee894144 · c94ea4e7 · c94ea4e7
Commit c94ea4e7 authored Mar 22, 2021 by Paul Saurou
--- a/Appli-iso3dfd/ACO.py
+++ b/Appli-iso3dfd/ACO.py
@@ -6,7 +6,7 @@ import compute_path
 import compute_tau
 import launcher_SUBP
-def ACO(Me, NbP, comm, alpha, rho, Q, nb_ants, tau_0, n_iter, n1=256, n2=256, n3=256,
+def ACO(Me, NbP, comm, alpha, rho, Q, nb_ants, tau_0, n_iter, n1_max=256, n2_max=256, n3_max=256,
 		fancy_strategy='AS', sub_threshold=0.1, sup_threshold=10**9,
 		nb_threads=8, reps=100):
 	""" Ant colony optimization of the cache blocking parameters for the execution of
@@ -22,9 +22,9 @@ def ACO(Me, NbP, comm, alpha, rho, Q, nb_ants, tau_0, n_iter, n1=256, n2=256, n3
 			nb_ants (int): number of ants
 			tau_0 (float): initial quantity of pheromones on each edge
 			n_iter (int): number of cycles done for ACO
-			n1 (int, optional): First dimension of matrix. Defaults to 256.
+			n1_max (int, optional): Maximal first dimension of matrix. Defaults to 256.
-			n2 (int, optional): Second dimension of matrix. Defaults to 256.
+			n2_max (int, optional): Maximal second dimension of matrix. Defaults to 256.
-			n3 (int, optional): Third dimension of matrix. Defaults to 256.
+			n3_max (int, optional): Maximal third dimension of matrix. Defaults to 256.
 			fancy_strategy (string, optional): Strategy used to update tau. Defaults to 'AS'.
 			sub_threshold (float, optional): Inferior threshold for Min-Max strategy. Defaults to 0.1.
 			sup_threshold (float, optional): Superior threshold for Min-Max strategy. Defaults to 10**9.
@@ -36,13 +36,19 @@ def ACO(Me, NbP, comm, alpha, rho, Q, nb_ants, tau_0, n_iter, n1=256, n2=256, n3
 	"""
 	# Initialisation of the graph and pheromon matrix
-	n_cbx = n1//16
+	n_cbx = n1_max//16
-	n_cby = n2
+	n_cby = n2_max
-	n_cbz = n3
+	n_cbz = n3_max
-	tau = np.zeros((n_cbx + n_cby + 1, n_cbx + n_cby + n_cbz + 1), dtype="float64")
+	n1_size = int(1 + np.log2(n1_max/256))
-	tau[0, 1:n_cbx+1] = tau_0
+	n2_size = int(1 + np.log2(n2_max/256))
-	tau[1:n_cbx+1, n_cbx+1:n_cbx+n_cby+1] = tau_0
+	n3_size = int(1 + np.log2(n3_max/256))
-	tau[n_cbx+1:n_cbx+n_cby+1, n_cbx+n_cby+1:n_cbx+n_cby+n_cbz+1] = tau_0
+	tau = np.zeros((n1_size + n2_size + n3_size + n_cbx + n_cby + 1, n1_size + n2_size + n3_size + n_cbx + n_cby + n_cbz + 1), dtype="float64")
+	tau[0, 1:(n1_size+1)] = tau_0
+	tau[1:(n1_size+1), (n1_size+1):(n1_size + n2_size + 1)] = tau_0
+	tau[(n1_size+1):(n1_size + n2_size + 1), (n1_size + n2_size + 1):(n1_size + n2_size + n3_size + 1)] = tau_0
+	tau[(n1_size + n2_size + 1):(n1_size + n2_size + n3_size + 1), (n1_size + n2_size + n3_size + 1):(n1_size + n2_size + n3_size + n_cbx + 1)] = tau_0
+	tau[(n1_size + n2_size + n3_size + 1):(n1_size + n2_size + n3_size + n_cbx + 1), (n1_size + n2_size + n3_size + n_cbx + 1):(n1_size + n2_size + n3_size + n_cbx + n_cby + 1)] = tau_0
+	tau[(n1_size + n2_size + n3_size + n_cbx + 1):(n1_size + n2_size + n3_size + n_cbx + n_cby + 1), (n1_size + n2_size + n3_size + n_cbx + n_cby + 1):(n1_size + n2_size + n3_size + n_cbx + n_cby + n_cbz + 1)] = tau_0
 	cost_opti = math.inf
 	# n_iter cycles of ants traveling through the graph 
@@ -50,16 +56,22 @@ def ACO(Me, NbP, comm, alpha, rho, Q, nb_ants, tau_0, n_iter, n1=256, n2=256, n3
 		paths = []
 		costs = []
 		for k in range(nb_ants//NbP):
-			path = compute_path.compute_path(tau, alpha, n1, n2, n3)
+			path = compute_path.compute_path(tau, alpha, n1_size, n2_size, n3_size)
-			cost = launcher_SUBP.deploySUBP(n1, n2, n3, nb_threads, reps, path[0]*16, path[1], path[2])
+			n1 = 256 * (2**(path[0]-1))
+			n2 = 256 * (2**(path[1]-1))
+			n3 = 256 * (2**(path[2]-1))
+			cbx= path[3]*16
+			cby= path[4]
+			cbz= path[5]
+			cost = launcher_SUBP.deploySUBP(n1, n2, n3, nb_threads, reps, cbx, cby, cbz)
 			cost = [cost]
-			print(f"Path followed at iteration {iter} on process {Me} by ant {k} : {[path[0]*16, path[1], path[2]]} with cost equal to {cost[0]}.")
+			print(f"Path followed at iteration {iter} on process {Me} by ant {k} : {[n1, n2, n3, cbx, cby, cbz]} with cost equal to {cost[0]}.")
 			paths.append(path)
 			costs.append(cost)
 		paths = np.array(paths, dtype="int32")
 		costs = np.array(costs, dtype="float64")
 		if Me == 0:
-			all_paths = np.empty(((nb_ants//NbP)*NbP, 3), dtype="int32")
+			all_paths = np.empty(((nb_ants//NbP)*NbP, 6), dtype="int32")
 			all_costs = np.empty(((nb_ants//NbP)*NbP, 1), dtype="float64")
 		else:
 			all_paths = None
@@ -68,15 +80,20 @@ def ACO(Me, NbP, comm, alpha, rho, Q, nb_ants, tau_0, n_iter, n1=256, n2=256, n3
 		comm.Gather(costs, all_costs, root=0)
 		if Me == 0:
-			tau, best_p, best_cost = compute_tau.compute_tau(tau, all_paths, all_costs, Q, rho, n1, n2, n3, fancy_strategy, sub_threshold, sup_threshold)
+			tau, best_p, best_cost = compute_tau.compute_tau(tau, all_paths, all_costs, Q, rho, n1_max, n2_max, n3_max, fancy_strategy, sub_threshold, sup_threshold)
 			if best_cost < cost_opti:
 				cost_opti = best_cost
 				path_opti = best_p
 		comm.Bcast(tau, root=0)
 	if Me == 0:
-		path_opti[0] *= 16
+		n1 = 256 * (2**(path_opti[0]-1))
-		return path_opti, cost_opti
+		n2 = 256 * (2**(path_opti[1]-1))
+		n3 = 256 * (2**(path_opti[2]-1))
+		cbx= path_opti[3]*16
+		cby= path_opti[4]
+		cbz= path_opti[5]
+		return [n1, n2, n3, cbx, cby, cbz], cost_opti
 	else:
 		return None
@@ -99,7 +116,7 @@ if __name__ == "__main__":
 	# Parameters for compilation and execution of iso3dfd
 	nb_threads = 8
 	reps = 100
-	n1, n2, n3 = 256, 256, 256
+	n1_max, n2_max, n3_max = 256, 256, 256
 	optimization = "-O3"
 	simd = "avx512"
@@ -111,12 +128,12 @@ if __name__ == "__main__":
 	if Me == 0:
 		path_opti, cost_opti = ACO(Me, NbP, comm, alpha, rho, Q, nb_ants, tau_0, n_iter,
-									n1=n1, n2=n2, n3=n3, fancy_strategy=fancy_strategy,
+									n1_max=n1_max, n2_max=n2_max, n3_max=n3_max, fancy_strategy=fancy_strategy,
 									nb_threads=nb_threads, reps=reps)
 		print(f"Le chemin optimal est {path_opti}.")
 		print(f"Le throughput associé est alors {-1*cost_opti} MPoints/s.")
 	else:
 		ACO(Me, NbP, comm, alpha, rho, Q, nb_ants, tau_0, n_iter,
-			n1=n1, n2=n2, n3=n3, fancy_strategy=fancy_strategy,
+			n1_max=n1_max, n2_max=n2_max, n3_max=n3_max, fancy_strategy=fancy_strategy,
 			nb_threads=nb_threads, reps=reps)
--- a/Appli-iso3dfd/compute_path.py
+++ b/Appli-iso3dfd/compute_path.py
 import random as rd
 import numpy as np
-#tau : pheromone matrix, size : 1 + n1//16 + n2 x 1 + n1//16 + n2 + n3
+#tau : pheromone matrix, size : (1 + n1_size + n2_size + n3_size + n1_max//16 + n2_max) x (1 + n1_size + n2_size + n3_size + n1_max//16 + n2_max + n3_max)
+def proba(i, alpha, tau, n1_size, n2_size, n3_size, n_cbx, n_cby, n_cbz):
+    n1_max = (2**(n1_size-1)) * 16
+    n2_max = (2**(n2_size-1)) * 256
+    n3_max = (2**(n3_size-1)) * 256
-def proba(i, alpha, tau, n_cbx, n_cby, n_cbz):
    if i == 0 :
        #we are on the initial state
-        #the ant is going to choose cbx
+        #the ant is going to choose n1
-        sequence = np.arange(1, n_cbx +1)
+        sequence = np.arange(1, n1_size +1)
-    if i>0 and i<n_cbx +1 :
+    elif i > 0 and i < n1_size + 1 :
        #we are on the first state
-        #the ant is going to choose cby
+        #the ant is going to choose n2
-        sequence = np.arange(n_cbx + 1, n_cbx + n_cby + 1)
+        sequence = np.arange(n1_size + 1, n1_size + n2_size + 1)
-    if i>n_cbx and i<n_cbx + n_cby + 1 :
+    elif i > n1_size and i < n1_size + n2_size + 1 :
        #we are on the second state
+        #the ant is going to choose n3
+        sequence = np.arange(n1_size + n2_size + 1, n1_size + n2_size + n3_size + 1)
+    elif i > n1_size + n2_size and i < n1_size + n2_size + n3_size + 1 :
+        #we are on the third state
+        #the ant is going to choose cbx
+        sequence = np.arange(n1_size + n2_size + n3_size + 1, n1_size + n2_size + n3_size + n_cbx + 1)
+    elif i > n1_size + n2_size + n3_size and i < n1_size + n2_size + n3_size + n1_max + 1 :
+        #we are on the fourth state
+        #the ant is going to choose cby
+        sequence = np.arange(n1_size + n2_size + n3_size + n1_max + 1, n1_size + n2_size + n3_size + n1_max + n_cby + 1)
+    elif i > n1_size + n2_size + n3_size + n1_max and i < n1_size + n2_size + n3_size + n1_max + n2_max + 1 :
+        #we are on the fifth state
        #the ant is going to choose cbz
-        sequence = np.arange(n_cbx + n_cby + 1,n_cbx + n_cby + n_cbz + 1)
+        sequence = np.arange(n1_size + n2_size + n3_size + n1_max + n2_max + 1, n1_size + n2_size + n3_size + n1_max + n2_max + n_cbz + 1)
-    #we compute the weights and then we normalize it
    weights = np.power(tau[i][sequence[0]:sequence[-1]+1],alpha)
-    norm = np.sum(weights)
+    #Uncomment the next two lines to normalize the weights
-    weights = weights/norm
+    #norm = np.sum(weights)
+    #weights = weights/norm
    return (sequence, weights)
-def compute_path(tau, alpha, n1, n2, n3):
+def compute_path(tau, alpha, n1_size, n2_size, n3_size):
-    n_cbx = n1//16
-    n_cby = n2
-    n_cbz = n3
-    path = []
-    sequence, weights = proba(0, alpha, tau, n_cbx, n_cby, n_cbz)
+    #n_cbx, n_cby, n_cbz will be initialized after the choice of n1, n2, n3
-    new_node = rd.choices(sequence,weights)[0]
+    n_cbx = 0
-    path.append(new_node)
+    n_cby = 0
+    n_cbz = 0
+    path = []
-    sequence, weights = proba(path[0], alpha, tau, n_cbx, n_cby, n_cbz)
+    sequence, weights = proba(0, alpha, tau, n1_size, n2_size, n3_size, n_cbx, n_cby, n_cbz)
    new_node = rd.choices(sequence,weights)[0]
    path.append(new_node)
-    sequence, weights = proba(path[1], alpha, tau, n_cbx, n_cby, n_cbz)
+    for i in range(5):
+        sequence, weights = proba(path[i], alpha, tau, n1_size, n2_size, n3_size, n_cbx, n_cby, n_cbz)
        new_node = rd.choices(sequence,weights)[0]
        path.append(new_node)
+        if i == 1:
+            n1 = 256 * (2**(path[0]-1))
+            n2 = 256 * (2**(path[1] - n1_size - 1))
+            n3 = 256 * (2**(path[2] - n1_size - n2_size - 1))
+            n_cbx, n_cby, n_cbz = n1//16, n2, n3
-    #we transform the path so that it is now in the following form : [n_cbx, n_cby, n_cbz]
+    #we shift each node back into its own range so that the path has the following form : [n1_index, n2_index, n3_index, cbx_index, cby, cbz] (n1 = 256 * 2**(n1_index - 1), cbx = 16 * cbx_index)
-    path[1] = path[1] - n_cbx
+    path[1] = path[1] - n1_size
-    path[2] = path[2] - n_cbx - n_cby
+    path[2] = path[2] - n1_size - n2_size
+    path[3] = path[3] - n1_size - n2_size - n3_size
+    path[4] = path[4] - n1_size - n2_size - n3_size - (2**(n1_size-1)) * 16
+    path[5] = path[5] - n1_size - n2_size - n3_size - (2**(n1_size-1)) * 16 - (2**(n2_size-1)) * 256
    return path
+if __name__ == "__main__": 
+    n1_max, n2_max, n3_max = 1024, 1024, 1024
+    n_cbx = n1_max//16
+    n_cby = n2_max
+    n_cbz = n3_max
+    n1_size = int(1 + np.log2(n1_max/256))
+    n2_size = int(1 + np.log2(n2_max/256))
+    n3_size = int(1 + np.log2(n3_max/256))
+    tau_0 = 10
+    tau = np.zeros((n1_size + n2_size + n3_size + n_cbx + n_cby + 1, n1_size + n2_size + n3_size + n_cbx + n_cby + n_cbz + 1), dtype="float64")
+    tau[0, 1:(n1_size+1)] = tau_0
+    tau[1:(n1_size+1), (n1_size+1):(n1_size + n2_size + 1)] = tau_0
+    tau[(n1_size+1):(n1_size + n2_size + 1), (n1_size + n2_size + 1):(n1_size + n2_size + n3_size + 1)] = tau_0
+    tau[(n1_size + n2_size + 1):(n1_size + n2_size + n3_size + 1), (n1_size + n2_size + n3_size + 1):(n1_size + n2_size + n3_size + n_cbx + 1)] = tau_0
+    tau[(n1_size + n2_size + n3_size + 1):(n1_size + n2_size + n3_size + n_cbx + 1), (n1_size + n2_size + n3_size + n_cbx + 1):(n1_size + n2_size + n3_size + n_cbx + n_cby + 1)] = tau_0
+    tau[(n1_size + n2_size + n3_size + n_cbx + 1):(n1_size + n2_size + n3_size + n_cbx + n_cby + 1), (n1_size + n2_size + n3_size + n_cbx + n_cby + 1):(n1_size + n2_size + n3_size + n_cbx + n_cby + n_cbz + 1)] = tau_0
+    path = compute_path(tau, 1, n1_size, n2_size, n3_size)
+    print(path)
\ No newline at end of file
--- a/Appli-iso3dfd/compute_tau.py
+++ b/Appli-iso3dfd/compute_tau.py
 import math
+import numpy as np
-def compute_tau(tau, all_paths, all_costs, Q, rho, n1, n2, n3, fancy_strategy='AS', sub_threshold=0.1, sup_threshold=10**9):
+def compute_tau(tau, all_paths, all_costs, Q, rho, n1_max, n2_max, n3_max, fancy_strategy='AS', sub_threshold=0.1, sup_threshold=10**9):
  """Computing of the pheromon matrix and the cost of each path.
  Args:
@@ -19,6 +20,12 @@ def compute_tau(tau, all_paths, all_costs, Q, rho, n1, n2, n3, fancy_strategy='A
      (np.array, list, float): the updated pheromon matrix, the best path and the associated cost
  """
  #evaporation of pheromons
+  n_cbx = n1_max//16
+  n_cby = n2_max
+  n_cbz = n3_max
+  n1_size = int(1 + np.log2(n1_max/256))
+  n2_size = int(1 + np.log2(n2_max/256))
+  n3_size = int(1 + np.log2(n3_max/256))
  tau = tau * (1-rho)
  best_cost = math.inf
@@ -33,15 +40,21 @@ def compute_tau(tau, all_paths, all_costs, Q, rho, n1, n2, n3, fancy_strategy='A
      #Classic Ant System : each path is rewarded according to its length
      #Elitist Ant System : each path is rewarded according to its length
      tau[0,p[0]] += Q*(-1)*cost
-      tau[p[0],n1//16+p[1]] += Q*(-1)*cost
+      tau[p[0], p[1] + n1_size] += Q*(-1)*cost
-      tau[n1//16+p[1],n1//16+n2+p[2]] += Q*(-1)*cost
+      tau[p[1] + n1_size, p[2] + n1_size + n2_size] += Q*(-1)*cost
+      tau[p[2] + n1_size + n2_size, p[3] + n1_size + n2_size + n3_size] += Q*(-1)*cost
+      tau[p[3] + n1_size + n2_size + n3_size, p[4] + n1_size + n2_size + n3_size + n_cbx] += Q*(-1)*cost
+      tau[p[4] + n1_size + n2_size + n3_size + n_cbx, p[5] + n1_size + n2_size + n3_size + n_cbx + n_cby] += Q*(-1)*cost
  if fancy_strategy == 'ElitistAS' or fancy_strategy == "MMAS":
    #Elitist Ant System : The best ant is rewarded a second time
    #Max-Min Ant System : Only the winner ant is rewarded but the pheromon are limited within a minimal and a maximal threshold
    tau[0,best_p[0]] += Q*(-1)*best_cost
-    tau[best_p[0],n1//16+best_p[1]] += Q*(-1)*best_cost
+    tau[best_p[0], best_p[1] + n1_size] += Q*(-1)*best_cost
-    tau[n1//16+best_p[1],n1//16+n2+best_p[2]] += Q*(-1)*best_cost
+    tau[best_p[1] + n1_size, best_p[2] + n1_size + n2_size] += Q*(-1)*best_cost
+    tau[best_p[2] + n1_size + n2_size, best_p[3] + n1_size + n2_size + n3_size] += Q*(-1)*best_cost
+    tau[best_p[3] + n1_size + n2_size + n3_size, best_p[4] + n1_size + n2_size + n3_size + n_cbx] += Q*(-1)*best_cost
+    tau[best_p[4] + n1_size + n2_size + n3_size + n_cbx, best_p[5] + n1_size + n2_size + n3_size + n_cbx + n_cby] += Q*(-1)*best_cost
  if fancy_strategy == "MMAS":
    #verification of the threshold constraint