Skip to content
Snippets Groups Projects
Commit c94ea4e7 authored by Paul Saurou's avatar Paul Saurou
Browse files

Merge branch 'GatherBcastAdditionOfN' into 'GatherBcast'

Gather bcast addition of n

See merge request 2019marechals/st7-intel!3
parents 49bdac35 ee894144
No related branches found
No related tags found
No related merge requests found
...@@ -6,7 +6,7 @@ import compute_path ...@@ -6,7 +6,7 @@ import compute_path
import compute_tau import compute_tau
import launcher_SUBP import launcher_SUBP
def ACO(Me, NbP, comm, alpha, rho, Q, nb_ants, tau_0, n_iter, n1=256, n2=256, n3=256, def ACO(Me, NbP, comm, alpha, rho, Q, nb_ants, tau_0, n_iter, n1_max=256, n2_max=256, n3_max=256,
fancy_strategy='AS', sub_threshold=0.1, sup_threshold=10**9, fancy_strategy='AS', sub_threshold=0.1, sup_threshold=10**9,
nb_threads=8, reps=100): nb_threads=8, reps=100):
""" Ant colony optimization of the cache blocking parameters for the execution of """ Ant colony optimization of the cache blocking parameters for the execution of
...@@ -22,9 +22,9 @@ def ACO(Me, NbP, comm, alpha, rho, Q, nb_ants, tau_0, n_iter, n1=256, n2=256, n3 ...@@ -22,9 +22,9 @@ def ACO(Me, NbP, comm, alpha, rho, Q, nb_ants, tau_0, n_iter, n1=256, n2=256, n3
nb_ants (int): number of ants nb_ants (int): number of ants
tau_0 (float): initial quantity of pheromones on each edge tau_0 (float): initial quantity of pheromones on each edge
n_iter (int): number of cycles done for ACO n_iter (int): number of cycles done for ACO
n1 (int, optional): First dimension of matrix. Defaults to 256. n1_max (int, optional): Maximal first dimension of matrix. Defaults to 256.
n2 (int, optional): Second dimension of matrix. Defaults to 256. n2_max (int, optional): Maximal second dimension of matrix. Defaults to 256.
n3 (int, optional): Third dimension of matrix. Defaults to 256. n3_max (int, optional): Maximal third dimension of matrix. Defaults to 256.
fancy_strategy (string, optional): Strategy used to update tau. Defaults to 'AS'. fancy_strategy (string, optional): Strategy used to update tau. Defaults to 'AS'.
sub_threshold (float, optional): Inferior threshold for Min-Max strategy. Defaults to 0.1. sub_threshold (float, optional): Inferior threshold for Min-Max strategy. Defaults to 0.1.
sup_threshold (float, optional): Superior threshold for Min-Max strategy. Defaults to 10**9. sup_threshold (float, optional): Superior threshold for Min-Max strategy. Defaults to 10**9.
...@@ -36,13 +36,19 @@ def ACO(Me, NbP, comm, alpha, rho, Q, nb_ants, tau_0, n_iter, n1=256, n2=256, n3 ...@@ -36,13 +36,19 @@ def ACO(Me, NbP, comm, alpha, rho, Q, nb_ants, tau_0, n_iter, n1=256, n2=256, n3
""" """
# Initialisation of the graph and pheromon matrix # Initialisation of the graph and pheromon matrix
n_cbx = n1//16 n_cbx = n1_max//16
n_cby = n2 n_cby = n2_max
n_cbz = n3 n_cbz = n3_max
tau = np.zeros((n_cbx + n_cby + 1, n_cbx + n_cby + n_cbz + 1), dtype="float64") n1_size = int(1 + np.log2(n1_max/256))
tau[0, 1:n_cbx+1] = tau_0 n2_size = int(1 + np.log2(n2_max/256))
tau[1:n_cbx+1, n_cbx+1:n_cbx+n_cby+1] = tau_0 n3_size = int(1 + np.log2(n3_max/256))
tau[n_cbx+1:n_cbx+n_cby+1, n_cbx+n_cby+1:n_cbx+n_cby+n_cbz+1] = tau_0 tau = np.zeros((n1_size + n2_size + n3_size + n_cbx + n_cby + 1, n1_size + n2_size + n3_size + n_cbx + n_cby + n_cbz + 1), dtype="float64")
tau[0, 1:(n1_size+1)] = tau_0
tau[1:(n1_size+1), (n1_size+1):(n1_size + n2_size + 1)] = tau_0
tau[(n1_size+1):(n1_size + n2_size + 1), (n1_size + n2_size + 1):(n1_size + n2_size + n3_size + 1)] = tau_0
tau[(n1_size + n2_size + 1):(n1_size + n2_size + n3_size + 1), (n1_size + n2_size + n3_size + 1):(n1_size + n2_size + n3_size + n_cbx + 1)] = tau_0
tau[(n1_size + n2_size + n3_size + 1):(n1_size + n2_size + n3_size + n_cbx + 1), (n1_size + n2_size + n3_size + n_cbx + 1):(n1_size + n2_size + n3_size + n_cbx + n_cby + 1)] = tau_0
tau[(n1_size + n2_size + n3_size + n_cbx + 1):(n1_size + n2_size + n3_size + n_cbx + n_cby + 1), (n1_size + n2_size + n3_size + n_cbx + n_cby + 1):(n1_size + n2_size + n3_size + n_cbx + n_cby + n_cbz + 1)] = tau_0
cost_opti = math.inf cost_opti = math.inf
# n_iter cycles of ants traveling through the graph # n_iter cycles of ants traveling through the graph
...@@ -50,16 +56,22 @@ def ACO(Me, NbP, comm, alpha, rho, Q, nb_ants, tau_0, n_iter, n1=256, n2=256, n3 ...@@ -50,16 +56,22 @@ def ACO(Me, NbP, comm, alpha, rho, Q, nb_ants, tau_0, n_iter, n1=256, n2=256, n3
paths = [] paths = []
costs = [] costs = []
for k in range(nb_ants//NbP): for k in range(nb_ants//NbP):
path = compute_path.compute_path(tau, alpha, n1, n2, n3) path = compute_path.compute_path(tau, alpha, n1_size, n2_size, n3_size)
cost = launcher_SUBP.deploySUBP(n1, n2, n3, nb_threads, reps, path[0]*16, path[1], path[2]) n1 = 256 * (2**(path[0]-1))
n2 = 256 * (2**(path[1]-1))
n3 = 256 * (2**(path[2]-1))
cbx= path[3]*16
cby= path[4]
cbz= path[5]
cost = launcher_SUBP.deploySUBP(n1, n2, n3, nb_threads, reps, cbx, cby, cbz)
cost = [cost] cost = [cost]
print(f"Path followed at iteration {iter} on process {Me} by ant {k} : {[path[0]*16, path[1], path[2]]} with cost equal to {cost[0]}.") print(f"Path followed at iteration {iter} on process {Me} by ant {k} : {[n1, n2, n3, cbx, cby, cbz]} with cost equal to {cost[0]}.")
paths.append(path) paths.append(path)
costs.append(cost) costs.append(cost)
paths = np.array(paths, dtype="int32") paths = np.array(paths, dtype="int32")
costs = np.array(costs, dtype="float64") costs = np.array(costs, dtype="float64")
if Me == 0: if Me == 0:
all_paths = np.empty(((nb_ants//NbP)*NbP, 3), dtype="int32") all_paths = np.empty(((nb_ants//NbP)*NbP, 6), dtype="int32")
all_costs = np.empty(((nb_ants//NbP)*NbP, 1), dtype="float64") all_costs = np.empty(((nb_ants//NbP)*NbP, 1), dtype="float64")
else: else:
all_paths = None all_paths = None
...@@ -68,15 +80,20 @@ def ACO(Me, NbP, comm, alpha, rho, Q, nb_ants, tau_0, n_iter, n1=256, n2=256, n3 ...@@ -68,15 +80,20 @@ def ACO(Me, NbP, comm, alpha, rho, Q, nb_ants, tau_0, n_iter, n1=256, n2=256, n3
comm.Gather(costs, all_costs, root=0) comm.Gather(costs, all_costs, root=0)
if Me == 0: if Me == 0:
tau, best_p, best_cost = compute_tau.compute_tau(tau, all_paths, all_costs, Q, rho, n1, n2, n3, fancy_strategy, sub_threshold, sup_threshold) tau, best_p, best_cost = compute_tau.compute_tau(tau, all_paths, all_costs, Q, rho, n1_max, n2_max, n3_max, fancy_strategy, sub_threshold, sup_threshold)
if best_cost < cost_opti: if best_cost < cost_opti:
cost_opti = best_cost cost_opti = best_cost
path_opti = best_p path_opti = best_p
comm.Bcast(tau, root=0) comm.Bcast(tau, root=0)
if Me == 0: if Me == 0:
path_opti[0] *= 16 n1 = 256 * (2**(path_opti[0]-1))
return path_opti, cost_opti n2 = 256 * (2**(path_opti[1]-1))
n3 = 256 * (2**(path_opti[2]-1))
cbx= path_opti[3]*16
cby= path_opti[4]
cbz= path_opti[5]
return [n1, n2, n3, cbx, cby, cbz], cost_opti
else: else:
return None return None
...@@ -99,7 +116,7 @@ if __name__ == "__main__": ...@@ -99,7 +116,7 @@ if __name__ == "__main__":
# Parameters for compilation and execution of iso3dfd # Parameters for compilation and execution of iso3dfd
nb_threads = 8 nb_threads = 8
reps = 100 reps = 100
n1, n2, n3 = 256, 256, 256 n1_max, n2_max, n3_max = 256, 256, 256
optimization = "-O3" optimization = "-O3"
simd = "avx512" simd = "avx512"
...@@ -111,12 +128,12 @@ if __name__ == "__main__": ...@@ -111,12 +128,12 @@ if __name__ == "__main__":
if Me == 0: if Me == 0:
path_opti, cost_opti = ACO(Me, NbP, comm, alpha, rho, Q, nb_ants, tau_0, n_iter, path_opti, cost_opti = ACO(Me, NbP, comm, alpha, rho, Q, nb_ants, tau_0, n_iter,
n1=n1, n2=n2, n3=n3, fancy_strategy=fancy_strategy, n1_max=n1_max, n2_max=n2_max, n3_max=n3_max, fancy_strategy=fancy_strategy,
nb_threads=nb_threads, reps=reps) nb_threads=nb_threads, reps=reps)
print(f"Le chemin optimal est {path_opti}.") print(f"Le chemin optimal est {path_opti}.")
print(f"Le throughput associé est alors {-1*cost_opti} MPoints/s.") print(f"Le throughput associé est alors {-1*cost_opti} MPoints/s.")
else: else:
ACO(Me, NbP, comm, alpha, rho, Q, nb_ants, tau_0, n_iter, ACO(Me, NbP, comm, alpha, rho, Q, nb_ants, tau_0, n_iter,
n1=n1, n2=n2, n3=n3, fancy_strategy=fancy_strategy, n1_max=n1_max, n2_max=n2_max, n3_max=n3_max, fancy_strategy=fancy_strategy,
nb_threads=nb_threads, reps=reps) nb_threads=nb_threads, reps=reps)
import random as rd import random as rd
import numpy as np import numpy as np
#tau : pheromone matrix, size : 1 + n1//16 + n2 x 1 + n1//16 + n2 + n3 #tau : pheromone matrix, size : 1 + n1_size + n2_size + n3_size + n1_max//16 + n2_max x 1 + n1_size + n2_size + n3_size + n1_max//16 + n2_max + n3_max
def proba(i, alpha, tau, n1_size, n2_size, n3_size, n_cbx, n_cby, n_cbz):
    """Return the nodes an ant on node ``i`` may move to, with their weights.

    The graph is layered. Node 0 is the start, followed by consecutive index
    ranges for the choices of n1, n2, n3, cbx, cby and cbz:

    * n1 : ``1 .. n1_size``
    * n2 : the next ``n2_size`` indices
    * n3 : the next ``n3_size`` indices
    * cbx: a range reserved with ``16 * 2**(n1_size - 1)`` indices, of which
      only the first ``n_cbx`` are candidates
    * cby: a range reserved with ``256 * 2**(n2_size - 1)`` indices, of which
      only the first ``n_cby`` are candidates
    * cbz: the first ``n_cbz`` indices after the cby range

    An ant standing in one layer always chooses inside the next layer.

    Args:
        i (int): index of the node the ant currently stands on.
        alpha (float): exponent applied to the pheromone values.
        tau (np.ndarray): 2-D pheromone matrix, read as ``tau[i][first:last]``.
        n1_size, n2_size, n3_size (int): number of nodes in the n1/n2/n3 layers.
        n_cbx, n_cby, n_cbz (int): number of admissible cbx/cby/cbz nodes.

    Returns:
        tuple: ``(sequence, weights)`` where ``sequence`` is the array of
        candidate node indices and ``weights`` is ``tau[i][...] ** alpha`` over
        those candidates. The weights are NOT normalised.

    Raises:
        ValueError: if ``i`` does not belong to a layer that has a successor,
            or if the layer to choose from is empty.
    """
    # Width of the index ranges reserved for cbx and cby in the node numbering.
    cbx_span = (2 ** (n1_size - 1)) * 16
    cby_span = (2 ** (n2_size - 1)) * 256

    # First index of every layer after n1 (n1 itself starts at 1).
    n2_start = n1_size + 1
    n3_start = n2_start + n2_size
    cbx_start = n3_start + n3_size
    cby_start = cbx_start + cbx_span
    cbz_start = cby_start + cby_span

    if i < 0 or i >= cbz_start:
        # Previously this fell through every branch and crashed with an
        # UnboundLocalError on `sequence`.
        raise ValueError(f"node {i} has no outgoing layer (valid range: 0..{cbz_start - 1})")

    if i == 0:
        # initial state: the ant is going to choose n1
        first, count = 1, n1_size
    elif i < n2_start:
        # the ant stands on an n1 node: it is going to choose n2
        first, count = n2_start, n2_size
    elif i < n3_start:
        # the ant stands on an n2 node: it is going to choose n3
        first, count = n3_start, n3_size
    elif i < cbx_start:
        # the ant stands on an n3 node: it is going to choose cbx
        first, count = cbx_start, n_cbx
    elif i < cby_start:
        # the ant stands on a cbx node: it is going to choose cby
        first, count = cby_start, n_cby
    else:
        # the ant stands on a cby node: it is going to choose cbz
        first, count = cbz_start, n_cbz

    if count <= 0:
        raise ValueError(f"no candidate node to choose from when leaving node {i}")

    sequence = np.arange(first, first + count)
    # Relative (unnormalised) weights; divide by np.sum(weights) to normalise.
    weights = np.power(tau[i][first:first + count], alpha)
    return (sequence, weights)
def compute_path(tau, alpha, n1_size, n2_size, n3_size):
    """Let one ant walk through the graph and return the choices it made.

    The ant starts on node 0 and makes six successive random choices
    (n1, n2, n3, cbx, cby, cbz). Each choice is drawn with ``random.choices``
    from the candidates and weights returned by ``proba``.

    Args:
        tau (np.ndarray): 2-D pheromone matrix, forwarded to ``proba``.
        alpha (float): exponent applied to the pheromones, forwarded to ``proba``.
        n1_size, n2_size, n3_size (int): number of nodes in the n1/n2/n3 layers.

    Returns:
        list[int]: ``[n1, n2, n3, n_cbx, n_cby, n_cbz]`` where every entry is the
        1-based position of the chosen node inside its own layer (global node
        offsets removed).
    """
    # n_cbx, n_cby, n_cbz are only known once n1, n2 and n3 have been chosen;
    # proba does not read them for the first three choices.
    n_cbx = n_cby = n_cbz = 0

    path = []
    current = 0  # the walk starts on the initial node
    for step in range(6):
        sequence, weights = proba(current, alpha, tau, n1_size, n2_size, n3_size,
                                  n_cbx, n_cby, n_cbz)
        # rd.choices yields numpy integers here; store plain ints in the path
        current = int(rd.choices(sequence, weights)[0])
        path.append(current)
        if step == 2:
            # n1, n2, n3 are all chosen: derive the admissible cache-block ranges
            n1 = 256 * (2 ** (path[0] - 1))
            n2 = 256 * (2 ** (path[1] - n1_size - 1))
            n3 = 256 * (2 ** (path[2] - n1_size - n2_size - 1))
            n_cbx, n_cby, n_cbz = n1 // 16, n2, n3

    # we transform the path so that it is now in the following form:
    # [n1, n2, n3, n_cbx, n_cby, n_cbz], each relative to the start of its layer
    cbx_offset = n1_size + n2_size + n3_size
    cby_offset = cbx_offset + (2 ** (n1_size - 1)) * 16
    cbz_offset = cby_offset + (2 ** (n2_size - 1)) * 256
    path[1] -= n1_size
    path[2] -= n1_size + n2_size
    path[3] -= cbx_offset
    path[4] -= cby_offset
    path[5] -= cbz_offset
    return path
if __name__ == "__main__":
    # Manual smoke test: build an initial pheromone matrix for a
    # 1024 x 1024 x 1024 search space and print one random path.
    n1_max, n2_max, n3_max = 1024, 1024, 1024
    n_cbx = n1_max//16
    n_cby = n2_max
    n_cbz = n3_max
    n1_size = int(1 + np.log2(n1_max/256))
    n2_size = int(1 + np.log2(n2_max/256))
    n3_size = int(1 + np.log2(n3_max/256))
    tau_0 = 10

    # bounds[k] is the first node index of layer k (layer 0 is the start node);
    # the last entry is one past the final cbz node.
    bounds = [0, 1]
    for layer_size in (n1_size, n2_size, n3_size, n_cbx, n_cby, n_cbz):
        bounds.append(bounds[-1] + layer_size)

    # Rows: every node that has successors; columns: every node.
    tau = np.zeros((bounds[-2], bounds[-1]), dtype="float64")
    # Put tau_0 on every edge going from one layer to the next one.
    for k in range(6):
        tau[bounds[k]:bounds[k + 1], bounds[k + 1]:bounds[k + 2]] = tau_0

    path = compute_path(tau, 1, n1_size, n2_size, n3_size)
    print(path)
\ No newline at end of file
import math import math
import numpy as np
def compute_tau(tau, all_paths, all_costs, Q, rho, n1, n2, n3, fancy_strategy='AS', sub_threshold=0.1, sup_threshold=10**9): def compute_tau(tau, all_paths, all_costs, Q, rho, n1_max, n2_max, n3_max, fancy_strategy='AS', sub_threshold=0.1, sup_threshold=10**9):
"""Computing of the pheromon matrix and the cost of each path. """Computing of the pheromon matrix and the cost of each path.
Args: Args:
...@@ -19,6 +20,12 @@ def compute_tau(tau, all_paths, all_costs, Q, rho, n1, n2, n3, fancy_strategy='A ...@@ -19,6 +20,12 @@ def compute_tau(tau, all_paths, all_costs, Q, rho, n1, n2, n3, fancy_strategy='A
(np.array, list, float): the updated pheromon matrix, the best path and the associated cost (np.array, list, float): the updated pheromon matrix, the best path and the associated cost
""" """
#evaporation of pheromons #evaporation of pheromons
n_cbx = n1_max//16
n_cby = n2_max
n_cbz = n3_max
n1_size = int(1 + np.log2(n1_max/256))
n2_size = int(1 + np.log2(n2_max/256))
n3_size = int(1 + np.log2(n3_max/256))
tau = tau * (1-rho) tau = tau * (1-rho)
best_cost = math.inf best_cost = math.inf
...@@ -33,15 +40,21 @@ def compute_tau(tau, all_paths, all_costs, Q, rho, n1, n2, n3, fancy_strategy='A ...@@ -33,15 +40,21 @@ def compute_tau(tau, all_paths, all_costs, Q, rho, n1, n2, n3, fancy_strategy='A
#Classic Ant System : each path is rewarded according to its length #Classic Ant System : each path is rewarded according to its length
#Elitist Ant System : each path is rewarded according to its length #Elitist Ant System : each path is rewarded according to its length
tau[0,p[0]] += Q*(-1)*cost tau[0,p[0]] += Q*(-1)*cost
tau[p[0],n1//16+p[1]] += Q*(-1)*cost tau[p[0], p[1] + n1_size] += Q*(-1)*cost
tau[n1//16+p[1],n1//16+n2+p[2]] += Q*(-1)*cost tau[p[1] + n1_size, p[2] + n1_size + n2_size] += Q*(-1)*cost
tau[p[2] + n1_size + n2_size, p[3] + n1_size + n2_size + n3_size] += Q*(-1)*cost
tau[p[3] + n1_size + n2_size + n3_size, p[4] + n1_size + n2_size + n3_size + n_cbx] += Q*(-1)*cost
tau[p[4] + n1_size + n2_size + n3_size + n_cbx, p[5] + n1_size + n2_size + n3_size + n_cbx + n_cby] += Q*(-1)*cost
if fancy_strategy == 'ElitistAS' or fancy_strategy == "MMAS": if fancy_strategy == 'ElitistAS' or fancy_strategy == "MMAS":
#Elitist Ant System : The best ant is rewarded a second time #Elitist Ant System : The best ant is rewarded a second time
#Max-Min Ant System : Only the winner ant is rewarded but the pheromon are limited within a minimal and a maximal threshold #Max-Min Ant System : Only the winner ant is rewarded but the pheromon are limited within a minimal and a maximal threshold
tau[0,best_p[0]] += Q*(-1)*best_cost tau[0,best_p[0]] += Q*(-1)*best_cost
tau[best_p[0],n1//16+best_p[1]] += Q*(-1)*best_cost tau[best_p[0], best_p[1] + n1_size] += Q*(-1)*best_cost
tau[n1//16+best_p[1],n1//16+n2+best_p[2]] += Q*(-1)*best_cost tau[best_p[1] + n1_size, best_p[2] + n1_size + n2_size] += Q*(-1)*best_cost
tau[best_p[2] + n1_size + n2_size, best_p[3] + n1_size + n2_size + n3_size] += Q*(-1)*best_cost
tau[best_p[3] + n1_size + n2_size + n3_size, best_p[4] + n1_size + n2_size + n3_size + n_cbx] += Q*(-1)*best_cost
tau[best_p[4] + n1_size + n2_size + n3_size + n_cbx, best_p[5] + n1_size + n2_size + n3_size + n_cbx + n_cby] += Q*(-1)*best_cost
if fancy_strategy == "MMAS": if fancy_strategy == "MMAS":
#verification of the threshold constraint #verification of the threshold constraint
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment