Select Git revision
Forked from an inaccessible project.
-
Fabien Zucchet authoredFabien Zucchet authored
ACO.py 7.78 KiB
import numpy as np
import math
import socket
import time
from mpi4py import MPI
import compute_path
import tools
import launcher_SUBP
def ACO(Me, NbP, comm, alpha, rho, Q, nb_ants, tau_0, n_iter, n1_max=256, n2_max=256, n3_max=256,
fancy_strategy='AS', sub_threshold=0.1, sup_threshold=10**9, nb_threads=8, reps=100):
""" Ant colony optimization of the cache blocking parameters for the execution of
the iso3dfd programm.
Args:
Me (int): index of process running the function
NbP (int): number of processes
comm (MPI object): mpi communication object
alpha (float): hyperparameter alpha of ACO
rho (float): evaporation rate between 0 and 1
Q (float): quantity of pheromones deposited by an ant on an edge
nb_ants (int): number of ants
tau_0 (float): initial quantity of pheromones on each edge
n_iter (int): number of cycles done for ACO
n1_max (int, optional): Maximal first dimension of the problem. Defaults to 256.
n2_max (int, optional): Maximal second dimension of the problem. Defaults to 256.
n3_max (int, optional): Maximal third dimension of the problem. Defaults to 256.
fancy_strategy (string, optional): Strategy used to update tau. Defaults to 'AS'.
sub_threshold (float, optional): Inferior threshold for Min-Max strategy. Defaults to 0.1.
sup_threshold (float, optional): Superior threshold for Min-Max strategy. Defaults to 10**9.
nb_threads (int, optional): Number of threads per MPI process. Defaults to 4.
reps (int, optional): Max number of iteration before stopping the process. Defaults to 100.
Returns:
(tuple: list, float): optimal path [n1, n2, n3, cbx, cby, cbz] and the associated cost
"""
# Initialisation of the graph and pheromon matrix
n_cbx = n1_max//16
n_cby = n2_max
n_cbz = n3_max
n1_size = int(1 + np.log2(n1_max/256))
n2_size = int(1 + np.log2(n2_max/256))
n3_size = int(1 + np.log2(n3_max/256))
tau = np.zeros((n1_size + n2_size + n3_size + n_cbx + n_cby + 1, n1_size + n2_size + n3_size + n_cbx + n_cby + n_cbz + 1), dtype="float64")
tau[0, 1:(n1_size+1)] = tau_0
tau[1:(n1_size+1), (n1_size+1):(n1_size + n2_size + 1)] = tau_0
tau[(n1_size+1):(n1_size + n2_size + 1), (n1_size + n2_size + 1):(n1_size + n2_size + n3_size + 1)] = tau_0
tau[(n1_size + n2_size + 1):(n1_size + n2_size + n3_size + 1), (n1_size + n2_size + n3_size + 1):(n1_size + n2_size + n3_size + n_cbx + 1)] = tau_0
tau[(n1_size + n2_size + n3_size + 1):(n1_size + n2_size + n3_size + n_cbx + 1), (n1_size + n2_size + n3_size + n_cbx + 1):(n1_size + n2_size + n3_size + n_cbx + n_cby + 1)] = tau_0
tau[(n1_size + n2_size + n3_size + n_cbx + 1):(n1_size + n2_size + n3_size + n_cbx + n_cby + 1), (n1_size + n2_size + n3_size + n_cbx + n_cby + 1):(n1_size + n2_size + n3_size + n_cbx + n_cby + n_cbz + 1)] = tau_0
cost_opti = math.inf
# n_iter cycles of ants traveling through the graph
for iter in range(n_iter):
paths = []
costs = []
for k in range(nb_ants//NbP):
path = compute_path.compute_path(tau, alpha, n1_size, n2_size, n3_size)
n1 = 256 * (2**(path[0]-1))
n2 = 256 * (2**(path[1]-1))
n3 = 256 * (2**(path[2]-1))
cbx = path[3]*16
cby = path[4]
cbz = path[5]
cost = launcher_SUBP.deploySUBP(n1, n2, n3, nb_threads, reps, cbx, cby, cbz)
cost = [cost]
print(f"Path followed at iteration {iter} on process {Me} by ant {k} : {[n1, n2, n3, cbx, cby, cbz]} with cost equal to {cost[0]}.")
paths.append(path)
costs.append(cost)
paths = np.array(paths, dtype="int32")
costs = np.array(costs, dtype="float64")
if fancy_strategy == 'AS' or fancy_strategy == 'ElitistAS':
#we initialize the best path with the ants already in this process
best_cost = np.amin(costs)
best_p = paths[np.argmin(costs), :]
#we vaporize the pheromon
tau = tau * (1-rho)
#we compute the new pheromon with the ants already present
tau = tools.add_pheromones(tau, paths, costs, Q, n1_size, n2_size, n3_size, n_cbx, n_cby)
for i in range(1,NbP):
#we exchange the ants with a ring structure to compute locally tau
comm.Sendrecv_replace(paths, dest=(Me+1)%NbP, source=(Me-1)%NbP)
comm.Sendrecv_replace(costs, dest=(Me+1)%NbP, source=(Me-1)%NbP)
tau = tools.add_pheromones(tau, paths, costs, Q, n1_size, n2_size, n3_size, n_cbx, n_cby)
#we search for the best path
new_cost = np.amin(costs)
if new_cost < best_cost:
best_cost = new_cost
best_p = paths[np.argmin(costs), :]
if fancy_strategy == 'ElitistAS':
tau = tools.add_pheromones(tau, np.array(best_p), np.array(best_cost), Q, n1_size, n2_size, n3_size, n_cbx, n_cby)
if fancy_strategy == 'MMAS':
#we compute the best path locally
best_cost = np.amin(costs)
best_p = paths[np.argmin(costs), :]
#we compare the best paths of all process and we save only the best one
recv_table = np.empty(2)
comm.Allreduce(np.array([best_cost, Me]), recv_table, OP=MPI.MINLOC)
best_cost = recv_table[0]
best_p = comm.bcast(best_p, root=recv_table[1])
#we compute the pheromon using only the best path
tau = tau * (1-rho)
tau = tools.add_pheromones(tau, np.array(best_p), np.array(best_cost), Q, n1_size, n2_size, n3_size, n_cbx, n_cby)
#verification of the threshold constraint
size = np.size(tau)
for i in range(size[0]):
for j in range(size[1]):
if tau[i][j] < sub_threshold:
tau[i][j] = sub_threshold
if tau[i][j] > sup_threshold:
tau[i][j] = sup_threshold
#we update the best path on all iteration
if best_cost < cost_opti:
cost_opti = best_cost
path_opti = best_p
if Me == 0:
with open("cost_opti.txt", 'a') as file:
file.write(f"{-1*cost_opti}\n")
n1 = 256 * (2**(path_opti[0]-1))
n2 = 256 * (2**(path_opti[1]-1))
n3 = 256 * (2**(path_opti[2]-1))
cbx= path_opti[3]*16
cby= path_opti[4]
cbz= path_opti[5]
return [n1, n2, n3, cbx, cby, cbz], cost_opti
if __name__ == "__main__":
#MPI information extraction
comm = MPI.COMM_WORLD
NbP = comm.Get_size()
Me = comm.Get_rank()
print(f"Process {Me} launched on node {socket.gethostname()}.")
# Initialization of hyperparameters
alpha = 1
rho = 0.1
Q = 0.1
nb_ants = 2
tau_0 = Q*100
n_iter = 1
fancy_strategy = "AS"
# Parameters for compilation and execution of iso3dfd
nb_threads = 8
reps = 100
n1_max, n2_max, n3_max = 256, 256, 256
optimization = "-O3"
simd = "avx512"
if Me == 0 :
# Compiling the code
launcher_SUBP.compileSUBP(optimization=optimization, simd=simd)
# Waiting for the compilation to end on other processes
comm.Barrier()
if Me == 0:
begin = time.time()
path_opti, cost_opti = ACO(Me, NbP, comm, alpha, rho, Q, nb_ants, tau_0, n_iter,
n1_max=n1_max, n2_max=n2_max, n3_max=n3_max, fancy_strategy=fancy_strategy,
nb_threads=nb_threads, reps=reps)
if Me == 0:
end = time.time()
print(f"Le temps d'exécution total est {end - begin} s.")
print(f"Le chemin optimal est {path_opti}.")
print(f"Le throughput associé est alors {-1*cost_opti} MPoints/s.")