import subprocess
import sys
import os

import tools


#----------------------------------------------------------------
# Deployment function: launch an MPI program on a set of cluster nodes,
# then get the MPI program output and compute the performance (fitness) from it
#----------------------------------------------------------------
def deploySUBP(n1, n2, n3, nb_threads, reps, cbx, cby, cbz):
    """Run the executable found in bin/ and return the negated average throughput.

    The first entry listed in ``bin/`` is executed directly (the file's notes
    call it an MPI program, but no MPI launcher is invoked here) with the
    eight parameters passed as positional command-line arguments. Its captured
    standard output is handed to ``tools.commandLineExtract`` and the
    throughputs it returns are averaged.

    Args:
        n1 (int): First dimension of the problem
        n2 (int): Second dimension of the problem
        n3 (int): Third dimension of the problem
        nb_threads (int): Number of threads per MPI process
        reps (int): Max number of iteration before stopping the process
        cbx (int): First dimension of the cache
        cby (int): Second dimension of the cache
        cbz (int): Third dimension of the cache

    Returns:
        float: Average of the extracted throughputs multiplied by -1
        (presumably so that a minimizer can be used on it -- confirm with the
        caller). When no throughput was extracted the average is taken as 0
        and ``-0.0`` is returned.

    Raises:
        FileNotFoundError: If ``bin/`` does not exist or contains no entry.
    """

    # Get the name of the exe file: the first entry listed in bin/.
    entries = os.listdir("bin/")
    if not entries:
        raise FileNotFoundError("No executable found in bin/")
    exeFile = entries[0]

    # Build the command as an argument list: no shell is needed, and file
    # names or values containing spaces cannot be split or reinterpreted.
    command = ["bin/" + exeFile]
    command += [str(arg) for arg in (n1, n2, n3, nb_threads, reps, cbx, cby, cbz)]
    res = subprocess.run(command, stdout=subprocess.PIPE)

    # Results exploitation (only the throughputs are needed here)
    _times, throughputs, _gflops, _runs = tools.commandLineExtract(res)

    # Fitness calculation: mean throughput. The guard must look at the
    # throughputs themselves; the CompletedProcess object is always truthy,
    # so testing it never prevented a division by zero.
    fitness = 0.
    if len(throughputs) > 0:
        fitness = sum(throughputs, 0.) / len(throughputs)

    return -1*fitness


#-----------------------------------------------------------------
# Compiling function
#-----------------------------------------------------------------

def compileSUBP(optimization, simd):
    """Compile code using specific flags. Doesn't return anything.

    Runs ``make -e OPTIMIZATION=<optimization> -e simd=<simd> last`` in the
    current working directory, capturing the standard output of ``make``.

    Args:
        optimization (str): Either "-O2" or "-O3", global standard C optimization flag.
        simd (str): Either "sse", "avx", "avx2" or "avx512", specific optimization flags for vectorization.

    Raises:
        subprocess.CalledProcessError: If ``make`` exits with a non-zero
            status; the captured standard output is attached to the exception.
        FileNotFoundError: If the ``make`` program cannot be found.
    """

    print("Compiling with OPTIMIZATION: {} and simd: {}".format(optimization, simd))

    # Launch the compiling command. The arguments are passed as a list so no
    # shell quoting is needed, and check=True turns a failed build into an
    # exception instead of letting it be reported as a success below.
    subprocess.run(["make",
                    "-e", "OPTIMIZATION={}".format(optimization),
                    "-e", "simd={}".format(simd),
                    "last"],
                   stdout=subprocess.PIPE,
                   check=True)

    # Only reached when make returned exit status 0.
    print('Compiling successful')


#-----------------------------------------------------------------
# Main code
#-----------------------------------------------------------------

if __name__ == "__main__":

    # Read the run configuration from the command line.
    n1, n2, n3, nb_threads, reps, cbx, cby, cbz = tools.cmdLineParsing()
    run_args = (n1, n2, n3, nb_threads, reps, cbx, cby, cbz)

    # Echo the configuration that is about to be used.
    print("n1, n2, n3 : " + " ".join(str(dim) for dim in (n1, n2, n3)))
    print("Number of threads : ", nb_threads, sep="")
    print("Number of reps : ", reps, sep="")
    print("Thread block size : " + " ".join(str(size) for size in (cbx, cby, cbz)))

    # Example: build once with fixed flags...
    print("---------- Compiling and Deployment using Subprocess module (default values) ---------")
    compileSUBP(optimization="-O3", simd="avx512")

    print("---------- Deployment using Subprocess module (default values) ---------")

    # ...then run the same configuration twice and print each returned fitness.
    for _ in range(2):
        print(deploySUBP(*run_args))
