Commit 208a9644 authored by David Hendriks's avatar David Hendriks
Browse files

almost done with a nice scaling routine

parent 1b729990
Loading
Loading
Loading
Loading
+13 −30
Original line number Diff line number Diff line
@@ -88,7 +88,7 @@ def plot_runtime(calculated_results, unique_amt_cores, unique_amt_systems):
def amdahl(f, n):
    """
    Evaluate the Amdahl's-law expression 1 / ((1 - f) + f / n).

    Args:
        f: value used as the fraction that is divided over n
            (the remaining 1 - f is left undivided).
        n: value that f is divided by.

    Returns:
        The resulting ratio 1.0 / ((1 - f) + (f / n)).
    """
    undivided_part = 1 - f
    divided_part = f / n
    return 1.0 / (undivided_part + divided_part)

def plot_speedup_and_efficiency(result_json_filenames, plot_output_dir, name_testcase, machine):
def plot_speedup_and_efficiency(result_json_filenames, plot_output_dir, name_testcase):
    """
    Plotting routine to plot the speedup and efficiency of scaling

@@ -106,10 +106,6 @@ def plot_speedup_and_efficiency(result_json_filenames, plot_output_dir, name_tes
        with open(jsonfile, "r") as f:
            result_data = json.loads(f.read())

            # if i==0:
            #     name_testcase = result_data['name_testcase']
            #     hostname = result_data['hostname']

            # Get linear data
            linear_data = result_data["linear"]
            linear_mean = np.mean(linear_data)
@@ -146,14 +142,14 @@ def plot_speedup_and_efficiency(result_json_filenames, plot_output_dir, name_tes
                stddev_speedups,
                linestyle="None",
                marker="^",
                label="Speed up & efficiency of {} systems".format(
                    result_data["amt_systems"]
                ),
                label="{}".format(os.path.basename(jsonfile)),
                # label="Speed up & efficiency of {} systems".format(
                #     result_data["amt_systems"]
                # ),
            )

            # Plot the efficiencies
            ax2.plot(cpus, efficiencies, alpha=0.5)

            ax2.plot(cpus, efficiencies, alpha=0.5, linestyle='dotted')
            # x_position_shift += 0.1

    #####################
@@ -161,11 +157,11 @@ def plot_speedup_and_efficiency(result_json_filenames, plot_output_dir, name_tes

    # 100 % scaling line
    ax1.plot([1, max(cpus)], [1, max(cpus)], '--', alpha=0.25, label='100% scaling')

    # Amdahls law fitting

    ax2.axhline(y=1, linestyle='--', alpha=0.25, label='100% efficient')
    # ax1.plot([1, max(cpus)], [1, max(cpus)], '--', alpha=0.25, label='100% scaling')


    # Amdahls law fitting
    # Old stuff
    # Do Amdahls law fitting
    # cores = np.arange(1, 48, 0.1)
@@ -183,9 +179,7 @@ def plot_speedup_and_efficiency(result_json_filenames, plot_output_dir, name_tes
    #################################
    # Adding plot make up
    ax1.set_title(
        "Speed up ratio vs amount of cores for different amounts of systems on {}".format(
            machine
        )
        "Speed up ratio (left y, symbols) and efficiency (right y, dotted line) vs amount of cores"
    )

    ax1.set_xlabel("Amount of cores used")
@@ -196,20 +190,9 @@ def plot_speedup_and_efficiency(result_json_filenames, plot_output_dir, name_tes
    ax1.set_xscale("log")
    ax2.set_xscale("log")

    ax2.set_ylim(ymin=0, ymax=None)


    fig.savefig(os.path.join(plot_output_dir, "speedup_scaling_{}.{}".format(name_testcase, "png")))
    fig.savefig(os.path.join(plot_output_dir, "speedup_scaling_{}.{}".format(name_testcase, "pdf")))
    plt.show()

#################################
# Result files that should be plotted
SCALING_RESULT_DIR = "scaling_results"
FILENAMES = [
    "david-Lenovo-IdeaPad-S340-14IWL_100_systems.json"
    # "astro2_2500_systems.json",
    # "astro2_3000_systems.json",
]

# Turn every filename into an absolute path inside the result directory
RESULT_JSONS = [
    os.path.join(os.path.abspath(SCALING_RESULT_DIR), filename)
    for filename in FILENAMES
]

plot_speedup_and_efficiency(
    RESULT_JSONS,
    SCALING_RESULT_DIR,
    "Example",
    "laptop_david",
)
 No newline at end of file
+117 −1
Original line number Diff line number Diff line
@@ -3,6 +3,20 @@ Module containing the scaling functions.
"""

import time
import socket
import os

import json

import numpy as np

from binarycpython.utils.grid import Population

def dummy_parsefunction(self, output):
    """
    Placeholder parse function that does nothing.

    Both arguments are accepted and ignored; the function always
    returns None.
    """
    return None

def get_mp_results(population, cpu_list, amt_repeats, total_systems):
    """
@@ -31,7 +45,7 @@ def get_mp_results(population, cpu_list, amt_repeats, total_systems):
            )
            mp_times.append(total_mp)

        mp_dict[cpu_amt] = mp_times
        mp_dict[str(cpu_amt)] = mp_times

    return mp_dict

@@ -55,3 +69,105 @@ def get_linear_results(population, amt_repeats, total_systems):
        linear_times.append(total_lin)

    return linear_times

def _scaling_cpu_list(amount_of_cpus, stepsize_cpus):
    """
    Build the array of cpu counts to run with: 1, 1 + stepsize, ... with
    amount_of_cpus always present as the last entry.

    Raises:
        ValueError: if amount_of_cpus < 1 or stepsize_cpus <= 0, because no
            usable range can be generated from those values.
    """
    if amount_of_cpus < 1 or stepsize_cpus <= 0:
        raise ValueError(
            "amount_of_cpus must be >= 1 and stepsize_cpus must be > 0 "
            "(got {} and {})".format(amount_of_cpus, stepsize_cpus)
        )

    cpu_list = np.arange(1, amount_of_cpus + 1, stepsize_cpus)

    # The stepsize may skip the maximum; always include it as the last entry.
    if cpu_list[-1] != amount_of_cpus:
        cpu_list = np.append(cpu_list, [amount_of_cpus])

    return cpu_list


def _build_scaling_population(resolution):
    """
    Create the Population object with the M_1, q and orbital period grid
    variables, sampled with the resolutions in the given dictionary
    (keys "M_1", "q" and "per").
    """
    test_pop = Population()

    test_pop.set(
        verbose=1, binary=1, parse_function=dummy_parsefunction,
    )

    test_pop.add_grid_variable(
        name="lnm1",
        longname="Primary mass",
        valuerange=[1, 100],
        resolution="{}".format(resolution["M_1"]),
        spacingfunc="const(math.log(1), math.log(100), {})".format(resolution["M_1"]),
        precode="M_1=math.exp(lnm1)",
        probdist="three_part_powerlaw(M_1, 0.1, 0.5, 1.0, 100, -1.3, -2.3, -2.3)*M_1",
        dphasevol="dlnm1",
        parameter_name="M_1",
        condition="",  # No extra condition imposed on this grid variable
    )

    test_pop.add_grid_variable(
        name="q",
        longname="Mass ratio",
        valuerange=["0.1/M_1", 1],
        resolution="{}".format(resolution['q']),
        spacingfunc="const(0.1/M_1, 1, {})".format(resolution['q']),
        probdist="flatsections(q, [{'min': 0.1/M_1, 'max': 0.8, 'height': 1}, {'min': 0.8, 'max': 1.0, 'height': 1.0}])",
        dphasevol="dq",
        precode="M_2 = q * M_1",
        parameter_name="M_2",
        condition="",  # No extra condition imposed on this grid variable
    )

    test_pop.add_grid_variable(
        name="logper",
        longname="log(Orbital_Period)",
        valuerange=[-2, 12],
        resolution="{}".format(resolution["per"]),
        spacingfunc="np.linspace(-2, 12, {})".format(resolution["per"]),
        precode="orbital_period = 10** logper\n",
        probdist="gaussian(logper,4.8, 2.3, -2.0, 12.0)",
        parameter_name="orbital_period",
        dphasevol="dln10per",
    )

    return test_pop


def run_systems_for_scaling_comparison(settings_dict):
    """
    Function that runs the systems for the scaling comparison.

    For every resolution dictionary in settings_dict['resolutions'] a
    population is configured, passed to get_linear_results and to
    get_mp_results, and the collected timings plus some metadata are written
    to <result_dir>/<hostname>_<total_systems>_systems.json.

    Args:
        settings_dict: dictionary with the following keys:
            amount_of_cpus: highest cpu count to run with (stored as
                "amt_logical_cores" in the result).
            amount_of_cores: stored as "amt_of_physical_cores" in the result.
            amt_repeats: passed on to get_linear_results and get_mp_results.
            stepsize_cpus: stepsize of the generated range of cpu counts.
            testcase: stored as "testcase" in the result.
            result_dir: directory the result json files are written to.
            resolutions: list of dictionaries with the keys "M_1", "q" and
                "per"; the product of all values of a dictionary is used as
                the total number of systems.
            (a 'plot_dir' key may be present but is not used here)

    Raises:
        KeyError: if one of the keys listed above is missing.
        ValueError: if amount_of_cpus < 1 or stepsize_cpus <= 0.
    """

    amount_of_cpus = settings_dict['amount_of_cpus']
    amount_of_cores = settings_dict['amount_of_cores']
    amt_repeats = settings_dict['amt_repeats']
    stepsize_cpus = settings_dict['stepsize_cpus']
    testcase = settings_dict['testcase']
    result_dir = settings_dict['result_dir']
    resolutions = settings_dict['resolutions']

    # These values do not depend on the resolution, so determine them once.
    hostname = socket.gethostname()
    cpu_list = _scaling_cpu_list(amount_of_cpus, stepsize_cpus)

    # Create the output directory before running anything, so that a problem
    # with the directory shows up before the timing runs instead of after.
    os.makedirs(result_dir, exist_ok=True)

    # For each set of resolutions
    for resolution in resolutions:
        total_systems = int(np.prod(list(resolution.values())))

        ##################################################################
        # Dictionary in which to store all the results
        result_dict = {
            "amt_systems": total_systems,
            "hostname": hostname,
            "amt_logical_cores": amount_of_cpus,
            "amt_of_physical_cores": amount_of_cores,
            "testcase": testcase,
        }

        #################
        # Configuring population
        test_pop = _build_scaling_population(resolution)

        ##################################################################
        # Execute grids: first the linear runs, then the MP runs
        result_dict["linear"] = get_linear_results(test_pop, amt_repeats, total_systems)
        result_dict["mp"] = get_mp_results(test_pop, cpu_list, amt_repeats, total_systems)

        print(result_dict)

        # Write the results of this resolution to its own file
        result_filename = os.path.join(
            result_dir, "{}_{}_systems.json".format(hostname, total_systems)
        )
        with open(result_filename, "w") as f:
            json.dump(result_dict, f, indent=4)
+40 −91
Original line number Diff line number Diff line
@@ -6,9 +6,9 @@ It requires some user input, which you can define at the top of the script after
The following values should be configured according to your system:
-

It will then run the population you specified, first linearly <AMT_REPEATS> times,
and then using multiprocessing it will run the population <AMT_REPEATS> times each time
with more cores. (Up until <AMOUNT_OF_CPUS>)
It will then run the population you specified, first linearly <amt_repeats> times,
and then using multiprocessing it will run the population <amt_repeats> times each time
with more cores. (Up until <amount_of_cpus>)

TODO: get the real evolution time instead of the total as well
TODO: put the methods in functions and put them in a different file
@@ -22,96 +22,45 @@ import psutil
import numpy as np

from binarycpython.utils.grid import Population
from scaling_functions import get_mp_results, get_linear_results

AMT_REPEATS = 5                         # Number of times the population will be repeated per cpu
                                        # number. Useful to get some reliable statistics
RESOLUTION = {"M_1": 50, "per": 60}     # Resolution of sampling of the population
RESULT_DIR = "scaling_results"          # Directory where the results are written to.
PLOT_DIR = "scaling_plots"              # Directory where the plots will be stored
TESTCASE = "linear vs MP batched"       # `name` of the calculation
STEPSIZE_CPUS = 1                       # Stepsize for the cpu number generator. Try to keep this
from scaling_functions import get_mp_results, get_linear_results, run_systems_for_scaling_comparison
from plot_scaling import plot_speedup_and_efficiency

settings_dict = {}
settings_dict['amt_repeats'] = 1                    # Number of times the population will be repeated per cpu
                                                    # number. Better do it several times than only run it once
settings_dict['resolutions'] = [                    # List of resolution of sampling of the population. Useful for checking whether population size has an effect on the results
    {"M_1": 10, "per": 10, "q": 2}
] 
settings_dict['result_dir'] = "scaling_results"     # Relative or absolute directory where the results are written to
settings_dict['plot_dir'] = "scaling_plots"         # Directory where the plots will be stored
settings_dict['testcase'] = "linear vs MP batched"  # 'name' of the calculation. will be used in the plot
settings_dict['stepsize_cpus'] = 1                  # Stepsize for the cpu number generator. Try to keep this
                                                    # low, to get the most reliable results

AMOUNT_OF_CPUS = 4                                 # Amount of logical cpus the machine has.
# AMOUNT_OF_CPUS = psutil.cpu_count()

AMOUNT_OF_CORES = 2                                 # The amount of physical cores. This value
settings_dict['amount_of_cpus'] = 4                 # Amount of logical cpus the machine has (this is not the same as physical cpus!)
# settings_dict['amount_of_cpus'] = psutil.cpu_count()
settings_dict['amount_of_cores'] = 2                 # The amount of physical cores. This value
                                                    # is not vital but will be used in the plot
# AMOUNT_OF_CORES = psutil.cpu_count(logical=False)   # You can also use the psutil function to get 
# settings_dict['amount_of_cores'] = psutil.cpu_count(logical=False)   # You can also use the psutil function to get 
                                                    # the amount of physical cores, but this isn't fully
                                                    # reliable (in March 2020 it didn't get this value
                                                    # right when there were multiple sockets)


# Some calculated values
TOTAL_SYSTEMS = int(np.prod([el for el in RESOLUTION.values()]))
HOSTNAME = socket.gethostname()


# Generate the range of cpu numbers
CPU_LIST = np.arange(1, AMOUNT_OF_CPUS+1, STEPSIZE_CPUS)
if not CPU_LIST[-1] == AMOUNT_OF_CPUS:
    CPU_LIST = np.append(CPU_LIST, np.array([AMOUNT_OF_CPUS]))

##################################################################
# Create dictionary in which to store all the results:
result_dict = {}

#
result_dict["amt_systems"] = TOTAL_SYSTEMS
result_dict["hostname"] = HOSTNAME
result_dict["amt_logical_cores"] = AMOUNT_OF_CPUS
result_dict["amt_of_physical_cores"] = AMOUNT_OF_CORES
result_dict["testcase"] = TESTCASE

#################
# Configuring population
test_pop = Population()

test_pop.set(
    verbose=1, binary=1,
)

test_pop.add_grid_variable(
    name="M_1",
    longname="log primary mass",
    valuerange=[1, 100],
    resolution="{}".format(RESOLUTION["M_1"]),
    spacingfunc="const(1, 100, {})".format(RESOLUTION["M_1"]),
    probdist="Kroupa2001(M_1)",
    # probdist='self.custom_options["extra_prob_function"](M_1)',
    dphasevol="dlnm1",
    parameter_name="M_1",
    condition="",
)

test_pop.add_grid_variable(
    name="period",
    longname="period",
    valuerange=["M_1", 20],
    resolution="{}".format(RESOLUTION["per"]),
    spacingfunc="np.linspace(1, 10, {})".format(RESOLUTION["per"]),
    precode="orbital_period = period**2",
    probdist="flat(orbital_period)",
    parameter_name="orbital_period",
    dphasevol="dper",
    condition='self.grid_options["binary"]==1',
run_systems_for_scaling_comparison(settings_dict)
#################################
# Files
SCALING_RESULT_DIR = settings_dict['result_dir']
RESULT_JSONS = [os.path.join(SCALING_RESULT_DIR, file) for file in os.listdir(SCALING_RESULT_DIR) if file.endswith('.json')] # Automatically grab all of the stuff, override it

# FILENAMES = [
#     "david-Lenovo-IdeaPad-S340-14IWL_100_systems.json",
#     "david-Lenovo-IdeaPad-S340-14IWL_2500_systems.json"
# ]
# RESULT_JSONS = []
# for filename in FILENAMES:
#     RESULT_JSONS.append(os.path.join(os.path.abspath(SCALING_RESULT_DIR), filename))

plot_speedup_and_efficiency(
    RESULT_JSONS,
    SCALING_RESULT_DIR,
    "Example"
)
 No newline at end of file

#######################################################################################
# Execute grids

# Linear runs
LINEAR_TIMES = get_linear_results(test_pop, AMT_REPEATS, TOTAL_SYSTEMS)
result_dict["linear"] = LINEAR_TIMES

#######################################################################################
# MP runs
MP_DICT = get_mp_results(test_pop, CPU_LIST, AMT_REPEATS, TOTAL_SYSTEMS)
result_dict["mp"] = MP_DICT

# Write to file and make sure the directory exists.
os.makedirs(RESULT_DIR, exist_ok=True)
with open(os.path.join(RESULT_DIR, "{}_{}_systems.json".format(HOSTNAME, TOTAL_SYSTEMS)), "w") as f:
    f.write(json.dumps(result_dict))