Commit 1b729990 authored by David Hendriks's avatar David Hendriks
Browse files

removed obsolete functions, cleaning up the scaling routine

parent d724f3a7
Loading
Loading
Loading
Loading
+0 −21
Original line number Diff line number Diff line
import matplotlib.pyplot as plt
import numpy as np


def amdahl(f, n):
    """
    Evaluate the Amdahl's law expression 1 / ((1 - f) + f / n).

    Plain arithmetic only, so f and n may be scalars or numpy arrays.
    """
    undivided_term = 1 - f
    divided_term = f / n
    return 1.0 / (undivided_term + divided_term)


# Amounts of cores the curves are evaluated on, and the f values to sweep over
cores = np.arange(1, 10, 0.1)
par_vals = np.arange(0, 1.1, 0.1)

# One curve of amdahl values per f value
values_list = [amdahl(par_val, cores) for par_val in par_vals]

# Draw every curve as a solid blue line in the same figure
for curve in values_list:
    plt.plot(cores, curve, "b-")
plt.show()
+125 −167
Original line number Diff line number Diff line
import os
import json
import math

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np


def calc_mean_and_std(arr):
    """
    Return the tuple (mean, standard deviation) of arr, both computed by numpy.
    """
    mean_value = np.mean(arr)
    std_value = np.std(arr)
    return mean_value, std_value


def calculate_results(results, unique_amt_cores, unique_amt_systems):
    """
    Aggregate the raw result rows per (cores, systems) combination.

    Rows are read by position: index 0 is compared against the amount of
    cores, index 1 against the amount of systems, index 2 is the sequential
    time, index 3 the multiprocessing time and index 4 the ratio.

    Returns a list with one dict per combination that has at least one row,
    holding the means, the standard deviations and the number of runs.
    Combinations are visited cores first, systems second.
    """

    calculated_results = []

    for amt_cores in unique_amt_cores:
        for amt_systems in unique_amt_systems:
            # All rows that were measured with exactly this combination
            matching_rows = [
                row
                for row in results
                if row[0] == amt_cores and row[1] == amt_systems
            ]

            # No measurements for this combination: no entry in the output
            if not matching_rows:
                continue

            # Means and standard deviations of the three measured columns
            mean_time_sequential, std_sequential = calc_mean_and_std(
                np.array([row[2] for row in matching_rows])
            )
            mean_time_multiprocessing, std_multiprocessing = calc_mean_and_std(
                np.array([row[3] for row in matching_rows])
            )
            mean_ratio, std_ratio = calc_mean_and_std(
                np.array([row[4] for row in matching_rows])
            )

            calculated_results.append(
                {
                    "cores": amt_cores,
                    "systems": amt_systems,
                    "mean_time_sequential": mean_time_sequential,
                    "mean_time_multiprocessing": mean_time_multiprocessing,
                    "mean_ratio": mean_ratio,
                    "std_sequential": std_sequential,
                    "std_multiprocessing": std_multiprocessing,
                    "std_ratio": std_ratio,
                    "total_runs": len(matching_rows),
                }
            )

    return calculated_results


def plot_speedup_and_efficiency(
    calculated_results, unique_amt_cores, unique_amt_systems
):
    """
    Plot the mean speed up ratio (with error bars) and the efficiency against
    the amount of cores, with one series per amount of systems.

    The figure is saved as png, pdf and eps and is shown afterwards.

    Input:
        calculated_results: list of dicts with the keys "systems", "cores",
            "mean_ratio", "std_ratio" and "total_runs"
        unique_amt_cores: amounts of cores; only its maximum is used, to set
            the upper limit of the x-axis
        unique_amt_systems: amounts of systems; one series is drawn per entry

    Relies on the module-level names name_testcase (used in the title and in
    the filenames) and img_dir (directory the figures are written to).
    """
    x_position_shift = 0
    y_position_shift = -0.05
    max_speedup = 0

    # https://stackoverflow.com/questions/46323530/matplotlib-plot-two-x-axes-one-linear-and-one-with-logarithmic-ticks
    fig, ax1 = plt.subplots()
    ax2 = ax1.twinx()
    for amt_systems in unique_amt_systems:
        # Entries that belong to this amount of systems
        selected = [el for el in calculated_results if el["systems"] == amt_systems]

        # Every series is shifted a bit along x so the markers do not overlap
        cores = [el["cores"] + x_position_shift for el in selected]
        speedup = [el["mean_ratio"] for el in selected]
        std = [el["std_ratio"] for el in selected]
        efficiency = [el["mean_ratio"] / el["cores"] for el in selected]

        for el in selected:
            # Keep track of the largest speed up, it sets the y-axis limit
            max_speedup = max(max_speedup, el["mean_ratio"])

            # add number of runs its based on
            ax1.text(
                el["cores"] + x_position_shift + 0.01,
                el["mean_ratio"] + y_position_shift,
                el["total_runs"],
            )

        ax1.errorbar(
            cores,
            speedup,
            std,
            linestyle="None",
            marker="^",
            label="Speed up & efficiency of {} systems".format(amt_systems),
        )

        ax2.plot(cores, efficiency, alpha=0.5)
        x_position_shift += 0.1

    ax1.set_title(
        "Speed up ratio vs amount of cores for different amounts of systems on {}".format(
            name_testcase
        )
    )
    ax1.set_xlabel("Amount of cores used")
    ax1.set_ylabel("Speed up ratio (time_linear/time_parallel)")

    ax1.set_xlim(0, max(unique_amt_cores) + 4)
    ax1.set_ylim(0, max_speedup + 2)
    ax2.set_ylim(0, 1)

    ax1.grid()
    ax1.legend(loc=4)

    # savefig does not create missing directories, so make sure it exists
    if img_dir:
        os.makedirs(img_dir, exist_ok=True)
    for extension in ("png", "pdf", "eps"):
        fig.savefig(
            os.path.join(
                img_dir, "speedup_scaling_{}.{}".format(name_testcase, extension)
            )
        )
    plt.show()


# Old plotting routine, needs updating
def plot_runtime(calculated_results, unique_amt_cores, unique_amt_systems):
    fig = plt.figure()
    ax = fig.add_subplot(111)
@@ -211,47 +85,131 @@ def plot_runtime(calculated_results, unique_amt_cores, unique_amt_systems):

    plt.show()

def amdahl(f, n):
    """
    Evaluate the Amdahl's law expression 1 / ((1 - f) + f / n).
    """
    denominator = (1 - f) + (f / n)
    return 1.0 / denominator

def plot_speedup_and_efficiency(result_json_filenames, plot_output_dir, name_testcase, machine):
    """
    Plotting routine to plot the speedup and efficiency of scaling

    Every result json has to provide the keys "linear" (run times of the
    linear run), "mp" (run times per amount of cpus) and "amt_systems"
    (used in the legend label). One series is drawn per result json.

    Input:
        result_json_filenames: filenames of the result jsons to plot
        plot_output_dir: directory used in the paths of the saved figures
        name_testcase: name used in the filenames of the saved figures
        machine: name that is put in the title of the plot
    """

    # Setup figure
    fig, ax1 = plt.subplots()
    ax2 = ax1.twinx()

    # Largest amount of cpus over all result files, for the 100% scaling line
    max_cpus = 0

    # Go over all result jsons
    for jsonfile in result_json_filenames:
        print("Processing {}".format(jsonfile))

        # Only the reading needs the file to be open
        with open(jsonfile, "r") as f:
            result_data = json.load(f)

        # Get linear data
        linear_data = result_data["linear"]
        linear_mean = np.mean(linear_data)
        linear_stdev = np.std(linear_data)

        # Get multiprocessing data
        cpus, speedups, efficiencies, stddev_speedups = [], [], [], []
        for amt_cpus_key, mp_data in result_data["mp"].items():

            # Get mp data
            mp_mean = np.mean(mp_data)
            mp_stdev = np.std(mp_data)

            # Calc: the relative errors of both means are added in quadrature
            # and scaled with the speedup to get its standard deviation
            amt_cpus = int(amt_cpus_key)
            speedup = linear_mean / mp_mean
            stddev_speedup = (
                math.sqrt((linear_stdev / linear_mean) ** 2 + (mp_stdev / mp_mean) ** 2)
                * speedup
            )
            efficiency = speedup / amt_cpus

            # Add to lists
            cpus.append(amt_cpus)
            efficiencies.append(efficiency)
            speedups.append(speedup)
            stddev_speedups.append(stddev_speedup)
            max_cpus = max(max_cpus, amt_cpus)

        # Plot
        ax1.errorbar(
            cpus,
            speedups,
            stddev_speedups,
            linestyle="None",
            marker="^",
            label="Speed up & efficiency of {} systems".format(
                result_data["amt_systems"]
            ),
        )

        # Plot the efficiencies
        ax2.plot(cpus, efficiencies, alpha=0.5)

    #####################
    # Extra plots

    # 100 % scaling line up to the largest amount of cpus of all result files;
    # skipped when no result file provided any multiprocessing data
    if max_cpus:
        ax1.plot([1, max_cpus], [1, max_cpus], '--', alpha=0.25, label='100% scaling')

    # Amdahl's law fitting

# Configure
result_file = "comparison_result_laptop.dat"
# result_file = "comparison_result_astro1.dat"
name_testcase = "laptop"
img_dir = "scaling_plots"

# Column headers of the result rows, in the order of the fields of a row
headers = [
    "cores",
    "total_systems",
    "total_time_sequentially",
    "total_time_multiprocessing",
    "ratio",
]

# Readout file: every line is evaluated as a python expression giving one row
results = []
with open(result_file, "r") as f:
    for line in f:
        # NOTE(review): eval runs whatever code the result file contains; only
        # use files you produced yourself. ast.literal_eval would be the safe
        # choice if the lines are plain literals - confirm before switching.
        res = list(eval(line.strip()))

        # Rows that lack the ratio field get it appended as
        # total_time_sequentially / total_time_multiprocessing, so that every
        # row ends up with exactly one field per header
        if len(res) == len(headers) - 1:
            res.append(res[-2] / res[-1])
        results.append(res)

# make dataframe and set correct headers.
df = pd.DataFrame(results)
df.columns = headers

# Select unique amounts
unique_amt_cores = df["cores"].unique()
unique_amt_systems = df["total_systems"].unique()
    # Old stuff
    # Do Amdahls law fitting
    # cores = np.arange(1, 48, 0.1)
    # values_list = []
    # par_step = 0.005
    # par_vals = np.arange(.95, 1, par_step)

# Aggregate the raw rows: one dict per (cores, systems) combination, holding
# the means, the standard deviations and the number of runs
calculated_results = calculate_results(results, unique_amt_cores, unique_amt_systems)
    # for par_val in par_vals:
    #     values = amdahl(par_val, cores)
    #     values_list.append(values)

########################################################################################
# Plot speed up and efficiency against the amount of cores.
plot_speedup_and_efficiency(calculated_results, unique_amt_cores, unique_amt_systems)
    # for i, values in enumerate(values_list):
    #     ax1.plot(cores, values, label="par_val={}".format(par_vals[i]))

    #################################
    # Adding plot make up
    # The title ends with the value of the machine argument
    ax1.set_title(
        "Speed up ratio vs amount of cores for different amounts of systems on {}".format(
            machine
        )
    )

    ax1.set_xlabel("Amount of cores used")
    ax1.set_ylabel("Speed up ratio (time_linear/time_parallel)")

    ax1.grid()
    # loc=4 is matplotlib's location code for the lower right corner
    ax1.legend(loc=4)
    # Put the x-axis of both axes objects on a logarithmic scale
    ax1.set_xscale("log")
    ax2.set_xscale("log")

    # Save as png and as pdf in plot_output_dir; name_testcase is part of the filename
    fig.savefig(os.path.join(plot_output_dir, "speedup_scaling_{}.{}".format(name_testcase, "png")))
    fig.savefig(os.path.join(plot_output_dir, "speedup_scaling_{}.{}".format(name_testcase, "pdf")))
    plt.show()

#################################
# Files
SCALING_RESULT_DIR = "scaling_results"
FILENAMES = [
    "david-Lenovo-IdeaPad-S340-14IWL_100_systems.json",
    # "astro2_2500_systems.json",
    # "astro2_3000_systems.json",
]

# Absolute path of every result json inside the scaling result directory
RESULT_JSONS = [
    os.path.join(os.path.abspath(SCALING_RESULT_DIR), filename)
    for filename in FILENAMES
]

########################################################################################
# Plot run time
plot_runtime(calculated_results, unique_amt_cores, unique_amt_systems)

# Plot speed up and efficiency of the result jsons; the figures are written to
# the scaling result directory
plot_speedup_and_efficiency(RESULT_JSONS, SCALING_RESULT_DIR, "Example", "laptop_david")
 No newline at end of file
+0 −119
Original line number Diff line number Diff line
import matplotlib
import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import json
import math


def amdahl(f, n):
    """
    Evaluate the Amdahl's law expression 1 / ((1 - f) + f / n).
    """
    first_term = 1 - f
    second_term = f / n
    return 1.0 / (first_term + second_term)


#################################
# Files
scaling_result_dir = "scaling_results"
filenames = [
    "astro2_2500_systems.json",
    "astro2_3000_systems.json",
]

# Absolute path of every result json inside the scaling result directory
result_jsons = [
    os.path.join(os.path.abspath(scaling_result_dir), filename)
    for filename in filenames
]

#################################
# Plotting of the scaling results
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()
for jsonfile in result_jsons:
    print(jsonfile)

    # Read the results of this file
    with open(jsonfile, "r") as f:
        result_data = json.load(f)

    # Mean and standard deviation of the linear run times
    linear_data = result_data["linear"]
    linear_mean, linear_stdev = np.mean(linear_data), np.std(linear_data)

    # Collect one point per amount of cpus
    cpus, speedups, efficiencies, stddev_speedups = [], [], [], []
    for amt_cpus, mp_data in result_data["mp"].items():
        # Mean and standard deviation of the multiprocessing run times
        mp_mean, mp_stdev = np.mean(mp_data), np.std(mp_data)

        # The relative errors of both means are added in quadrature and
        # scaled with the speedup to get its standard deviation
        amt_cpus = int(amt_cpus)
        speedup = linear_mean / mp_mean
        relative_error = math.sqrt(
            (linear_stdev / linear_mean) ** 2 + (mp_stdev / mp_mean) ** 2
        )
        stddev_speedup = relative_error * speedup
        efficiency = speedup / amt_cpus

        cpus.append(amt_cpus)
        efficiencies.append(efficiency)
        speedups.append(speedup)
        stddev_speedups.append(stddev_speedup)

    # Speedups as markers with error bars on the first axis
    series_label = "Speed up & efficiency of {} systems".format(
        result_data["amt_systems"]
    )
    ax1.errorbar(
        cpus,
        speedups,
        stddev_speedups,
        linestyle="None",
        marker="^",
        label=series_label,
    )

    # Efficiencies as a line on the second axis
    ax2.plot(cpus, efficiencies, alpha=0.5)


# Do Amdahls law fitting
# cores = np.arange(1, 48, 0.1)
# values_list = []
# par_step = 0.005
# par_vals = np.arange(.95, 1, par_step)


# for par_val in par_vals:
#     values = amdahl(par_val, cores)
#     values_list.append(values)

# for i, values in enumerate(values_list):
#     ax1.plot(cores, values, label="par_val={}".format(par_vals[i]))


#################################
# Adding plot make up
# NOTE(review): the title is formatted with the literal string "name_testcase",
# not with a variable - confirm whether that is intended
plot_title = "Speed up ratio vs amount of cores for different amounts of systems on {}".format(
    "name_testcase"
)
ax1.set_title(plot_title)

# Disabled: 100% scaling line
# ax1.plot([1, max(cpus)], [1, max(cpus)], label='100% scaling')

ax1.set_xlabel("Amount of cores used")
ax1.set_ylabel("Speed up ratio (time_linear/time_parallel)")

# Disabled: fixed axis limits
# ax1.set_xlim(0, max(cpus) + 4)
# ax2.set_ylim(0, 1)

ax1.grid()
ax1.legend(loc=4)

# Put the x-axis of both axes objects on a logarithmic scale
for axis in (ax1, ax2):
    axis.set_xscale("log")

# Disabled: saving of the figure
# fig.savefig(os.path.join(img_dir, "speedup_scaling_{}.{}".format(name_testcase, "png")))
# fig.savefig(os.path.join(img_dir, "speedup_scaling_{}.{}".format(name_testcase, "pdf")))
# fig.savefig(os.path.join(img_dir, "speedup_scaling_{}.{}".format(name_testcase, "eps")))
plt.show()