removed obsolete functions, cleaning up the scaling routine (1b729990) · Commits · binary_c / binary_c-python

tests/population/scaling/argparseo.py→tests/population/scaling/argparse_setup.py

+0 −0

File moved.

tests/population/scaling/plot_amdahl.py

deleted100644 → 0

+0 −21

Original line number	Diff line number	Diff line
		import matplotlib.pyplot as plt
		import numpy as np


		def amdahl(f, n):
		    """
		    Return the speed-up predicted by Amdahl's law.

		    f: fraction of the work that is divided over the workers
		    n: number of workers (a scalar or a numpy array; the expression
		       broadcasts element-wise over arrays)
		    """
		    return 1.0 / ((1 - f) + (f / n))


		# Grid of (fractional) core counts and the parallel fractions to draw.
		cores = np.arange(1, 10, 0.1)
		par_vals = np.arange(0, 1.1, 0.1)

		# One speed-up curve per parallel fraction; amdahl() is evaluated on the
		# whole `cores` array at once.
		values_list = [amdahl(par_val, cores) for par_val in par_vals]

		# Draw every curve in the same figure and show it once at the end.
		for values in values_list:
		    plt.plot(cores, values, "b-")
		plt.show()

tests/population/scaling/plot_scaling.py

+125 −167

Original line number	Diff line number	Diff line
		import os
		import json
		import math

		import matplotlib.pyplot as plt
		import pandas as pd
		import numpy as np


		def calc_mean_and_std(arr):
		    """
		    Return a (mean, standard deviation) tuple for the values in arr,
		    computed with np.mean and np.std using their default arguments.
		    """
		    return np.mean(arr), np.std(arr)


		def calculate_results(results, unique_amt_cores, unique_amt_systems):
		    """
		    Function to calculate the mean and standard deviation of the timing
		    results for every (cores, systems) combination.

		    Each entry of `results` is read by position:
		        [0] amount of cores, [1] amount of systems,
		        [2] sequential run time, [3] multiprocessing run time, [4] ratio

		    Returns a list with one dict per combination that has at least one
		    matching entry; combinations without entries are left out.
		    """

		    calculated_results = []
		    for amt_cores in unique_amt_cores:
		        for amt_systems in unique_amt_systems:
		            # All runs that were done with this exact combination
		            matching = [
		                res
		                for res in results
		                if res[0] == amt_cores and res[1] == amt_systems
		            ]

		            # Nothing measured for this combination
		            if not matching:
		                continue

		            # calculate stuff
		            mean_time_sequential, std_sequential = calc_mean_and_std(
		                np.array([res[2] for res in matching])
		            )
		            mean_time_multiprocessing, std_multiprocessing = calc_mean_and_std(
		                np.array([res[3] for res in matching])
		            )
		            mean_ratio, std_ratio = calc_mean_and_std(
		                np.array([res[4] for res in matching])
		            )

		            # make dict
		            calculated_results.append(
		                {
		                    "cores": amt_cores,
		                    "systems": amt_systems,
		                    "mean_time_sequential": mean_time_sequential,
		                    "mean_time_multiprocessing": mean_time_multiprocessing,
		                    "mean_ratio": mean_ratio,
		                    "std_sequential": std_sequential,
		                    "std_multiprocessing": std_multiprocessing,
		                    "std_ratio": std_ratio,
		                    "total_runs": len(matching),
		                }
		            )

		    return calculated_results


		def plot_speedup_and_efficiency(
		    calculated_results, unique_amt_cores, unique_amt_systems
		):
		    """
		    Plot the speed-up (left axis, with error bars) and the efficiency
		    (right axis) against the amount of cores, one series per amount of
		    systems.

		    calculated_results: list of dicts with at least the keys "systems",
		        "cores", "mean_ratio", "std_ratio" and "total_runs"
		    unique_amt_cores: amounts of cores; only used for the x-axis limit
		    unique_amt_systems: amounts of systems; one series is drawn for each

		    Reads the module-level names `name_testcase` and `img_dir` for the
		    title and the output location. Saves the figure as png, pdf and eps
		    and then shows it.
		    """
		    x_position_shift = 0
		    y_position_shift = -0.05
		    max_speedup = 0

		    # https://stackoverflow.com/questions/46323530/matplotlib-plot-two-x-axes-one-linear-and-one-with-logarithmic-ticks
		    fig, ax1 = plt.subplots()
		    ax2 = ax1.twinx()
		    for amt_systems in unique_amt_systems:
		        cores = []
		        speedup = []
		        std = []
		        efficiency = []

		        for el in calculated_results:
		            if el["systems"] != amt_systems:
		                continue

		            # Each series is shifted a bit along x so the markers of
		            # different series do not overlap
		            cores.append(el["cores"] + x_position_shift)
		            speedup.append(el["mean_ratio"])
		            std.append(el["std_ratio"])
		            efficiency.append(el["mean_ratio"] / el["cores"])

		            # Keep track of the highest speed-up for the y-axis limit
		            if el["mean_ratio"] > max_speedup:
		                max_speedup = el["mean_ratio"]

		            # add the number of runs it's based on
		            ax1.text(
		                el["cores"] + x_position_shift + 0.01,
		                el["mean_ratio"] + y_position_shift,
		                el["total_runs"],
		            )

		        ax1.errorbar(
		            cores,
		            speedup,
		            std,
		            linestyle="None",
		            marker="^",
		            label="Speed up & efficiency of {} systems".format(amt_systems),
		        )

		        ax2.plot(cores, efficiency, alpha=0.5)
		        x_position_shift += 0.1

		    ax1.set_title(
		        "Speed up ratio vs amount of cores for different amounts of systems on {}".format(
		            name_testcase
		        )
		    )
		    ax1.set_xlabel("Amount of cores used")
		    ax1.set_ylabel("Speed up ratio (time_linear/time_parallel)")

		    ax1.set_xlim(0, max(unique_amt_cores) + 4)
		    ax1.set_ylim(0, max_speedup + 2)
		    ax2.set_ylim(0, 1)

		    ax1.grid()
		    ax1.legend(loc=4)

		    # Same figure, written once per output format
		    for extension in ("png", "pdf", "eps"):
		        fig.savefig(
		            os.path.join(
		                img_dir, "speedup_scaling_{}.{}".format(name_testcase, extension)
		            )
		        )
		    plt.show()


		# Old plotting routine, needs updating
		def plot_runtime(calculated_results, unique_amt_cores, unique_amt_systems):
		fig = plt.figure()
		ax = fig.add_subplot(111)
		@@ -211,47 +85,131 @@ def plot_runtime(calculated_results, unique_amt_cores, unique_amt_systems):

		plt.show()

		def amdahl(f, n):
		    """
		    Return the speed-up predicted by Amdahl's law.

		    f: fraction of the work that is divided over the workers
		    n: number of workers (a scalar or a numpy array; the expression
		       broadcasts element-wise over arrays)
		    """
		    return 1.0 / ((1 - f) + (f / n))

		def plot_speedup_and_efficiency(result_json_filenames, plot_output_dir, name_testcase, machine):
		"""
		Plotting routine to plot the speedup and efficiency of scaling

		Takes the name_testcase and hostname values of
		the first json file to add some info to the plots.
		"""

		# Setup figure
		fig, ax1 = plt.subplots()
		ax2 = ax1.twinx()

		# Go over all result jsons
		for i, jsonfile in enumerate(result_json_filenames):
		print("Processing {}".format(jsonfile))
		with open(jsonfile, "r") as f:
		result_data = json.loads(f.read())

		# if i==0:
		# name_testcase = result_data['name_testcase']
		# hostname = result_data['hostname']

		# Get linear data
		linear_data = result_data["linear"]
		linear_mean = np.mean(linear_data)
		linear_stdev = np.std(linear_data)

		# Get multiprocessing data
		cpus, speedups, efficiencies, stddev_speedups = [], [], [], []
		for amt_cpus in result_data["mp"]:

		# Get mp data
		mp_data = result_data["mp"][amt_cpus]
		mp_mean = np.mean(mp_data)
		mp_stdev = np.std(mp_data)

		# Calc
		amt_cpus = int(amt_cpus)
		speedup = linear_mean / mp_mean
		stddev_speedup = (
		math.sqrt((linear_stdev / linear_mean) ** 2 + (mp_stdev / mp_mean) ** 2)
		* speedup
		)
		efficiency = speedup / int(amt_cpus)

		# Add to lists
		cpus.append(amt_cpus)
		efficiencies.append(efficiency)
		speedups.append(speedup)
		stddev_speedups.append(stddev_speedup)

		# Plot
		ax1.errorbar(
		cpus,
		speedups,
		stddev_speedups,
		linestyle="None",
		marker="^",
		label="Speed up & efficiency of {} systems".format(
		result_data["amt_systems"]
		),
		)

		# Plot the efficiencies
		ax2.plot(cpus, efficiencies, alpha=0.5)

		# x_position_shift += 0.1

		#####################
		# Extra plots

		# 100 % scaling line
		ax1.plot([1, max(cpus)], [1, max(cpus)], '--', alpha=0.25, label='100% scaling')

		# Amdahls law fitting

		# Configure
		result_file = "comparison_result_laptop.dat"
		# result_file = "comparison_result_astro1.dat"
		name_testcase = "laptop"
		img_dir = "scaling_plots"

		# Readout file
		results = []
		with open(result_file, "r") as f:
		for line in f:
		res = list(eval(line.strip()))
		if len(res) == 6:
		res.append(res[-2] / res[-1])
		results.append(res)

		# make dataframe and set correct headers.
		headers = [
		"cores",
		"total_systems",
		"total_time_sequentially",
		"total_time_multiprocessing",
		"ratio",
		]

		df = pd.DataFrame(results)
		df.columns = headers

		# Select unique amounts
		unique_amt_cores = df["cores"].unique()
		unique_amt_systems = df["total_systems"].unique()
		# Old stuff
		# Do Amdahls law fitting
		# cores = np.arange(1, 48, 0.1)
		# values_list = []
		# par_step = 0.005
		# par_vals = np.arange(.95, 1, par_step)

		# Create dictionary with calculated means and stdevs etc
		calculated_results = calculate_results(results, unique_amt_cores, unique_amt_systems)
		# for par_val in par_vals:
		# values = amdahl(par_val, cores)
		# values_list.append(values)

		########################################################################################
		# Plot speed up and efficiency.
		plot_speedup_and_efficiency(calculated_results, unique_amt_cores, unique_amt_systems)
		# for i, values in enumerate(values_list):
		# ax1.plot(cores, values, label="par_val={}".format(par_vals[i]))

		#################################
		# Adding plot make up
		ax1.set_title(
		"Speed up ratio vs amount of cores for different amounts of systems on {}".format(
		machine
		)
		)

		ax1.set_xlabel("Amount of cores used")
		ax1.set_ylabel("Speed up ratio (time_linear/time_parallel)")

		ax1.grid()
		ax1.legend(loc=4)
		ax1.set_xscale("log")
		ax2.set_xscale("log")

		fig.savefig(os.path.join(plot_output_dir, "speedup_scaling_{}.{}".format(name_testcase, "png")))
		fig.savefig(os.path.join(plot_output_dir, "speedup_scaling_{}.{}".format(name_testcase, "pdf")))
		plt.show()

		#################################
		# Files
		SCALING_RESULT_DIR = "scaling_results"
		# Names of the result files to plot. Every entry ends with a comma so
		# that uncommenting one of the extra files cannot silently glue two
		# string literals together.
		FILENAMES = [
		    "david-Lenovo-IdeaPad-S340-14IWL_100_systems.json",
		    # "astro2_2500_systems.json",
		    # "astro2_3000_systems.json",
		]
		# Absolute path of every result file inside SCALING_RESULT_DIR
		RESULT_JSONS = [
		    os.path.join(os.path.abspath(SCALING_RESULT_DIR), filename)
		    for filename in FILENAMES
		]

		########################################################################################
		# Plot run time
		plot_runtime(calculated_results, unique_amt_cores, unique_amt_systems)
		plot_speedup_and_efficiency(RESULT_JSONS, SCALING_RESULT_DIR, "Example", "laptop_david")
		No newline at end of file

tests/population/scaling/plot_scaling_new.py

deleted100644 → 0

+0 −119

Original line number	Diff line number	Diff line
		import matplotlib
		import os
		import matplotlib.pyplot as plt
		import pandas as pd
		import numpy as np
		import json
		import math


		def amdahl(f, n):
		    """
		    Return the speed-up predicted by Amdahl's law.

		    f: fraction of the work that is divided over the workers
		    n: number of workers (a scalar or a numpy array; the expression
		       broadcasts element-wise over arrays)
		    """
		    return 1.0 / ((1 - f) + (f / n))


		#################################
		# Files
		scaling_result_dir = "scaling_results"
		filenames = [
		    "astro2_2500_systems.json",
		    "astro2_3000_systems.json",
		]
		# Absolute path of every result file inside scaling_result_dir
		result_jsons = [
		    os.path.join(os.path.abspath(scaling_result_dir), filename)
		    for filename in filenames
		]

		#################################
		# Plotting of the scaling results
		# Speed-up goes on the left axis (ax1), efficiency on the right axis (ax2)
		fig, ax1 = plt.subplots()
		ax2 = ax1.twinx()
		for jsonfile in result_jsons:
		    print(jsonfile)

		    with open(jsonfile, "r") as f:
		        result_data = json.load(f)

		    # Get linear data
		    linear_data = result_data["linear"]
		    linear_mean = np.mean(linear_data)
		    linear_stdev = np.std(linear_data)

		    cpus, speedups, efficiencies, stddev_speedups = [], [], [], []
		    for amt_cpus in result_data["mp"]:
		        # Get mp data
		        mp_data = result_data["mp"][amt_cpus]
		        mp_mean = np.mean(mp_data)
		        mp_stdev = np.std(mp_data)

		        # Calc
		        # The keys of result_data["mp"] are converted to int before use
		        amt_cpus = int(amt_cpus)
		        speedup = linear_mean / mp_mean
		        # Relative errors of both run times added in quadrature, scaled
		        # back to an absolute error on the speed-up
		        stddev_speedup = (
		            math.sqrt((linear_stdev / linear_mean) ** 2 + (mp_stdev / mp_mean) ** 2)
		            * speedup
		        )
		        efficiency = speedup / amt_cpus

		        # Add to lists
		        cpus.append(amt_cpus)
		        efficiencies.append(efficiency)
		        speedups.append(speedup)
		        stddev_speedups.append(stddev_speedup)

		    # Plot
		    ax1.errorbar(
		        cpus,
		        speedups,
		        stddev_speedups,
		        linestyle="None",
		        marker="^",
		        label="Speed up & efficiency of {} systems".format(
		            result_data["amt_systems"]
		        ),
		    )

		    ax2.plot(cpus, efficiencies, alpha=0.5)

		    # x_position_shift += 0.1


		# Do Amdahls law fitting
		# cores = np.arange(1, 48, 0.1)
		# values_list = []
		# par_step = 0.005
		# par_vals = np.arange(.95, 1, par_step)


		# for par_val in par_vals:
		#     values = amdahl(par_val, cores)
		#     values_list.append(values)

		# for i, values in enumerate(values_list):
		#     ax1.plot(cores, values, label="par_val={}".format(par_vals[i]))


		#################################
		# Adding plot make up
		ax1.set_title(
		    "Speed up ratio vs amount of cores for different amounts of systems on {}".format(
		        "name_testcase"
		    )
		)

		# ax1.plot([1, max(cpus)], [1, max(cpus)], label='100% scaling')

		ax1.set_xlabel("Amount of cores used")
		ax1.set_ylabel("Speed up ratio (time_linear/time_parallel)")

		# ax1.set_xlim(0, max(cpus) + 4)
		# ax2.set_ylim(0, 1)


		ax1.grid()
		ax1.legend(loc=4)
		ax1.set_xscale("log")
		ax2.set_xscale("log")

		# fig.savefig(os.path.join(img_dir, "speedup_scaling_{}.{}".format(name_testcase, "png")))
		# fig.savefig(os.path.join(img_dir, "speedup_scaling_{}.{}".format(name_testcase, "pdf")))
		# fig.savefig(os.path.join(img_dir, "speedup_scaling_{}.{}".format(name_testcase, "eps")))
		plt.show()