almost done with a nice scaling routine (208a9644) · Commits · binary_c / binary_c-python

tests/population/scaling/plot_scaling.py

+13 −30

Original line number	Diff line number	Diff line
		@@ -88,7 +88,7 @@ def plot_runtime(calculated_results, unique_amt_cores, unique_amt_systems):
		def amdahl(f, n):
		    """
		    Evaluate the Amdahl's-law expression 1 / ((1 - f) + f / n).

		    Input:
		        f: fraction that is divided over n in the expression
		        n: divisor applied to that fraction (must be non-zero)

		    Returns the resulting ratio as a float.
		    """
		    undivided_part = 1 - f
		    divided_part = f / n
		    return 1.0 / (undivided_part + divided_part)

		def plot_speedup_and_efficiency(result_json_filenames, plot_output_dir, name_testcase, machine):
		def plot_speedup_and_efficiency(result_json_filenames, plot_output_dir, name_testcase):
		"""
		Plotting routine to plot the speedup and efficiency of scaling

		@@ -106,10 +106,6 @@ def plot_speedup_and_efficiency(result_json_filenames, plot_output_dir, name_tes
		with open(jsonfile, "r") as f:
		result_data = json.loads(f.read())

		# if i==0:
		# name_testcase = result_data['name_testcase']
		# hostname = result_data['hostname']

		# Get linear data
		linear_data = result_data["linear"]
		linear_mean = np.mean(linear_data)
		@@ -146,14 +142,14 @@ def plot_speedup_and_efficiency(result_json_filenames, plot_output_dir, name_tes
		stddev_speedups,
		linestyle="None",
		marker="^",
		label="Speed up & efficiency of {} systems".format(
		result_data["amt_systems"]
		),
		label="{}".format(os.path.basename(jsonfile)),
		# label="Speed up & efficiency of {} systems".format(
		# result_data["amt_systems"]
		# ),
		)

		# Plot the efficiencies
		ax2.plot(cpus, efficiencies, alpha=0.5)

		ax2.plot(cpus, efficiencies, alpha=0.5, linestyle='dotted')
		# x_position_shift += 0.1

		#####################
		@@ -161,11 +157,11 @@ def plot_speedup_and_efficiency(result_json_filenames, plot_output_dir, name_tes

		# 100 % scaling line
		ax1.plot([1, max(cpus)], [1, max(cpus)], '--', alpha=0.25, label='100% scaling')

		# Amdahls law fitting

		ax2.axhline(y=1, linestyle='--', alpha=0.25, label='100% efficient')
		# ax1.plot([1, max(cpus)], [1, max(cpus)], '--', alpha=0.25, label='100% scaling')


		# Amdahls law fitting
		# Old stuff
		# Do Amdahls law fitting
		# cores = np.arange(1, 48, 0.1)
		@@ -183,9 +179,7 @@ def plot_speedup_and_efficiency(result_json_filenames, plot_output_dir, name_tes
		#################################
		# Adding plot make up
		ax1.set_title(
		"Speed up ratio vs amount of cores for different amounts of systems on {}".format(
		machine
		)
		"Speed up ratio (left y, symbols) and efficiency (right y, dotted line) vs amount of cores"
		)

		ax1.set_xlabel("Amount of cores used")
		@@ -196,20 +190,9 @@ def plot_speedup_and_efficiency(result_json_filenames, plot_output_dir, name_tes
		ax1.set_xscale("log")
		ax2.set_xscale("log")

		ax2.set_ylim(ymin=0, ymax=None)


		fig.savefig(os.path.join(plot_output_dir, "speedup_scaling_{}.{}".format(name_testcase, "png")))
		fig.savefig(os.path.join(plot_output_dir, "speedup_scaling_{}.{}".format(name_testcase, "pdf")))
		plt.show()

		#################################
		# Files
		# Directory holding the scaling result files; also passed on as the plot output directory.
		SCALING_RESULT_DIR = "scaling_results"

		# Result files to include in the plot (alternatives kept commented out).
		FILENAMES = [
		    "david-Lenovo-IdeaPad-S340-14IWL_100_systems.json"
		    # "astro2_2500_systems.json",
		    # "astro2_3000_systems.json",
		]

		# Absolute path of every selected result file.
		RESULT_JSONS = [
		    os.path.join(os.path.abspath(SCALING_RESULT_DIR), result_name)
		    for result_name in FILENAMES
		]

		plot_speedup_and_efficiency(
		    RESULT_JSONS,
		    SCALING_RESULT_DIR,
		    "Example",
		    "laptop_david",
		)
		No newline at end of file

tests/population/scaling/scaling_functions.py

+117 −1

Original line number	Diff line number	Diff line
		@@ -3,6 +3,20 @@ Module containing the scaling functions.
		"""

		import time
		import socket
		import os

		import json

		import numpy as np

		from binarycpython.utils.grid import Population

		def dummy_parsefunction(self, output):
		    """
		    Placeholder parse function: ignores both arguments and returns None.
		    """
		    return None

		def get_mp_results(population, cpu_list, amt_repeats, total_systems):
		"""
		@@ -31,7 +45,7 @@ def get_mp_results(population, cpu_list, amt_repeats, total_systems):
		)
		mp_times.append(total_mp)

		mp_dict[cpu_amt] = mp_times
		mp_dict[str(cpu_amt)] = mp_times

		return mp_dict

		@@ -55,3 +69,105 @@ def get_linear_results(population, amt_repeats, total_systems):
		linear_times.append(total_lin)

		return linear_times

		def run_systems_for_scaling_comparison(settings_dict):
		    """
		    Function that runs the systems for the scaling comparison.

		    For every entry of settings_dict['resolutions'] a population is
		    configured, timed with get_linear_results and get_mp_results, and the
		    timings plus some metadata are written to
		    <result_dir>/<hostname>_<total_systems>_systems.json.

		    Input:
		        settings_dict: dictionary with the keys
		            'amount_of_cpus':  highest cpu count passed to the MP runs (>= 1)
		            'amount_of_cores': stored in the result file only
		            'amt_repeats':     passed on to the linear and MP timing functions
		            'stepsize_cpus':   step between successive cpu counts (> 0)
		            'testcase':        stored in the result file only
		            'result_dir':      directory the json files are written to
		            'resolutions':     iterable of dicts with the keys 'M_1', 'q', 'per'

		    Raises:
		        KeyError: if one of the keys listed above is missing
		        ValueError: if 'amount_of_cpus' < 1 or 'stepsize_cpus' <= 0
		    """

		    amount_of_cpus = settings_dict['amount_of_cpus']
		    amount_of_cores = settings_dict['amount_of_cores']
		    amt_repeats = settings_dict['amt_repeats']
		    stepsize_cpus = settings_dict['stepsize_cpus']
		    testcase = settings_dict['testcase']
		    result_dir = settings_dict['result_dir']
		    resolutions = settings_dict['resolutions']

		    # These do not depend on the resolution, so compute them only once.
		    hostname = socket.gethostname()
		    cpu_list = _build_cpu_list(amount_of_cpus, stepsize_cpus)

		    # Make sure the directory exists before starting the (slow) runs, so a
		    # problem with the output location shows up immediately.
		    os.makedirs(result_dir, exist_ok=True)

		    # For each set of resolutions
		    for resolution in resolutions:
		        # Total number of systems is the product of the per-variable resolutions
		        total_systems = int(np.prod(list(resolution.values())))

		        ##################################################################
		        # Create dictionary in which to store all the results:
		        result_dict = {
		            "amt_systems": total_systems,
		            "hostname": hostname,
		            "amt_logical_cores": amount_of_cpus,
		            "amt_of_physical_cores": amount_of_cores,
		            "testcase": testcase,
		        }

		        #################
		        # Configuring population
		        test_pop = _configure_population(resolution)

		        ##################################################################
		        # Execute grids

		        # Linear runs
		        result_dict["linear"] = get_linear_results(
		            test_pop, amt_repeats, total_systems
		        )

		        # MP runs
		        result_dict["mp"] = get_mp_results(
		            test_pop, cpu_list, amt_repeats, total_systems
		        )

		        print(result_dict)

		        # Write to file
		        result_filename = os.path.join(
		            result_dir, "{}_{}_systems.json".format(hostname, total_systems)
		        )
		        with open(result_filename, "w") as f:
		            f.write(json.dumps(result_dict, indent=4))


		def _build_cpu_list(amount_of_cpus, stepsize_cpus):
		    """
		    Return the array of cpu counts: 1 up to amount_of_cpus in steps of
		    stepsize_cpus, with amount_of_cpus appended if the stepping skips it.
		    """
		    if amount_of_cpus < 1:
		        raise ValueError(
		            "amount_of_cpus must be at least 1, got {}".format(amount_of_cpus)
		        )
		    if stepsize_cpus <= 0:
		        raise ValueError(
		            "stepsize_cpus must be positive, got {}".format(stepsize_cpus)
		        )

		    cpu_list = np.arange(1, amount_of_cpus + 1, stepsize_cpus)
		    if cpu_list[-1] != amount_of_cpus:
		        cpu_list = np.append(cpu_list, np.array([amount_of_cpus]))
		    return cpu_list


		def _configure_population(resolution):
		    """
		    Return a Population with the lnm1, q and logper grid variables added,
		    using the 'M_1', 'q' and 'per' entries of resolution respectively.
		    """
		    test_pop = Population()

		    test_pop.set(
		        verbose=1, binary=1, parse_function=dummy_parsefunction,
		    )

		    test_pop.add_grid_variable(
		        name="lnm1",
		        longname="Primary mass",
		        valuerange=[1, 100],
		        resolution="{}".format(resolution["M_1"]),
		        spacingfunc="const(math.log(1), math.log(100), {})".format(resolution["M_1"]),
		        precode="M_1=math.exp(lnm1)",
		        probdist="three_part_powerlaw(M_1, 0.1, 0.5, 1.0, 100, -1.3, -2.3, -2.3)*M_1",
		        dphasevol="dlnm1",
		        parameter_name="M_1",
		        condition="",  # Impose a condition on this grid variable. Mostly for a check for yourself
		    )

		    test_pop.add_grid_variable(
		        name="q",
		        longname="Mass ratio",
		        valuerange=["0.1/M_1", 1],
		        resolution="{}".format(resolution['q']),
		        spacingfunc="const(0.1/M_1, 1, {})".format(resolution['q']),
		        probdist="flatsections(q, [{'min': 0.1/M_1, 'max': 0.8, 'height': 1}, {'min': 0.8, 'max': 1.0, 'height': 1.0}])",
		        dphasevol="dq",
		        precode="M_2 = q * M_1",
		        parameter_name="M_2",
		        condition="",  # Impose a condition on this grid variable. Mostly for a check for yourself
		    )

		    test_pop.add_grid_variable(
		        name="logper",
		        longname="log(Orbital_Period)",
		        valuerange=[-2, 12],
		        resolution="{}".format(resolution["per"]),
		        spacingfunc="np.linspace(-2, 12, {})".format(resolution["per"]),
		        precode="orbital_period = 10** logper\n",  # TODO: (left open in the original)
		        probdist="gaussian(logper,4.8, 2.3, -2.0, 12.0)",
		        parameter_name="orbital_period",
		        dphasevol="dln10per",
		    )

		    return test_pop

tests/population/scaling/scaling_script.py

+40 −91

Original line number	Diff line number	Diff line
		@@ -6,9 +6,9 @@ It requires some user input, which you can define at the top of the script after
		The following values should be configured according to your system:
		-

		It will then run the population you specified, first linearly <AMT_REPEATS> times,
		and then using multiprocessing it will run the population <AMT_REPEATS> times each time
		with more cores. (Up until <AMOUNT_OF_CPUS>)
		It will then run the population you specified, first linearly <amt_repeats> times,
		and then using multiprocessing it will run the population <amt_repeats> times each time
		with more cores. (Up until <amount_of_cpus>)

		TODO: get the real evolution time instead of the total as well
		TODO: put the methods in functions and put them in a different file
		@@ -22,96 +22,45 @@ import psutil
		import numpy as np

		from binarycpython.utils.grid import Population
		from scaling_functions import get_mp_results, get_linear_results

		AMT_REPEATS = 5 # Number of times the population will be repeated per cpu
		# number. Useful to get some reliable statistics
		RESOLUTION = {"M_1": 50, "per": 60} # Resolution of sampling of the population
		RESULT_DIR = "scaling_results" # Directory where the results are written to.
		PLOT_DIR = "scaling_plots" # Directory where the plots will be stored
		TESTCASE = "linear vs MP batched" # `name` of the calculation
		STEPSIZE_CPUS = 1 # Stepsize for the cpu number generator. Try to keep this
		from scaling_functions import get_mp_results, get_linear_results, run_systems_for_scaling_comparison
		from plot_scaling import plot_speedup_and_efficiency

		settings_dict = {}
		settings_dict['amt_repeats'] = 1 # Number of times the population will be repeated per cpu
		# number. Better do it several times than only run it once
		settings_dict['resolutions'] = [ # List of resolution of sampling of the population. Useful for checking whether population size has an effect on the results
		{"M_1": 10, "per": 10, "q": 2}
		]
		settings_dict['result_dir'] = "scaling_results" # Relative or absolute directory where results are written to
		settings_dict['plot_dir'] = "scaling_plots" # Directory where the plots will be stored
		settings_dict['testcase'] = "linear vs MP batched" # 'name' of the calculation. will be used in the plot
		settings_dict['stepsize_cpus'] = 1 # Stepsize for the cpu number generator. Try to keep this
		# low, to get the most reliable results

		AMOUNT_OF_CPUS = 4 # Amount of logical cpus the machine has.
		# AMOUNT_OF_CPUS = psutil.cpu_count()

		AMOUNT_OF_CORES = 2 # The amount of physical cores. This value
		settings_dict['amount_of_cpus'] = 4 # Amount of logical cpus the machine has (this is not the same as physical cpus!)
		# settings_dict['amount_of_cpus'] = psutil.cpu_count()
		settings_dict['amount_of_cores'] = 2 # The amount of physical cores. This value
		# is not vital but will be used in the plot
		# AMOUNT_OF_CORES = psutil.cpu_count(logical=False) # You can also use the psutil function to get
		# settings_dict['amount_of_cores'] = psutil.cpu_count(logical=False) # You can also use the psutil function to get
		# the amt of physical cores, but this isnt fully
		# reliable (in mar 2020 it didnt get this value
		# right when there were multiple sockets)


		# Some calculated values
		TOTAL_SYSTEMS = int(np.prod([el for el in RESOLUTION.values()]))
		HOSTNAME = socket.gethostname()


		# Generate the range of cpu numbers
		CPU_LIST = np.arange(1, AMOUNT_OF_CPUS+1, STEPSIZE_CPUS)
		if not CPU_LIST[-1] == AMOUNT_OF_CPUS:
		CPU_LIST = np.append(CPU_LIST, np.array([AMOUNT_OF_CPUS]))

		##################################################################
		# Create dictionary in which to store all the results:
		result_dict = {}

		#
		result_dict["amt_systems"] = TOTAL_SYSTEMS
		result_dict["hostname"] = HOSTNAME
		result_dict["amt_logical_cores"] = AMOUNT_OF_CPUS
		result_dict["amt_of_physical_cores"] = AMOUNT_OF_CORES
		result_dict["testcase"] = TESTCASE

		#################
		# Configuring population
		test_pop = Population()

		test_pop.set(
		verbose=1, binary=1,
		)

		test_pop.add_grid_variable(
		name="M_1",
		longname="log primary mass",
		valuerange=[1, 100],
		resolution="{}".format(RESOLUTION["M_1"]),
		spacingfunc="const(1, 100, {})".format(RESOLUTION["M_1"]),
		probdist="Kroupa2001(M_1)",
		# probdist='self.custom_options["extra_prob_function"](M_1)',
		dphasevol="dlnm1",
		parameter_name="M_1",
		condition="",
		)

		test_pop.add_grid_variable(
		name="period",
		longname="period",
		valuerange=["M_1", 20],
		resolution="{}".format(RESOLUTION["per"]),
		spacingfunc="np.linspace(1, 10, {})".format(RESOLUTION["per"]),
		precode="orbital_period = period**2",
		probdist="flat(orbital_period)",
		parameter_name="orbital_period",
		dphasevol="dper",
		condition='self.grid_options["binary"]==1',
		run_systems_for_scaling_comparison(settings_dict)
		#################################
		# Files
		SCALING_RESULT_DIR = settings_dict['result_dir']
		RESULT_JSONS = [os.path.join(SCALING_RESULT_DIR, file) for file in os.listdir(SCALING_RESULT_DIR) if file.endswith('.json')] # Automatically grab all of the stuff, override it

		# FILENAMES = [
		# "david-Lenovo-IdeaPad-S340-14IWL_100_systems.json",
		# "david-Lenovo-IdeaPad-S340-14IWL_2500_systems.json"
		# ]
		# RESULT_JSONS = []
		# for filename in FILENAMES:
		# RESULT_JSONS.append(os.path.join(os.path.abspath(SCALING_RESULT_DIR), filename))

		plot_speedup_and_efficiency(
		RESULT_JSONS,
		SCALING_RESULT_DIR,
		"Example"
		)
		No newline at end of file

		#######################################################################################
		# Execute grids

		# Linear runs
		LINEAR_TIMES = get_linear_results(test_pop, AMT_REPEATS, TOTAL_SYSTEMS)
		result_dict["linear"] = LINEAR_TIMES

		#######################################################################################
		# MP runs
		MP_DICT = get_mp_results(test_pop, CPU_LIST, AMT_REPEATS, TOTAL_SYSTEMS)
		result_dict["mp"] = MP_DICT

		# Write to file and make sure the directory exists.
		os.makedirs(RESULT_DIR, exist_ok=True)
		with open(os.path.join(RESULT_DIR, "{}_{}_systems.json".format(HOSTNAME, TOTAL_SYSTEMS)), "w") as f:
		f.write(json.dumps(result_dict))