updated code in grid and fixing bugs and commented out some code that isnt working yet (efd60b25) · Commits · binary_c / binary_c-python

binarycpython/utils/custom_logging_functions.py

+3 −1

Original line number	Diff line number	Diff line
		@@ -19,8 +19,10 @@ def autogen_C_logging_code(logging_dict: dict, verbose: int = 0) -> Optional[str
		Input is a dictionary where the key is the header of that logging line
		and items which are lists of parameters that will be put in that logging line

		The list elements are all appended to 'stardata->' in the autogenerated code.

		Example:
		input dictionary should look like this::
		Input dictionary should look like this::

		{'MY_STELLAR_DATA':
		[

binarycpython/utils/grid.py

+527 −474

File changed.

Preview size limit exceeded, changes collapsed.

binarycpython/utils/grid_options_defaults.py

+36 −34

Original line number	Diff line number	Diff line
		@@ -129,43 +129,45 @@ grid_options_defaults_dict = {
		# Slurm stuff
		########################################
		"slurm": 0, # dont use the slurm by default. 1 = use slurm
		"slurm_ntasks": 1, # CPUs required per array job: usually only need this
		"slurm_command": "", # Command that slurm runs (e.g. evolve or join_datafiles)
		"slurm_dir": "", # working directory containing scripts output logs etc.
		"slurm_njobs": 0, # number of scripts; set to 0 as default
		"slurm_jobid": "", # slurm job id (%A)
		"slurm_memory": 512, # in MB, the memory use of the job
		"slurm_warn_max_memory": 1024, # in MB : warn if mem req. > this
		"slurm_use_all_node_CPUs": 0, # 1 = use all of a node's CPUs. 0 = use a given amount of CPUs
		"slurm_postpone_join": 0, # if 1 do not join on slurm, join elsewhere. want to do it off the slurm grid (e.g. with more RAM)
		"slurm_jobarrayindex": "", # slurm job array index (%a)
		"slurm_jobname": "binary_grid", # default
		"slurm_partition": None,
		"slurm_time": 0, # total time. 0 = infinite time
		"slurm_postpone_sbatch": 0, # if 1: don't submit, just make the script
		"slurm_array": None, # override for --array, useful for rerunning jobs
		"slurm_use_all_node_CPUs": 0, # if given nodes, set to 1
		# if given CPUs, set to 0
		# you will want to use this if your Slurm SelectType is e.g. linear
		# which means it allocates all the CPUs in a node to the job
		"slurm_control_CPUs": 0, # if so, leave this many for Pythons control (0)
		"slurm_array": None, # override for --array, useful for rerunning jobs
		"slurm_partition": None, # MUST be defined
		"slurm_extra_settings": {}, # Place to put extra configuration for the SLURM batch file. The key and value of the dict will become the key and value of the line in te slurm batch file. Will be put in after all the other settings (and before the command). Take care not to overwrite something without really meaning to do so.
		# "slurm_ntasks": 1, # CPUs required per array job: usually only need this
		# "slurm_command": "", # Command that slurm runs (e.g. evolve or join_datafiles)
		# "slurm_dir": "", # working directory containing scripts output logs etc.
		# "slurm_njobs": 0, # number of scripts; set to 0 as default
		# "slurm_jobid": "", # slurm job id (%A)
		# "slurm_memory": 512, # in MB, the memory use of the job
		# "slurm_warn_max_memory": 1024, # in MB : warn if mem req. > this
		# "slurm_use_all_node_CPUs": 0, # 1 = use all of a node's CPUs. 0 = use a given amount of CPUs
		# "slurm_postpone_join": 0, # if 1 do not join on slurm, join elsewhere. want to do it off the slurm grid (e.g. with more RAM)
		# "slurm_jobarrayindex": "", # slurm job array index (%a)
		# "slurm_jobname": "binary_grid", # default
		# "slurm_partition": None,
		# "slurm_time": 0, # total time. 0 = infinite time
		# "slurm_postpone_sbatch": 0, # if 1: don't submit, just make the script
		# "slurm_array": None, # override for --array, useful for rerunning jobs
		# "slurm_use_all_node_CPUs": 0, # if given nodes, set to 1
		# # if given CPUs, set to 0
		# # you will want to use this if your Slurm SelectType is e.g. linear
		# # which means it allocates all the CPUs in a node to the job
		# "slurm_control_CPUs": 0, # if so, leave this many for Pythons control (0)
		# "slurm_array": None, # override for --array, useful for rerunning jobs
		# "slurm_partition": None, # MUST be defined
		# "slurm_extra_settings": {}, # Place to put extra configuration for the SLURM batch file. The key and value of the dict will become the key and value of the line in the slurm batch file. Will be put in after all the other settings (and before the command). Take care not to overwrite something without really meaning to do so.
		########################################
		# Condor stuff
		########################################
		"condor": 0, # 1 to use condor, 0 otherwise
		"condor_command": "", # condor command e.g. "evolve", "join"
		"condor_dir": "", # working directory containing e.g. scripts, output, logs (e.g. should be NFS available to all)
		"condor_njobs": "", # number of scripts/jobs that CONDOR will run in total
		"condor_jobid": "", # condor job id
		"condor_postpone_join": 0, # if 1, data is not joined, e.g. if you want to do it off the condor grid (e.g. with more RAM)
		# "condor_join_machine": None, # if defined then this is the machine on which the join command should be launched (must be sshable and not postponed)
		"condor_join_pwd": "", # directory the join should be in (defaults to $ENV{PWD} if undef)
		"condor_memory": 1024, # in MB, the memory use (ImageSize) of the job
		"condor_universe": "vanilla", # usually vanilla universe
		"condor_extra_settings": {}, # Place to put extra configuration for the CONDOR submit file. The key and value of the dict will become the key and value of the line in te slurm batch file. Will be put in after all the other settings (and before the command). Take care not to overwrite something without really meaning to do so.
		# "condor_command": "", # condor command e.g. "evolve", "join"
		# "condor_dir": "", # working directory containing e.g. scripts, output, logs (e.g. should be NFS available to all)
		# "condor_njobs": "", # number of scripts/jobs that CONDOR will run in total
		# "condor_jobid": "", # condor job id
		# "condor_postpone_join": 0, # if 1, data is not joined, e.g. if you want to do it off the condor grid (e.g. with more RAM)
		# # "condor_join_machine": None, # if defined then this is the machine on which the join command should be launched (must be sshable and not postponed)
		# "condor_join_pwd": "", # directory the join should be in (defaults to $ENV{PWD} if undef)
		# "condor_memory": 1024, # in MB, the memory use (ImageSize) of the job
		# "condor_universe": "vanilla", # usually vanilla universe
		# "condor_extra_settings": {}, # Place to put extra configuration for the CONDOR submit file. The key and value of the dict will become the key and value of the line in the CONDOR submit file. Will be put in after all the other settings (and before the command). Take care not to overwrite something without really meaning to do so.


		# snapshots and checkpoints
		# condor_snapshot_on_kill=>0, # if 1 snapshot on SIGKILL before exit
		# condor_load_from_snapshot=>0, # if 1 check for snapshot .sv file and load it if found
		@@ -456,7 +458,7 @@ grid_options_descriptions = {
		"_store_memaddr": "Memory adress of the store object for binary_c.",
		"failed_systems_threshold": "Variable storing the maximum amount of systems that are allowed to fail before logging their commandline arguments to failed_systems log files",
		"parse_function": "Function that the user can provide to handle the output the binary_c. This function has to take the arguments (self, output). Its best not to return anything in this function, and just store stuff in the grid_options['results'] dictionary, or just output results to a file",
		"condor": "Int flag whether to use a condor type population evolution.", # TODO: describe this in more detail
		"condor": "Int flag whether to use a condor type population evolution. Not implemented yet.", # TODO: describe this in more detail
		"slurm": "Int flag whether to use a slurm type population evolution.", # TODO: describe this in more detail
		"weight": "Weight factor for each system. The calculated probability is mulitplied by this. If the user wants each system to be repeated several times, then this variable should not be changed, rather change the _repeat variable instead, as that handles the reduction in probability per system. This is useful for systems that have a process with some random element in it.", # TODO: add more info here, regarding the evolution splitting.
		"repeat": "Factor of how many times a system should be repeated. Consider the evolution splitting binary_c argument for supernovae kick repeating.", # TODO: make sure this is used.

binarycpython/utils/hpc_functions.py

+131 −131

Original line number	Diff line number	Diff line
		"""
		File containing functions for HPC computing, distributed tasks on clusters etc.

		Functions that the slurm and condor subroutines of the population object use.

		Mainly divided in 2 sections: Slurm and Condor
		"""

		import os
		import sys
		import time
		import subprocess
		from typing import Union
		import __main__ as main


		def get_slurm_version() -> Union[str, None]:
		"""
		Function that checks whether slurm is installed and returns the version if its installed.

		Only tested this with slurm v17+

		Returns:
		slurm version, or None
		"""

		slurm_version = None

		try:
		slurm_version = (
		subprocess.run(["sinfo", "-V"], stdout=subprocess.PIPE, check=True)
		.stdout.decode("utf-8")
		.split()
		)[1]
		except FileNotFoundError as err:
		print(err)
		print(err.args)
		print("Slurm is not installed or not loaded")
		except Exception as err:
		print(err)
		print(err.args)
		print("Unknown error, contact me about this")
		# """
		# File containing functions for HPC computing, distributed tasks on clusters etc.

		# Functions that the slurm and condor subroutines of the population object use.

		# Mainly divided in 2 sections: Slurm and Condor
		# """

		# import os
		# import sys
		# import time
		# import subprocess
		# from typing import Union
		# import __main__ as main


		# def get_slurm_version() -> Union[str, None]:
		# """
		# Function that checks whether slurm is installed and returns the version if its installed.

		# Only tested this with slurm v17+

		# Returns:
		# slurm version, or None
		# """

		# slurm_version = None

		# try:
		# slurm_version = (
		# subprocess.run(["sinfo", "-V"], stdout=subprocess.PIPE, check=True)
		# .stdout.decode("utf-8")
		# .split()
		# )[1]
		# except FileNotFoundError as err:
		# print(err)
		# print(err.args)
		# print("Slurm is not installed or not loaded")
		# except Exception as err:
		# print(err)
		# print(err.args)
		# print("Unknown error, contact me about this")

		return slurm_version
		# return slurm_version


		def get_condor_version() -> Union[str, None]:
		"""
		Function that checks whether condor is installed and returns the version if it's installed.
		# def get_condor_version() -> Union[str, None]:
		# """
		# Function that checks whether condor is installed and returns the version if it's installed.

		otherwise returns None
		# otherwise returns None

		Result has to be condor v8 or higher
		# Result has to be condor v8 or higher

		Returns:
		condor version, or None
		"""
		# Returns:
		# condor version, or None
		# """

		condor_version = None
		# condor_version = None

		try:
		condor_version = (
		subprocess.run(
		["condor_q", "--version"], stdout=subprocess.PIPE, check=True
		)
		.stdout.decode("utf-8")
		.split()
		)[1]
		except FileNotFoundError as err:
		print("Slurm is not installed or not loaded: ")
		print(err)
		print(err.args)
		except Exception as err:
		print("Unknown error, contact me about this: ")
		print(err)
		print(err.args)
		# try:
		# condor_version = (
		# subprocess.run(
		# ["condor_q", "--version"], stdout=subprocess.PIPE, check=True
		# )
		# .stdout.decode("utf-8")
		# .split()
		# )[1]
		# except FileNotFoundError as err:
		# print("Slurm is not installed or not loaded: ")
		# print(err)
		# print(err.args)
		# except Exception as err:
		# print("Unknown error, contact me about this: ")
		# print(err)
		# print(err.args)

		return condor_version
		# return condor_version


		def create_directories_hpc(working_dir: str) -> None:
		"""
		Function to create a set of directories, given a root directory
		# def create_directories_hpc(working_dir: str) -> None:
		# """
		# Function to create a set of directories, given a root directory

		These directories will contain stuff for the HPC runs
		# These directories will contain stuff for the HPC runs

		Args:
		working_dir: main working directory of the run. Under this directory all the dirs will be created
		"""
		# Args:
		# working_dir: main working directory of the run. Under this directory all the dirs will be created
		# """

		# Check if working_dir exists
		if not os.path.isdir(working_dir):
		print("Error. Working directory {} does not exist! Aborting")
		raise ValueError
		# # Check if working_dir exists
		# if not os.path.isdir(working_dir):
		# print("Error. Working directory {} does not exist! Aborting")
		# raise ValueError

		directories_list = [
		"scripts",
		"stdout",
		"stderr",
		"results",
		"logs",
		"status",
		"joining",
		]
		# directories_list = [
		# "scripts",
		# "stdout",
		# "stderr",
		# "results",
		# "logs",
		# "status",
		# "joining",
		# ]

		# Make directories.
		for subdir in directories_list:
		full_path = os.path.join(working_dir, subdir)
		os.makedirs(full_path, exist_ok=True)

		# Since the directories are probably made on some mount which has to go over NFS
		# we should explicitly check if they are created
		print("Checking if creating the directories has finished...")
		directories_exist = False
		while directories_exist:
		directories_exist = True

		for subdir in directories_list:
		full_path = os.path.join(working_dir, subdir)

		if not os.path.isdir(full_path):
		time.sleep(1)
		directories_exist = False
		print("..Finished! Directories exist.")


		def path_of_calling_script() -> str:
		    """
		    Return the ``__file__`` attribute of the ``__main__`` module.

		    Intended to give the name of the script the user executes.
		    TODO: fix this function. seems not to work properly.

		    Returns:
		        the value of ``main.__file__``, where ``main`` is the ``__main__``
		        module imported at the top of this file.
		    """

		    calling_script = main.__file__
		    return calling_script


		def get_python_details() -> dict:
		"""
		Function to get some info about the used python version and virtualenv etc
		# # Make directories.
		# for subdir in directories_list:
		# full_path = os.path.join(working_dir, subdir)
		# os.makedirs(full_path, exist_ok=True)

		# # Since the directories are probably made on some mount which has to go over NFS
		# # we should explicitly check if they are created
		# print("Checking if creating the directories has finished...")
		# directories_exist = False
		# while directories_exist:
		# directories_exist = True

		# for subdir in directories_list:
		# full_path = os.path.join(working_dir, subdir)

		# if not os.path.isdir(full_path):
		# time.sleep(1)
		# directories_exist = False
		# print("..Finished! Directories exist.")


		# def path_of_calling_script() -> str:
		# """
		# Function to get the name of the script the user executes.
		# TODO: fix this function. seems not to work properly.
		# """

		# return main.__file__


		# def get_python_details() -> dict:
		# """
		# Function to get some info about the used python version and virtualenv etc

		Returns:
		dictionary with python executable, virtual environment and version information.
		"""
		# Returns:
		# dictionary with python executable, virtual environment and version information.
		# """

		python_info_dict = {}
		# python_info_dict = {}

		#
		python_info_dict["virtualenv"] = os.getenv("VIRTUAL_ENV")
		python_info_dict["executable"] = sys.executable
		python_info_dict["version"] = sys.version
		# #
		# python_info_dict["virtualenv"] = os.getenv("VIRTUAL_ENV")
		# python_info_dict["executable"] = sys.executable
		# python_info_dict["version"] = sys.version

		return python_info_dict
		# return python_info_dict