busy with hpc slurm and condor stuff (dbf77cdf) · Commits · binary_c / binary_c-python

binarycpython/utils/grid_options_defaults.py

+16 −6

Original line number	Diff line number	Diff line
		@@ -118,27 +118,37 @@ grid_options_defaults_dict = {
		########################################
		# Slurm stuff
		########################################
		"slurm": 0, # dont use the slurm by default
		slurm_ntasks=>1, # 1 CPU required per job
		slurm_partition=>'all', # MUST be defined
		slurm_jobname=>'binary_grid', # not required but useful
		slurm_use_all_node_CPUs=>0, # if given nodes, set to 1
		# if given CPUs, set to 0

		"slurm": 0, # dont use the slurm by default. 1 = use slurm
		"slurm_command": "", # Command that slurm runs (e.g. run_flexigrid or join_datafiles)
		"slurm_dir": "", # working directory containin scripts output logs etc.
		# slurm_njobs=>'', # number of scripts
		# slurm_jobid=>'', # slurm job id (%A)
		"slurm_njobs": 0, # number of scripts; set to 0 as default
		"slurm_jobid": '', # slurm job id (%A)
		"slurm_memory": 512, # in MB, the memory use of the job
		"slurm_warn_max_memory": 1024, # in MB : warn if mem req. > this
		"slurm_use_all_node_CPUs": 0, # 1 = use all of a node's CPUs. 0 = use a given amount of CPUs
		"slurm_postpone_join": 0, # if 1 do not join on slurm, join elsewhere

		# slurm_jobarrayindex=>'', # slurm job array index (%a)
		# slurm_jobname=>'binary_grid', # set to binary_grid
		# slurm_postpone_join=>0, # if 1, data is not joined, e.g. if you
		# # want to do it off the slurm grid (e.g. with more RAM)
		# slurm_postpone_sbatch=>0, # if 1, don't submit, just make the script
		# # (defaults to $ENV{PWD} if undef)
		# slurm_memory=>512, # in MB, the memory use of the job
		# slurm_warn_max_memory=>1024, # in MB : warn if mem req. > this
		# slurm_partition=>undef,
		# slurm_ntasks=>1, # 1 CPU required per array job: usually only need this
		# slurm_time=>0, # 0 = infinite time
		# slurm_use_all_node_CPUs=>0, # 1 = use all of a node's CPUs (0)
		# # you will want to use this if your Slurm SelectType is e.g. linear
		# # which means it allocates all the CPUs in a node to the job
		# slurm_control_CPUs=>0, # if so, leave this many for Perl control (0)
		# slurm_array=>undef,# override for --array, useful for rerunning jobs


		########################################
		# Condor stuff
		########################################

binarycpython/utils/hpc.py

0 → 100644

+52 −0

Original line number	Diff line number	Diff line
		"""
		File containing functions for HPC computing, distributed tasks on clusters etc.

		Mainly divided into two sections: Slurm and Condor
		"""

		import os
		import time

		class slurm_grid():
		    """
		    Placeholder class for Slurm grid functionality.

		    No attributes or methods are defined yet. This docstring acts as the
		    class body, because a class header without a body is a syntax error
		    that would prevent the module from being imported.
		    """




		def create_directories_hpc(working_dir):
		    """
		    Function to create a set of directories, given a root directory

		    These directories will contain stuff for the HPC runs. The following
		    sub-directories are created inside working_dir: scripts, stdout, stderr,
		    results, logs, status and joining. After creating them the function
		    blocks until every one of them is visible on the file system.

		    Args:
		        working_dir: path of an existing root directory in which the
		            sub-directories are created.

		    Raises:
		        ValueError: if working_dir does not exist.
		        FileExistsError: if one of the sub-directories already exists
		            (they are created with exist_ok=False).
		    """

		    # Refuse to continue without an existing root directory.
		    if not os.path.exists(working_dir):
		        message = "Error. Working directory {} does not exist! Aborting".format(
		            working_dir
		        )
		        print(message)
		        raise ValueError(message)

		    directories_list = [
		        "scripts",
		        "stdout",
		        "stderr",
		        "results",
		        "logs",
		        "status",
		        "joining",
		    ]
		    full_paths = [os.path.join(working_dir, subdir) for subdir in directories_list]

		    # Create every sub-directory below the root.
		    for full_path in full_paths:
		        os.makedirs(full_path, exist_ok=False)

		    # Since the directories are probably made on some mount which has to go over NFS
		    # we should explicitly check if they are created
		    print("Waiting for directories")
		    while not all(os.path.exists(full_path) for full_path in full_paths):
		        # Sleep once per polling pass, not once per missing directory.
		        time.sleep(1)

		    print("Directories exist")