Commit dbf77cdf authored by David Hendriks's avatar David Hendriks
Browse files

busy with hpc slurm and condor stuff

parent 7d30dc17
Loading
Loading
Loading
Loading
+16 −6
Original line number Diff line number Diff line
@@ -118,27 +118,37 @@ grid_options_defaults_dict = {
    ########################################
    # Slurm stuff
    ########################################
    "slurm": 0,  # dont use the slurm by default
    slurm_ntasks=>1, # 1 CPU required per job
    slurm_partition=>'all', # MUST be defined
    slurm_jobname=>'binary_grid', # not required but useful
    slurm_use_all_node_CPUs=>0, # if given nodes, set to 1
                                # if given CPUs, set to 0

    "slurm": 0,  # dont use the slurm by default. 1 = use slurm
    "slurm_command": "",  # Command that slurm runs (e.g. run_flexigrid or join_datafiles)
    "slurm_dir": "",  # working directory containin scripts output logs etc.
    # slurm_njobs=>'', # number of scripts
    # slurm_jobid=>'', # slurm job id (%A)
    "slurm_njobs": 0, # number of scripts; set to 0 as default
    "slurm_jobid": '', # slurm job id (%A)
    "slurm_memory": 512, # in MB, the memory use of the job
    "slurm_warn_max_memory": 1024, # in MB : warn if mem req. > this
    "slurm_use_all_node_CPUs": 0, # 1 = use all of a node's CPUs. 0 = use a given amount of CPUs
    "slurm_postpone_join": 0, # if 1 do not join on slurm, join elsewhere

    # slurm_jobarrayindex=>'', # slurm job array index (%a)
    #     slurm_jobname=>'binary_grid', # set to binary_grid
    #     slurm_postpone_join=>0, # if 1, data is not joined, e.g. if you
    # # want to do it off the slurm grid (e.g. with more RAM)
    #     slurm_postpone_sbatch=>0, # if 1, don't submit, just make the script
    # # (defaults to $ENV{PWD} if undef)
    # slurm_memory=>512, # in MB, the memory use of the job
    #     slurm_warn_max_memory=>1024, # in MB : warn if mem req. > this
    #     slurm_partition=>undef,
    #     slurm_ntasks=>1, # 1 CPU required per array job: usually only need this
    #     slurm_time=>0, # 0 = infinite time
    # slurm_use_all_node_CPUs=>0, # 1 = use all of a node's CPUs (0)
    # # you will want to use this if your Slurm SelectType is e.g. linear
    # # which means it allocates all the CPUs in a node to the job
    # slurm_control_CPUs=>0, # if so, leave this many for Perl control (0)
    #     slurm_array=>undef,# override for --array, useful for rerunning jobs


    ########################################
    # Condor stuff
    ########################################
+52 −0
Original line number Diff line number Diff line
"""
File containing functions for HPC computing, distributed tasks on clusters etc.

Mainly divided into two sections: Slurm and Condor
"""

import os
import time

class slurm_grid():
    """
    Placeholder class for the Slurm grid functionality.

    No behaviour is implemented yet. The docstring doubles as the class body:
    a class statement without any body is a syntax error and would prevent
    this module from being imported at all.
    """



def create_directories_hpc(working_dir):
    """
    Function to create a set of directories, given a root directory

    These directories will contain stuff for the HPC runs. The following
    subdirectories are created inside working_dir:
    scripts, stdout, stderr, results, logs, status, joining

    After creating them, the function blocks until every subdirectory is
    visible on the filesystem, polling once per second.

    Args:
        working_dir: path of an existing directory in which the
            subdirectories are created.

    Raises:
        ValueError: if working_dir does not exist.
        FileExistsError: if one of the subdirectories already exists
            (os.makedirs is called with exist_ok=False).
    """

    # Refuse to continue without an existing root directory
    if not os.path.exists(working_dir):
        message = "Error. Working directory {} does not exist! Aborting".format(
            working_dir
        )
        print(message)
        raise ValueError(message)

    directories_list = [
        "scripts",
        "stdout",
        "stderr",
        "results",
        "logs",
        "status",
        "joining",
    ]
    full_paths = [os.path.join(working_dir, subdir) for subdir in directories_list]

    # Create all the subdirectories
    for full_path in full_paths:
        os.makedirs(full_path, exist_ok=False)

    # Since the directories are probably made on some mount which has to go over NFS
    # we should explicitly check if they are created
    print("Waiting for directories")
    while not all(os.path.exists(full_path) for full_path in full_paths):
        # At least one directory is not visible yet: wait and check again
        time.sleep(1)

    print("Directories exist")