Skip to content
Snippets Groups Projects
Commit ddc42de5 authored by Berk Hess's avatar Berk Hess Committed by Mark Abraham
Browse files

Properly reset CUDA application clocks

We now store the application clock values we read when starting mdrun
and reset to these values, but only when clocks have not been changed
(by another process) in the meantime.

Fixes #1846.

Change-Id: I722d7153202e8f4c6a5330948dcbef06bb6acf28
parent f16daabd
No related branches found
No related tags found
Loading
/*
* This file is part of the GROMACS molecular simulation package.
*
* Copyright (c) 2012,2014,2015, by the GROMACS development team, led by
* Copyright (c) 2012,2014,2015,2016, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
......@@ -125,7 +125,11 @@ struct gmx_device_info_t
cudaDeviceProp prop; /* CUDA device properties */
int stat; /* result of the device check */
gmx_bool nvml_initialized; /* If NVML was initialized */
gmx_bool nvml_ap_clocks_changed; /* If application clocks have been changed */
unsigned int nvml_orig_app_sm_clock; /* The original SM clock before we changed it */
unsigned int nvml_orig_app_mem_clock; /* The original memory clock before we changed it */
gmx_bool nvml_app_clocks_changed; /* If application clocks have been changed */
unsigned int nvml_set_app_sm_clock; /* The SM clock we set */
unsigned int nvml_set_app_mem_clock; /* The memory clock we set */
#ifdef HAVE_NVML
nvmlDevice_t nvml_device_id; /* NVML device id */
nvmlEnableState_t nvml_is_restricted; /* Status of application clocks permission */
......
/*
* This file is part of the GROMACS molecular simulation package.
*
* Copyright (c) 2010,2011,2012,2013,2014,2015, by the GROMACS development team, led by
* Copyright (c) 2010,2011,2012,2013,2014,2015,2016, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
......@@ -298,6 +298,31 @@ static bool addNVMLDeviceId(gmx_device_info_t* cuda_dev)
}
return cuda_dev->nvml_initialized;
}
/*! \brief Queries the current application clocks of a device through NVML.
 *
 * The SM clock is queried first; the memory clock is only queried when the
 * SM clock query did not report that application clocks are unsupported.
 *
 * \param[in]  cuda_dev       The GPU device; its nvml_device_id selects the NVML device
 * \param[out] app_sm_clock   Receives the current application SM clock
 * \param[out] app_mem_clock  Receives the current application memory clock
 * \returns false when the SM clock query returns NVML_ERROR_NOT_SUPPORTED
 *          (in which case \p app_mem_clock is not touched), true otherwise
 *
 * \note Any other NVML status is handed to HANDLE_NVML_RET_ERR.
 *       NOTE(review): if that macro only reports the error and does not abort,
 *       true is returned even though the outputs may not have been written --
 *       confirm against the macro definition.
 */
static bool getApplicationClocks(const gmx_device_info_t *cuda_dev,
                                 unsigned int            *app_sm_clock,
                                 unsigned int            *app_mem_clock)
{
    /* SM clock: this query doubles as the "are application clocks supported" probe */
    nvmlReturn_t sm_query_stat = nvmlDeviceGetApplicationsClock(cuda_dev->nvml_device_id,
                                                                NVML_CLOCK_SM,
                                                                app_sm_clock);
    if (sm_query_stat == NVML_ERROR_NOT_SUPPORTED)
    {
        return false;
    }
    HANDLE_NVML_RET_ERR(sm_query_stat, "nvmlDeviceGetApplicationsClock failed");

    /* Memory clock */
    nvmlReturn_t mem_query_stat = nvmlDeviceGetApplicationsClock(cuda_dev->nvml_device_id,
                                                                 NVML_CLOCK_MEM,
                                                                 app_mem_clock);
    HANDLE_NVML_RET_ERR(mem_query_stat, "nvmlDeviceGetApplicationsClock failed");

    return true;
}
#endif /* HAVE_NVML_APPLICATION_CLOCKS */
/*! \brief Tries to set application clocks for the GPU with the given index.
......@@ -366,52 +391,52 @@ static gmx_bool init_gpu_application_clocks(FILE gmx_unused *fplog, int gmx_unus
{
return false;
}
if (!addNVMLDeviceId( &(gpu_info->gpu_dev[gpuid])))
gmx_device_info_t *cuda_dev = &(gpu_info->gpu_dev[gpuid]);
if (!addNVMLDeviceId(cuda_dev))
{
return false;
}
//get current application clocks setting
unsigned int app_sm_clock = 0;
unsigned int app_mem_clock = 0;
nvml_stat = nvmlDeviceGetApplicationsClock ( gpu_info->gpu_dev[gpuid].nvml_device_id, NVML_CLOCK_SM, &app_sm_clock );
if (NVML_ERROR_NOT_SUPPORTED == nvml_stat)
if (!getApplicationClocks(cuda_dev,
&cuda_dev->nvml_orig_app_sm_clock,
&cuda_dev->nvml_orig_app_mem_clock))
{
return false;
}
HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetApplicationsClock failed" );
nvml_stat = nvmlDeviceGetApplicationsClock ( gpu_info->gpu_dev[gpuid].nvml_device_id, NVML_CLOCK_MEM, &app_mem_clock );
HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetApplicationsClock failed" );
//get max application clocks
unsigned int max_sm_clock = 0;
unsigned int max_mem_clock = 0;
nvml_stat = nvmlDeviceGetMaxClockInfo ( gpu_info->gpu_dev[gpuid].nvml_device_id, NVML_CLOCK_SM, &max_sm_clock );
nvml_stat = nvmlDeviceGetMaxClockInfo(cuda_dev->nvml_device_id, NVML_CLOCK_SM, &max_sm_clock);
HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetMaxClockInfo failed" );
nvml_stat = nvmlDeviceGetMaxClockInfo ( gpu_info->gpu_dev[gpuid].nvml_device_id, NVML_CLOCK_MEM, &max_mem_clock );
nvml_stat = nvmlDeviceGetMaxClockInfo(cuda_dev->nvml_device_id, NVML_CLOCK_MEM, &max_mem_clock);
HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetMaxClockInfo failed" );
gpu_info->gpu_dev[gpuid].nvml_is_restricted = NVML_FEATURE_ENABLED;
gpu_info->gpu_dev[gpuid].nvml_ap_clocks_changed = false;
cuda_dev->nvml_is_restricted = NVML_FEATURE_ENABLED;
cuda_dev->nvml_app_clocks_changed = false;
nvml_stat = nvmlDeviceGetAPIRestriction ( gpu_info->gpu_dev[gpuid].nvml_device_id, NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS, &(gpu_info->gpu_dev[gpuid].nvml_is_restricted) );
nvml_stat = nvmlDeviceGetAPIRestriction(cuda_dev->nvml_device_id, NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS, &(cuda_dev->nvml_is_restricted));
HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetAPIRestriction failed" );
/* Note: Distinguishing between different types of GPUs here might be necessary in the future,
e.g. if max application clocks should not be used for certain GPUs. */
if (nvml_stat == NVML_SUCCESS && app_sm_clock < max_sm_clock && gpu_info->gpu_dev[gpuid].nvml_is_restricted == NVML_FEATURE_DISABLED)
if (nvml_stat == NVML_SUCCESS && cuda_dev->nvml_orig_app_sm_clock < max_sm_clock && cuda_dev->nvml_is_restricted == NVML_FEATURE_DISABLED)
{
md_print_info( fplog, "Changing GPU application clocks for %s to (%d,%d)\n", gpu_info->gpu_dev[gpuid].prop.name, max_mem_clock, max_sm_clock);
nvml_stat = nvmlDeviceSetApplicationsClocks ( gpu_info->gpu_dev[gpuid].nvml_device_id, max_mem_clock, max_sm_clock );
md_print_info(fplog, "Changing GPU application clocks for %s to (%d,%d)\n", cuda_dev->prop.name, max_mem_clock, max_sm_clock);
nvml_stat = nvmlDeviceSetApplicationsClocks(cuda_dev->nvml_device_id, max_mem_clock, max_sm_clock);
HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetApplicationsClock failed" );
gpu_info->gpu_dev[gpuid].nvml_ap_clocks_changed = true;
cuda_dev->nvml_app_clocks_changed = true;
cuda_dev->nvml_set_app_sm_clock = max_sm_clock;
cuda_dev->nvml_set_app_mem_clock = max_mem_clock;
}
else if (nvml_stat == NVML_SUCCESS && app_sm_clock < max_sm_clock)
else if (nvml_stat == NVML_SUCCESS && cuda_dev->nvml_orig_app_sm_clock < max_sm_clock)
{
md_print_warn( fplog, "Can not change application clocks for %s to optimal values due to insufficient permissions. Current values are (%d,%d), max values are (%d,%d).\nUse sudo nvidia-smi -acp UNRESTRICTED or contact your admin to change application clocks.\n", gpu_info->gpu_dev[gpuid].prop.name, app_mem_clock, app_sm_clock, max_mem_clock, max_sm_clock);
md_print_warn(fplog, "Can not change application clocks for %s to optimal values due to insufficient permissions. Current values are (%d,%d), max values are (%d,%d).\nUse sudo nvidia-smi -acp UNRESTRICTED or contact your admin to change application clocks.\n", cuda_dev->prop.name, cuda_dev->nvml_orig_app_mem_clock, cuda_dev->nvml_orig_app_sm_clock, max_mem_clock, max_sm_clock);
}
else if (nvml_stat == NVML_SUCCESS && app_sm_clock == max_sm_clock)
else if (nvml_stat == NVML_SUCCESS && cuda_dev->nvml_orig_app_sm_clock == max_sm_clock)
{
//TODO: This should probably be integrated into the GPU Properties table.
md_print_info( fplog, "Application clocks (GPU clocks) for %s are (%d,%d)\n", gpu_info->gpu_dev[gpuid].prop.name, app_mem_clock, app_sm_clock);
md_print_info(fplog, "Application clocks (GPU clocks) for %s are (%d,%d)\n", cuda_dev->prop.name, cuda_dev->nvml_orig_app_mem_clock, cuda_dev->nvml_orig_app_sm_clock);
}
else
{
......@@ -434,10 +459,20 @@ static gmx_bool reset_gpu_application_clocks(const gmx_device_info_t gmx_unused
nvmlReturn_t nvml_stat = NVML_SUCCESS;
if (cuda_dev &&
cuda_dev->nvml_is_restricted == NVML_FEATURE_DISABLED &&
cuda_dev->nvml_ap_clocks_changed)
cuda_dev->nvml_app_clocks_changed)
{
/* Check if the clocks are still what we set them to.
* If so, set them back to the state we originally found them in.
* If not, don't touch them, because something else set them later.
*/
unsigned int app_sm_clock, app_mem_clock;
getApplicationClocks(cuda_dev, &app_sm_clock, &app_mem_clock);
if (app_sm_clock == cuda_dev->nvml_set_app_sm_clock &&
app_mem_clock == cuda_dev->nvml_set_app_mem_clock)
{
nvml_stat = nvmlDeviceResetApplicationsClocks( cuda_dev->nvml_device_id );
HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceResetApplicationsClocks failed" );
nvml_stat = nvmlDeviceSetApplicationsClocks(cuda_dev->nvml_device_id, cuda_dev->nvml_orig_app_mem_clock, cuda_dev->nvml_orig_app_sm_clock);
HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetApplicationsClock failed" );
}
}
nvml_stat = nvmlShutdown();
HANDLE_NVML_RET_ERR( nvml_stat, "nvmlShutdown failed" );
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment