/*****************************************************************************\
 *  gres_mps.c - Support MPS as a generic resource.
 *  MPS or CUDA Multi-Process Service is a mechanism to share GPUs.
 *****************************************************************************
 *  Copyright (C) 2018 SchedMD LLC
 *  Written by Morris Jette
 *
 *  This file is part of Slurm, a resource management program.
 *  For details, see <https://slurm.schedmd.com/>.
 *  Please also read the included file: DISCLAIMER.
 *
 *  Slurm is free software; you can redistribute it and/or modify it under
 *  the terms of the GNU General Public License as published by the Free
 *  Software Foundation; either version 2 of the License, or (at your option)
 *  any later version.
 *
 *  In addition, as a special exception, the copyright holders give permission
 *  to link the code of portions of this program with the OpenSSL library under
 *  certain conditions as described in each individual source file, and
 *  distribute linked combinations including the two. You must obey the GNU
 *  General Public License in all respects for all of the code used other than
 *  OpenSSL. If you modify file(s) with this exception, you may extend this
 *  exception to your version of the file(s), but you are not obligated to do
 *  so. If you do not wish to do so, delete this exception statement from your
 *  version. If you delete this exception statement from all source files in
 *  the program, then also delete it here.
 *
 *  Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
 *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 *  FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 *  details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with Slurm; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/

#define _GNU_SOURCE

#include <ctype.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>
#include <unistd.h>

#include "slurm/slurm.h"
#include "slurm/slurm_errno.h"

#include "src/common/slurm_xlator.h"
#include "src/common/bitstring.h"
#include "src/common/env.h"
#include "src/common/gres.h"
#include "src/common/hostlist.h"
#include "src/common/list.h"
#include "src/common/xmalloc.h"
#include "src/common/xstring.h"

#include "../common/gres_common.h"

/*
 * These variables are required by the generic plugin interface. If they
 * are not found in the plugin, the plugin loader will ignore it.
 *
 * plugin_name - A string giving a human-readable description of the
 * plugin. There is no maximum length, but the symbol must refer to
 * a valid string.
 *
 * plugin_type - A string suggesting the type of the plugin or its
 * applicability to a particular form of data or method of data handling.
 * If the low-level plugin API is used, the contents of this string are
 * unimportant and may be anything. Slurm uses the higher-level plugin
 * interface which requires this string to be of the form
 *
 *	<application>/<method>
 *
 * where <application> is a description of the intended application of
 * the plugin (e.g., "auth" for Slurm authentication) and <method> is a
 * description of how this plugin satisfies that application. Slurm will
 * only load authentication plugins if the plugin_type string has a prefix
 * of "auth/".
 *
 * plugin_version - an unsigned 32-bit integer containing the Slurm version
 * (major.minor.micro combined into a single number).
 */
const char *plugin_name = "Gres MPS plugin";
const char *plugin_type = "gres/mps";
const uint32_t plugin_version = SLURM_VERSION_NUMBER;

static uint64_t debug_flags = 0;
static char *gres_name = "mps";
static List gres_devices = NULL;
static List mps_info = NULL;

typedef struct mps_dev_info {
	uint64_t count;
	int id;
} mps_dev_info_t;

static void _delete_gres_list(void *x)
{
	gres_slurmd_conf_t *p = (gres_slurmd_conf_t *) x;
	xfree(p->cpus);
	FREE_NULL_BITMAP(p->cpus_bitmap);
	xfree(p->file);
	xfree(p->links);
	xfree(p->name);
	xfree(p->type_name);
	xfree(p);
}

/*
 * Convert all GPU records to new entries in a list where each File is a
 * unique device (i.e. convert a record with "File=nvidia[0-3]" into 4
 * separate records).
 */
static List _build_gpu_list(List gres_list)
{
	ListIterator itr;
	gres_slurmd_conf_t *gres_record, *gpu_record;
	List gpu_list;
	hostlist_t hl;
	char *f_name;
	bool log_fname = true;

	if (gres_list == NULL)
		return NULL;

	gpu_list = list_create(_delete_gres_list);
	itr = list_iterator_create(gres_list);
	while ((gres_record = list_next(itr))) {
		if (xstrcmp(gres_record->name, "gpu"))
			continue;
		if (!gres_record->file) {
			if (log_fname) {
				error("%s: GPU configuration lacks \"File\" specification",
				      plugin_name);
				log_fname = false;
			}
			continue;
		}
		hl = hostlist_create(gres_record->file);
		while ((f_name = hostlist_shift(hl))) {
			gpu_record = xmalloc(sizeof(gres_slurmd_conf_t));
			gpu_record->config_flags = gres_record->config_flags;
			if (gres_record->type_name) {
				gpu_record->config_flags |=
					GRES_CONF_HAS_TYPE;
			}
			gpu_record->count = 1;
			gpu_record->cpu_cnt = gres_record->cpu_cnt;
			gpu_record->cpus = xstrdup(gres_record->cpus);
			if (gres_record->cpus_bitmap) {
				gpu_record->cpus_bitmap =
					bit_copy(gres_record->cpus_bitmap);
			}
			gpu_record->file = xstrdup(f_name);
			gpu_record->links = xstrdup(gres_record->links);
			gpu_record->name = xstrdup(gres_record->name);
			gpu_record->plugin_id = gres_record->plugin_id;
			gpu_record->type_name =
				xstrdup(gres_record->type_name);
			list_append(gpu_list, gpu_record);
			free(f_name);
		}
		hostlist_destroy(hl);
		(void) list_delete_item(itr);
	}
	list_iterator_destroy(itr);

	return gpu_list;
}

/*
 * Convert all MPS records to new entries in a list where each File is a
 * unique device (i.e. convert a record with "File=nvidia[0-3]" into 4
 * separate records). Similar to _build_gpu_list(), but we copy more fields,
 * divide the "Count" across all MPS records and remove them from the
 * original list.
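 *
 * As a sketch of the division (hypothetical gres.conf entry): a record of
 *	Name=mps Count=400 File=/dev/nvidia[0-3]
 * expands into four records, one per device file, each with Count=100
 * (the configured Count divided by the number of File entries).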
 */
static List _build_mps_list(List gres_list)
{
	ListIterator itr;
	gres_slurmd_conf_t *gres_record, *mps_record;
	List mps_list;
	hostlist_t hl;
	char *f_name;
	uint64_t count_per_file;
	int mps_no_file_recs = 0, mps_file_recs = 0;

	if (gres_list == NULL)
		return NULL;

	mps_list = list_create(_delete_gres_list);
	itr = list_iterator_create(gres_list);
	while ((gres_record = list_next(itr))) {
		if (xstrcmp(gres_record->name, "mps"))
			continue;
		if (!gres_record->file) {
			if (mps_no_file_recs)
				fatal("gres/mps: bad configuration, multiple configurations without \"File\"");
			if (mps_file_recs)
				fatal("gres/mps: multiple configurations with and without \"File\"");
			mps_no_file_recs++;
			mps_record = xmalloc(sizeof(gres_slurmd_conf_t));
			mps_record->config_flags = gres_record->config_flags;
			if (gres_record->type_name) {
				mps_record->config_flags |=
					GRES_CONF_HAS_TYPE;
			}
			mps_record->count = gres_record->count;
			mps_record->cpu_cnt = gres_record->cpu_cnt;
			mps_record->cpus = xstrdup(gres_record->cpus);
			if (gres_record->cpus_bitmap) {
				mps_record->cpus_bitmap =
					bit_copy(gres_record->cpus_bitmap);
			}
			mps_record->name = xstrdup(gres_record->name);
			mps_record->plugin_id = gres_record->plugin_id;
			mps_record->type_name =
				xstrdup(gres_record->type_name);
			list_append(mps_list, mps_record);
		} else {
			mps_file_recs++;
			if (mps_no_file_recs)
				fatal("gres/mps: multiple configurations with and without \"File\"");
			hl = hostlist_create(gres_record->file);
			/* Divide Count evenly across the File entries */
			count_per_file = gres_record->count /
					 hostlist_count(hl);
			while ((f_name = hostlist_shift(hl))) {
				mps_record =
					xmalloc(sizeof(gres_slurmd_conf_t));
				mps_record->config_flags =
					gres_record->config_flags;
				if (gres_record->type_name) {
					mps_record->config_flags |=
						GRES_CONF_HAS_TYPE;
				}
				mps_record->count = count_per_file;
				mps_record->cpu_cnt = gres_record->cpu_cnt;
				mps_record->cpus =
					xstrdup(gres_record->cpus);
				if (gres_record->cpus_bitmap) {
					mps_record->cpus_bitmap =
					     bit_copy(gres_record->cpus_bitmap);
				}
				mps_record->file = xstrdup(f_name);
				mps_record->name =
					xstrdup(gres_record->name);
				mps_record->plugin_id =
					gres_record->plugin_id;
				mps_record->type_name =
					xstrdup(gres_record->type_name);
				list_append(mps_list, mps_record);
				free(f_name);
			}
			hostlist_destroy(hl);
		}
		(void) list_delete_item(itr);
	}
	list_iterator_destroy(itr);

	return mps_list;
}

/*
 * If the total Count of gres/mps records is zero, remove them from the GRES
 * list sent to the slurmctld daemon.
 */
static void _remove_mps_recs(List gres_list)
{
	ListIterator itr;
	gres_slurmd_conf_t *gres_record;

	if (gres_list == NULL)
		return;

	itr = list_iterator_create(gres_list);
	while ((gres_record = list_next(itr))) {
		if (!xstrcmp(gres_record->name, "mps")) {
			(void) list_delete_item(itr);
		}
	}
	list_iterator_destroy(itr);
}

/* Distribute MPS Count to records on original list */
static void _distribute_count(List gres_conf_list, List gpu_conf_list,
			      uint64_t count)
{
	ListIterator gpu_itr;
	gres_slurmd_conf_t *gpu_record, *mps_record;
	int rem_gpus = list_count(gpu_conf_list);

	gpu_itr = list_iterator_create(gpu_conf_list);
	while ((gpu_record = list_next(gpu_itr))) {
		mps_record = xmalloc(sizeof(gres_slurmd_conf_t));
		mps_record->config_flags = gpu_record->config_flags;
		mps_record->count = count / rem_gpus;
		count -= mps_record->count;	/* Spread any remainder */
		rem_gpus--;
		mps_record->cpu_cnt = gpu_record->cpu_cnt;
		mps_record->cpus = xstrdup(gpu_record->cpus);
		if (gpu_record->cpus_bitmap) {
			mps_record->cpus_bitmap =
				bit_copy(gpu_record->cpus_bitmap);
		}
		mps_record->file = xstrdup(gpu_record->file);
		mps_record->name = xstrdup("mps");
		mps_record->plugin_id = gres_plugin_build_id("mps");
		mps_record->type_name = xstrdup(gpu_record->type_name);
		list_append(gres_conf_list, mps_record);
		list_append(gres_conf_list, gpu_record);
		(void) list_remove(gpu_itr);
	}
	list_iterator_destroy(gpu_itr);
}

/* Merge MPS records back to original list, updating and reordering as needed */
static int _merge_lists(List gres_conf_list, List gpu_conf_list,
			List mps_conf_list)
{
	ListIterator gpu_itr, mps_itr;
	gres_slurmd_conf_t *gpu_record, *mps_record;

	if (!list_count(gpu_conf_list) && list_count(mps_conf_list)) {
		error("%s: MPS specified without any GPU found", plugin_name);
		return SLURM_ERROR;
	}

	/*
	 * If gres/mps has Count, but no File specification, then evenly
	 * distribute gres/mps Count over all gres/gpu file records
	 */
	if (list_count(mps_conf_list) == 1) {
		mps_record = list_peek(mps_conf_list);
		if (!mps_record->file) {
			_distribute_count(gres_conf_list, gpu_conf_list,
					  mps_record->count);
			list_flush(mps_conf_list);
			return SLURM_SUCCESS;
		}
	}

	/* Add MPS records, matching File ordering to that of GPU records */
	gpu_itr = list_iterator_create(gpu_conf_list);
	while ((gpu_record = list_next(gpu_itr))) {
		mps_itr = list_iterator_create(mps_conf_list);
		while ((mps_record = list_next(mps_itr))) {
			if (!xstrcmp(gpu_record->file, mps_record->file)) {
				/* Copy gres/gpu Type & CPU info to gres/mps */
				if (gpu_record->type_name) {
					mps_record->config_flags |=
						GRES_CONF_HAS_TYPE;
				}
				if (gpu_record->cpus) {
					xfree(mps_record->cpus);
					mps_record->cpus =
						xstrdup(gpu_record->cpus);
				}
				if (gpu_record->cpus_bitmap) {
					mps_record->cpu_cnt =
						gpu_record->cpu_cnt;
					FREE_NULL_BITMAP(
						mps_record->cpus_bitmap);
					mps_record->cpus_bitmap =
					      bit_copy(gpu_record->cpus_bitmap);
				}
				xfree(mps_record->type_name);
				mps_record->type_name =
					xstrdup(gpu_record->type_name);
				list_append(gres_conf_list, mps_record);
				(void) list_remove(mps_itr);
				break;
			}
		}
		list_iterator_destroy(mps_itr);
		if (!mps_record) {
			/* Add gres/mps record to match gres/gpu record */
			mps_record = xmalloc(sizeof(gres_slurmd_conf_t));
			mps_record->config_flags = gpu_record->config_flags;
			mps_record->count = 0;
			mps_record->cpu_cnt = gpu_record->cpu_cnt;
			mps_record->cpus = xstrdup(gpu_record->cpus);
			if (gpu_record->cpus_bitmap) {
				mps_record->cpus_bitmap =
					bit_copy(gpu_record->cpus_bitmap);
			}
			mps_record->file = xstrdup(gpu_record->file);
			mps_record->name = xstrdup("mps");
			mps_record->plugin_id = gres_plugin_build_id("mps");
			mps_record->type_name =
				xstrdup(gpu_record->type_name);
			list_append(gres_conf_list, mps_record);
		}
		list_append(gres_conf_list, gpu_record);
		(void) list_remove(gpu_itr);
	}
	list_iterator_destroy(gpu_itr);

	/* Remove any remaining MPS records (no matching File) */
	mps_itr = list_iterator_create(mps_conf_list);
	while ((mps_record = list_next(mps_itr))) {
		error("%s: Discarding gres/mps configuration (File=%s) without matching gres/gpu record",
		      plugin_name, mps_record->file);
		(void) list_delete_item(mps_itr);
	}
	list_iterator_destroy(mps_itr);

	return SLURM_SUCCESS;
}

extern int init(void)
{
	debug("%s: %s loaded", __func__, plugin_name);

	return SLURM_SUCCESS;
}

extern int fini(void)
{
	debug("%s: unloading %s", __func__, plugin_name);
	FREE_NULL_LIST(gres_devices);
	FREE_NULL_LIST(mps_info);

	return SLURM_SUCCESS;
}

/*
 * Return true if fake_gpus.conf does exist. Used for testing.
 */
static bool _test_gpu_list_fake(void)
{
	struct stat config_stat;
	char *fake_gpus_file = NULL;
	bool have_fake_gpus = false;

	fake_gpus_file = get_extra_conf_path("fake_gpus.conf");
	if (stat(fake_gpus_file, &config_stat) >= 0)
		have_fake_gpus = true;
	xfree(fake_gpus_file);

	return have_fake_gpus;
}

/* Translate device file name to numeric index, e.g. "/dev/nvidia2" -> 2 */
static int _compute_local_id(char *dev_file_name)
{
	int i, local_id = -1, mult = 1;

	if (!dev_file_name)
		return -1;

	/* Parse the trailing decimal digits, least significant digit first */
	for (i = strlen(dev_file_name) - 1; i >= 0; i--) {
		if ((dev_file_name[i] < '0') || (dev_file_name[i] > '9'))
			break;
		if (local_id == -1)
			local_id = 0;
		local_id += (dev_file_name[i] - '0') * mult;
		mult *= 10;
	}

	return local_id;
}

static uint64_t _build_mps_dev_info(List gres_conf_list)
{
	uint64_t mps_count = 0;
	uint32_t mps_plugin_id = gres_plugin_build_id("mps");
	gres_slurmd_conf_t *gres_conf;
	mps_dev_info_t *mps_conf;
	ListIterator iter;

	mps_info = list_create(xfree_ptr);
	iter = list_iterator_create(gres_conf_list);
	while ((gres_conf = list_next(iter))) {
		if (gres_conf->plugin_id != mps_plugin_id)
			continue;
		mps_conf = xmalloc(sizeof(mps_dev_info_t));
		mps_conf->count = gres_conf->count;
		mps_conf->id = _compute_local_id(gres_conf->file);
		list_append(mps_info, mps_conf);
		mps_count += gres_conf->count;
	}
	list_iterator_destroy(iter);

	return mps_count;
}

/*
 * We could load gres state or validate it using various mechanisms here.
 * This only validates that the configuration was specified in gres.conf.
 * In the general case, no code would need to be changed.
 */
extern int node_config_load(List gres_conf_list, node_config_load_t *config)
{
	int rc = SLURM_SUCCESS;
	log_level_t log_lvl;
	List gpu_conf_list, mps_conf_list;
	bool have_fake_gpus = _test_gpu_list_fake();

	/* Assume this state is caused by an scontrol reconfigure */
	debug_flags = slurm_get_debug_flags();
	if (gres_devices) {
		debug("Resetting gres_devices");
		FREE_NULL_LIST(gres_devices);
	}
	FREE_NULL_LIST(mps_info);

	if (debug_flags & DEBUG_FLAG_GRES)
		log_lvl = LOG_LEVEL_VERBOSE;
	else
		log_lvl = LOG_LEVEL_DEBUG;
	log_var(log_lvl, "%s: Initialized gres.conf list:", plugin_name);
	print_gres_list(gres_conf_list, log_lvl);

	/*
	 * Ensure that every GPU device file is listed as a MPS file.
	 * Any MPS entry that we need to add will have a "Count" of zero.
	 * Every MPS "Type" will be made to match the GPU "Type". The order
	 * of MPS records (by "File") must match the order in which GPUs are
	 * defined for the GRES bitmaps in slurmctld to line up.
	 *
	 * First, convert all GPU records to new entries in a list where
	 * each File is a unique device (i.e. convert a record with
	 * "File=nvidia[0-3]" into 4 separate records).
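	 *
	 * As a sketch of the overall merge (hypothetical gres.conf):
	 *	Name=gpu Type=tesla File=/dev/nvidia[0-1]
	 *	Name=mps Count=200
	 * yields a final list equivalent to
	 *	Name=mps Type=tesla Count=100 File=/dev/nvidia0
	 *	Name=gpu Type=tesla Count=1 File=/dev/nvidia0
	 *	Name=mps Type=tesla Count=100 File=/dev/nvidia1
	 *	Name=gpu Type=tesla Count=1 File=/dev/nvidia1
	 * with the MPS Count distributed evenly across the GPU device files.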
	 */
	gpu_conf_list = _build_gpu_list(gres_conf_list);

	/* Now move MPS records to new list, each with a unique device file */
	mps_conf_list = _build_mps_list(gres_conf_list);

	/*
	 * Merge MPS records back to original list, updating and reordering
	 * as needed.
	 */
	rc = _merge_lists(gres_conf_list, gpu_conf_list, mps_conf_list);
	FREE_NULL_LIST(gpu_conf_list);
	FREE_NULL_LIST(mps_conf_list);
	if (rc != SLURM_SUCCESS)
		fatal("%s: failed to merge MPS and GPU configuration",
		      plugin_name);

	rc = common_node_config_load(gres_conf_list, gres_name, &gres_devices);
	if (rc != SLURM_SUCCESS)
		fatal("%s: failed to load configuration", plugin_name);
	if (_build_mps_dev_info(gres_conf_list) == 0)
		_remove_mps_recs(gres_conf_list);

	log_var(log_lvl, "%s: Final gres.conf list:", plugin_name);
	print_gres_list(gres_conf_list, log_lvl);

	/* Print in parsable format for tests if fake system is in use */
	if (have_fake_gpus) {
		info("Final normalized gres.conf list (parsable):");
		print_gres_list_parsable(gres_conf_list);
	}

	return rc;
}

/* Given a global device ID, return its gres/mps count */
static uint64_t _get_dev_count(int global_id)
{
	ListIterator itr;
	mps_dev_info_t *mps_ptr;
	uint64_t count = NO_VAL64;

	if (!mps_info) {
		error("%s: mps_info is NULL", __func__);
		return 100;
	}
	itr = list_iterator_create(mps_info);
	while ((mps_ptr = (mps_dev_info_t *) list_next(itr))) {
		if (mps_ptr->id == global_id) {
			count = mps_ptr->count;
			break;
		}
	}
	list_iterator_destroy(itr);
	if (count == NO_VAL64) {
		error("%s: Could not find gres/mps count for device ID %d",
		      __func__, global_id);
		return 100;
	}

	return count;
}

static void _set_env(char ***env_ptr, void *gres_ptr, int node_inx,
		     bitstr_t *usable_gres, bool *already_seen, int *local_inx,
		     bool reset, bool is_job)
{
	char *global_list = NULL, *local_list = NULL, *perc_env = NULL;
	char perc_str[64], *slurm_env_var = NULL;
	uint64_t count_on_dev, gres_per_node = 0, percentage;
	int global_id = -1;

	if (is_job)
		slurm_env_var = "SLURM_JOB_GPUS";
	else
		slurm_env_var = "SLURM_STEP_GPUS";

	if (*already_seen) {
		global_list = xstrdup(getenvp(*env_ptr, slurm_env_var));
		local_list = xstrdup(getenvp(*env_ptr,
					     "CUDA_VISIBLE_DEVICES"));
		perc_env = xstrdup(getenvp(*env_ptr,
					 "CUDA_MPS_ACTIVE_THREAD_PERCENTAGE"));
	}

	common_gres_set_env(gres_devices, env_ptr, gres_ptr, node_inx,
			    usable_gres, "", local_inx, &gres_per_node,
			    &local_list, &global_list, reset, is_job,
			    &global_id);

	if (perc_env) {
		env_array_overwrite(env_ptr,
				    "CUDA_MPS_ACTIVE_THREAD_PERCENTAGE",
				    perc_env);
		xfree(perc_env);
	} else if (gres_per_node && mps_info) {
		count_on_dev = _get_dev_count(global_id);
		if (count_on_dev > 0) {
			percentage = (gres_per_node * 100) / count_on_dev;
			percentage = MAX(percentage, 1);
		} else
			percentage = 0;
		snprintf(perc_str, sizeof(perc_str), "%"PRIu64, percentage);
		env_array_overwrite(env_ptr,
				    "CUDA_MPS_ACTIVE_THREAD_PERCENTAGE",
				    perc_str);
	} else if (gres_per_node) {
		error("%s: mps_info list is NULL", __func__);
		snprintf(perc_str, sizeof(perc_str), "%"PRIu64, gres_per_node);
		env_array_overwrite(env_ptr,
				    "CUDA_MPS_ACTIVE_THREAD_PERCENTAGE",
				    perc_str);
	}

	if (global_list) {
		env_array_overwrite(env_ptr, slurm_env_var, global_list);
		xfree(global_list);
	}

	if (local_list) {
		/*
		 * CUDA_VISIBLE_DEVICES is relative to the MPS server.
		 * With only one GPU under the control of MPS, the device
		 * number will always be "0".
		 */
		env_array_overwrite(env_ptr, "CUDA_VISIBLE_DEVICES", "0");
		env_array_overwrite(env_ptr, "GPU_DEVICE_ORDINAL", "0");
		xfree(local_list);
		*already_seen = true;
	}
}
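/*
 * Worked example for _set_env() (hypothetical values): with gres/mps
 * Count=100 configured on a device and a job allocated gres/mps:30 on it,
 * the computed share is (30 * 100) / 100 = 30, so the job runs with
 * CUDA_MPS_ACTIVE_THREAD_PERCENTAGE=30.
 */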
/*
 * Set environment variables as appropriate for a job (i.e. all tasks) based
 * upon the job's GRES state.
 */
extern void job_set_env(char ***job_env_ptr, void *gres_ptr, int node_inx)
{
	/*
	 * Variables are not static like in step_*_env since we could be
	 * calling this from the slurmd where we are dealing with a different
	 * job each time we hit this function, so we don't want to keep track
	 * of other unrelated jobs' status. This can also get called multiple
	 * times (different prologs and such) which would also result in bad
	 * info each call after the first.
	 */
	int local_inx = 0;
	bool already_seen = false;

	_set_env(job_env_ptr, gres_ptr, node_inx, NULL,
		 &already_seen, &local_inx, false, true);
}

/*
 * Set environment variables as appropriate for a step (i.e. all tasks) based
 * upon the job step's GRES state.
 */
extern void step_set_env(char ***step_env_ptr, void *gres_ptr)
{
	static int local_inx = 0;
	static bool already_seen = false;

	_set_env(step_env_ptr, gres_ptr, 0, NULL,
		 &already_seen, &local_inx, false, false);
}

/*
 * Reset environment variables as appropriate for a job (i.e. this one task)
 * based upon the job step's GRES state and assigned CPUs.
 */
extern void step_reset_env(char ***step_env_ptr, void *gres_ptr,
			   bitstr_t *usable_gres)
{
	static int local_inx = 0;
	static bool already_seen = false;

	_set_env(step_env_ptr, gres_ptr, 0, usable_gres,
		 &already_seen, &local_inx, true, false);
}

/* Send GRES information to slurmstepd on the specified file descriptor */
extern void send_stepd(int fd)
{
	int mps_cnt;
	mps_dev_info_t *mps_ptr;
	ListIterator itr;

	common_send_stepd(fd, gres_devices);

	if (!mps_info) {
		mps_cnt = 0;
		safe_write(fd, &mps_cnt, sizeof(int));
	} else {
		mps_cnt = list_count(mps_info);
		safe_write(fd, &mps_cnt, sizeof(int));
		itr = list_iterator_create(mps_info);
		while ((mps_ptr = (mps_dev_info_t *) list_next(itr))) {
			safe_write(fd, &mps_ptr->count, sizeof(uint64_t));
			safe_write(fd, &mps_ptr->id, sizeof(int));
		}
		list_iterator_destroy(itr);
	}
	return;

rwfail:
	error("%s: failed", __func__);
	return;
}

/* Receive GRES information from slurmd on the specified file descriptor */
extern void recv_stepd(int fd)
{
	int i, mps_cnt;
	mps_dev_info_t *mps_ptr = NULL;

	common_recv_stepd(fd, &gres_devices);

	safe_read(fd, &mps_cnt, sizeof(int));
	if (mps_cnt) {
		mps_info = list_create(xfree_ptr);
		for (i = 0; i < mps_cnt; i++) {
			mps_ptr = xmalloc(sizeof(mps_dev_info_t));
			safe_read(fd, &mps_ptr->count, sizeof(uint64_t));
			safe_read(fd, &mps_ptr->id, sizeof(int));
			list_append(mps_info, mps_ptr);
			mps_ptr = NULL;
		}
	}
	return;

rwfail:
	error("%s: failed", __func__);
	xfree(mps_ptr);
	return;
}
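/*
 * Note on the stream format shared by send_stepd() and recv_stepd() above:
 * after the common GRES device data, an int record count is written,
 * followed by one (uint64_t count, int id) pair per mps_info entry, in
 * list order.
 */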
/*
 * get data from a job's GRES data structure
 * IN job_gres_data - job's GRES data structure
 * IN node_inx - zero-origin index of the node within the job's allocation
 *	for which data is desired
 * IN data_type - type of data to get from the job's data
 * OUT data - pointer to the data from job's GRES data structure
 *	DO NOT FREE: This is a pointer into the job's data structure
 * RET - SLURM_SUCCESS or error code
 */
extern int job_info(gres_job_state_t *job_gres_data, uint32_t node_inx,
		    enum gres_job_data_type data_type, void *data)
{
	return EINVAL;
}

/*
 * get data from a step's GRES data structure
 * IN step_gres_data - step's GRES data structure
 * IN node_inx - zero-origin index of the node within the job's allocation
 *	for which data is desired. Note this can differ from the step's
 *	node allocation index.
 * IN data_type - type of data to get from the step's data
 * OUT data - pointer to the data from step's GRES data structure
 *	DO NOT FREE: This is a pointer into the step's data structure
 * RET - SLURM_SUCCESS or error code
 */
extern int step_info(gres_step_state_t *step_gres_data, uint32_t node_inx,
		     enum gres_step_data_type data_type, void *data)
{
	return EINVAL;
}

/*
 * Return a list of devices of this type. The list elements are of type
 * "gres_device_t" and the list should be freed using FREE_NULL_LIST().
 */
extern List get_devices(void)
{
	return gres_devices;
}

extern void step_hardware_init(bitstr_t *usable_gres, char *settings)
{
	return;
}

extern void step_hardware_fini(void)
{
	return;
}

/*
 * Build record used to set environment variables as appropriate for a job's
 * prolog or epilog based upon the GRES allocated to the job.
 */
extern gres_epilog_info_t *epilog_build_env(gres_job_state_t *gres_job_ptr)
{
	int i;
	gres_epilog_info_t *epilog_info;

	epilog_info = xmalloc(sizeof(gres_epilog_info_t));
	epilog_info->node_cnt = gres_job_ptr->node_cnt;
	epilog_info->gres_bit_alloc = xcalloc(epilog_info->node_cnt,
					      sizeof(bitstr_t *));
	epilog_info->gres_cnt_node_alloc = xcalloc(epilog_info->node_cnt,
						   sizeof(uint64_t));
	for (i = 0; i < epilog_info->node_cnt; i++) {
		if (gres_job_ptr->gres_bit_alloc &&
		    gres_job_ptr->gres_bit_alloc[i]) {
			epilog_info->gres_bit_alloc[i] =
				bit_copy(gres_job_ptr->gres_bit_alloc[i]);
		}
		if (gres_job_ptr->gres_cnt_node_alloc) {
			epilog_info->gres_cnt_node_alloc[i] =
				gres_job_ptr->gres_cnt_node_alloc[i];
		}
	}

	return epilog_info;
}

/*
 * Set environment variables as appropriate for a job's prolog or epilog based
 * upon the GRES allocated to the job.
 */
extern void epilog_set_env(char ***epilog_env_ptr,
			   gres_epilog_info_t *epilog_info, int node_inx)
{
	int dev_inx = -1, env_inx = 0, global_id = -1, i;
	uint64_t count_on_dev, gres_per_node = 0, percentage;
	gres_device_t *gres_device;
	ListIterator iter;

	xassert(epilog_env_ptr);
	if (!epilog_info)
		return;
	if (!gres_devices)
		return;

	if (epilog_info->node_cnt == 0)	/* no_consume */
		return;
	if (node_inx >= epilog_info->node_cnt) {
		error("%s: %s: bad node index (%d >= %u)",
		      plugin_type, __func__, node_inx, epilog_info->node_cnt);
		return;
	}

	if (*epilog_env_ptr) {
		for (env_inx = 0; (*epilog_env_ptr)[env_inx]; env_inx++)
			;
		xrealloc(*epilog_env_ptr, sizeof(char *) * (env_inx + 3));
	} else {
		*epilog_env_ptr = xcalloc(3, sizeof(char *));
	}

	if (epilog_info->gres_bit_alloc &&
	    epilog_info->gres_bit_alloc[node_inx])
		dev_inx = bit_ffs(epilog_info->gres_bit_alloc[node_inx]);
	if (dev_inx >= 0) {
		/* Translate bit index to device number, these may differ */
		i = -1;
		iter = list_iterator_create(gres_devices);
		while ((gres_device = list_next(iter))) {
			i++;
			if (i == dev_inx) {
				global_id = gres_device->dev_num;
				break;
			}
		}
		list_iterator_destroy(iter);
	}
	if (global_id >= 0) {
		xstrfmtcat((*epilog_env_ptr)[env_inx++],
			   "CUDA_VISIBLE_DEVICES=%d", global_id);
	}
	if ((global_id >= 0) && epilog_info->gres_cnt_node_alloc &&
	    epilog_info->gres_cnt_node_alloc[node_inx]) {
		gres_per_node = epilog_info->gres_cnt_node_alloc[node_inx];
		count_on_dev = _get_dev_count(global_id);
		if (count_on_dev > 0) {
			percentage = (gres_per_node * 100) / count_on_dev;
			percentage = MAX(percentage, 1);
		} else
			percentage = 0;
		xstrfmtcat((*epilog_env_ptr)[env_inx++],
			   "CUDA_MPS_ACTIVE_THREAD_PERCENTAGE=%"PRIu64,
			   percentage);
	}

	return;
}
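/*
 * Example of epilog_set_env() output (hypothetical allocation): if a job
 * was allocated 50 of the 100 gres/mps configured on /dev/nvidia1, the
 * prolog/epilog environment would include
 *	CUDA_VISIBLE_DEVICES=1
 *	CUDA_MPS_ACTIVE_THREAD_PERCENTAGE=50
 */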