/*****************************************************************************\
 *  job_container_plugin.c - job container plugin stub.
 *****************************************************************************
 *  Copyright (C) 2013 SchedMD LLC
 *  Written by Morris Jette
 *
 *  This file is part of Slurm, a resource management program.
 *  For details, see <https://slurm.schedmd.com/>.
 *  Please also read the included file: DISCLAIMER.
 *
 *  Slurm is free software; you can redistribute it and/or modify it under
 *  the terms of the GNU General Public License as published by the Free
 *  Software Foundation; either version 2 of the License, or (at your option)
 *  any later version.
 *
 *  In addition, as a special exception, the copyright holders give permission
 *  to link the code of portions of this program with the OpenSSL library under
 *  certain conditions as described in each individual source file, and
 *  distribute linked combinations including the two. You must obey the GNU
 *  General Public License in all respects for all of the code used other than
 *  OpenSSL. If you modify file(s) with this exception, you may extend this
 *  exception to your version of the file(s), but you are not obligated to do
 *  so. If you do not wish to do so, delete this exception statement from your
 *  version.  If you delete this exception statement from all source files in
 *  the program, then also delete it here.
 *
 *  Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
 *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 *  details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with Slurm; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
\*****************************************************************************/ #include #include "src/common/plugin.h" #include "src/common/plugrack.h" #include "src/common/slurm_protocol_api.h" #include "src/common/xmalloc.h" #include "src/common/xstring.h" #include "src/slurmd/common/job_container_plugin.h" #include "src/slurmd/slurmstepd/slurmstepd_job.h" typedef struct job_container_ops { int (*container_p_create) (uint32_t job_id); int (*container_p_add_cont) (uint32_t job_id, uint64_t cont_id); int (*container_p_join) (uint32_t job_id, uid_t uid); int (*container_p_delete) (uint32_t job_id); int (*container_p_restore) (char *dir_name, bool recover); void (*container_p_reconfig) (void); } job_container_ops_t; /* * Must be synchronized with job_container_ops_t above. */ static const char *syms[] = { "container_p_create", "container_p_add_cont", "container_p_join", "container_p_delete", "container_p_restore", "container_p_reconfig", }; static job_container_ops_t *ops = NULL; static plugin_context_t **g_container_context = NULL; static int g_container_context_num = -1; static pthread_mutex_t g_container_context_lock = PTHREAD_MUTEX_INITIALIZER; static bool init_run = false; /* * Initialize the job container plugin. 
* * RET - slurm error code */ extern int job_container_init(void) { int retval = SLURM_SUCCESS; char *plugin_type = "job_container"; char *container_plugin_type = NULL; char *last = NULL, *job_container_plugin_list, *job_container = NULL; if (init_run && (g_container_context_num >= 0)) return retval; slurm_mutex_lock(&g_container_context_lock); if (g_container_context_num >= 0) goto done; container_plugin_type = slurm_get_job_container_plugin(); g_container_context_num = 0; /* mark it before anything else */ if ((container_plugin_type == NULL) || (container_plugin_type[0] == '\0')) goto done; job_container_plugin_list = container_plugin_type; while ((job_container = strtok_r(job_container_plugin_list, ",", &last))) { xrealloc(ops, sizeof(job_container_ops_t) * (g_container_context_num + 1)); xrealloc(g_container_context, (sizeof(plugin_context_t *) * (g_container_context_num + 1))); if (xstrncmp(job_container, "job_container/", 14) == 0) job_container += 14; /* backward compatibility */ job_container = xstrdup_printf("job_container/%s", job_container); g_container_context[g_container_context_num] = plugin_context_create( plugin_type, job_container, (void **)&ops[g_container_context_num], syms, sizeof(syms)); if (!g_container_context[g_container_context_num]) { error("cannot create %s context for %s", plugin_type, job_container); xfree(job_container); retval = SLURM_ERROR; break; } xfree(job_container); g_container_context_num++; job_container_plugin_list = NULL; /* for next iteration */ } init_run = true; done: slurm_mutex_unlock(&g_container_context_lock); xfree(container_plugin_type); if (retval != SLURM_SUCCESS) job_container_fini(); return retval; } /* * Terminate the job container plugin, free memory. 
* * RET - slurm error code */ extern int job_container_fini(void) { int i, rc = SLURM_SUCCESS; slurm_mutex_lock(&g_container_context_lock); if (!g_container_context) goto done; init_run = false; for (i = 0; i < g_container_context_num; i++) { if (g_container_context[i]) { if (plugin_context_destroy(g_container_context[i]) != SLURM_SUCCESS) { rc = SLURM_ERROR; } } } xfree(ops); xfree(g_container_context); g_container_context_num = -1; done: slurm_mutex_unlock(&g_container_context_lock); return rc; } /* Create a container for the specified job */ extern int container_g_create(uint32_t job_id) { int i, rc = SLURM_SUCCESS; if (job_container_init()) return SLURM_ERROR; for (i = 0; ((i < g_container_context_num) && (rc == SLURM_SUCCESS)); i++) { rc = (*(ops[i].container_p_create))(job_id); } return rc; } /* * Add the calling process to the specified job's container. * A proctrack container will be generated containing the process * before container_g_add_cont() is called (see below). */ extern int container_g_join(uint32_t job_id, uid_t uid) { int i, rc = SLURM_SUCCESS; if (job_container_init()) return SLURM_ERROR; for (i = 0; ((i < g_container_context_num) && (rc == SLURM_SUCCESS)); i++) { rc = (*(ops[i].container_p_join))(job_id, uid); } return rc; } /* Add a proctrack container (PAGG) to the specified job's container * The PAGG will be the job's cont_id returned by proctrack/sgi_job */ extern int container_g_add_cont(uint32_t job_id, uint64_t cont_id) { int i, rc = SLURM_SUCCESS; if (job_container_init()) return SLURM_ERROR; for (i = 0; ((i < g_container_context_num) && (rc == SLURM_SUCCESS)); i++) { rc = (*(ops[i].container_p_add_cont))(job_id, cont_id); } return rc; } /* Delete the container for the specified job */ extern int container_g_delete(uint32_t job_id) { int i, rc = SLURM_SUCCESS; if (job_container_init()) return SLURM_ERROR; for (i = 0; ((i < g_container_context_num) && (rc == SLURM_SUCCESS)); i++) { rc = (*(ops[i].container_p_delete))(job_id); } return 
rc; } /* Restore container information */ extern int container_g_restore(char * dir_name, bool recover) { int i, rc = SLURM_SUCCESS; if (job_container_init()) return SLURM_ERROR; for (i = 0; ((i < g_container_context_num) && (rc == SLURM_SUCCESS)); i++) { rc = (*(ops[i].container_p_restore))(dir_name, recover); } return rc; } /* Note change in configuration (e.g. "DebugFlag=JobContainer" set) */ extern void container_g_reconfig(void) { int i; (void) job_container_init(); for (i = 0; i < g_container_context_num;i++) { (*(ops[i].container_p_reconfig))(); } return; }