/*****************************************************************************\
* job_container_plugin.c - job container plugin stub.
*****************************************************************************
* Copyright (C) 2013 SchedMD LLC
* Written by Morris Jette
*
* This file is part of Slurm, a resource management program.
* For details, see .
* Please also read the included file: DISCLAIMER.
*
* Slurm is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with Slurm; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#include
#include "src/common/plugin.h"
#include "src/common/plugrack.h"
#include "src/common/slurm_protocol_api.h"
#include "src/common/xmalloc.h"
#include "src/common/xstring.h"
#include "src/slurmd/common/job_container_plugin.h"
#include "src/slurmd/slurmstepd/slurmstepd_job.h"
typedef struct job_container_ops {
int (*container_p_create) (uint32_t job_id);
int (*container_p_add_cont) (uint32_t job_id, uint64_t cont_id);
int (*container_p_join) (uint32_t job_id, uid_t uid);
int (*container_p_delete) (uint32_t job_id);
int (*container_p_restore) (char *dir_name, bool recover);
void (*container_p_reconfig) (void);
} job_container_ops_t;
/*
* Must be synchronized with job_container_ops_t above.
*/
static const char *syms[] = {
"container_p_create",
"container_p_add_cont",
"container_p_join",
"container_p_delete",
"container_p_restore",
"container_p_reconfig",
};
static job_container_ops_t *ops = NULL;
static plugin_context_t **g_container_context = NULL;
static int g_container_context_num = -1;
static pthread_mutex_t g_container_context_lock =
PTHREAD_MUTEX_INITIALIZER;
static bool init_run = false;
/*
* Initialize the job container plugin.
*
* RET - slurm error code
*/
extern int job_container_init(void)
{
int retval = SLURM_SUCCESS;
char *plugin_type = "job_container";
char *container_plugin_type = NULL;
char *last = NULL, *job_container_plugin_list, *job_container = NULL;
if (init_run && (g_container_context_num >= 0))
return retval;
slurm_mutex_lock(&g_container_context_lock);
if (g_container_context_num >= 0)
goto done;
container_plugin_type = slurm_get_job_container_plugin();
g_container_context_num = 0; /* mark it before anything else */
if ((container_plugin_type == NULL) ||
(container_plugin_type[0] == '\0'))
goto done;
job_container_plugin_list = container_plugin_type;
while ((job_container =
strtok_r(job_container_plugin_list, ",", &last))) {
xrealloc(ops,
sizeof(job_container_ops_t) *
(g_container_context_num + 1));
xrealloc(g_container_context, (sizeof(plugin_context_t *)
* (g_container_context_num + 1)));
if (xstrncmp(job_container, "job_container/", 14) == 0)
job_container += 14; /* backward compatibility */
job_container = xstrdup_printf("job_container/%s",
job_container);
g_container_context[g_container_context_num] =
plugin_context_create(
plugin_type, job_container,
(void **)&ops[g_container_context_num],
syms, sizeof(syms));
if (!g_container_context[g_container_context_num]) {
error("cannot create %s context for %s",
plugin_type, job_container);
xfree(job_container);
retval = SLURM_ERROR;
break;
}
xfree(job_container);
g_container_context_num++;
job_container_plugin_list = NULL; /* for next iteration */
}
init_run = true;
done:
slurm_mutex_unlock(&g_container_context_lock);
xfree(container_plugin_type);
if (retval != SLURM_SUCCESS)
job_container_fini();
return retval;
}
/*
* Terminate the job container plugin, free memory.
*
* RET - slurm error code
*/
extern int job_container_fini(void)
{
int i, rc = SLURM_SUCCESS;
slurm_mutex_lock(&g_container_context_lock);
if (!g_container_context)
goto done;
init_run = false;
for (i = 0; i < g_container_context_num; i++) {
if (g_container_context[i]) {
if (plugin_context_destroy(g_container_context[i])
!= SLURM_SUCCESS) {
rc = SLURM_ERROR;
}
}
}
xfree(ops);
xfree(g_container_context);
g_container_context_num = -1;
done:
slurm_mutex_unlock(&g_container_context_lock);
return rc;
}
/* Create a container for the specified job */
extern int container_g_create(uint32_t job_id)
{
int i, rc = SLURM_SUCCESS;
if (job_container_init())
return SLURM_ERROR;
for (i = 0; ((i < g_container_context_num) && (rc == SLURM_SUCCESS));
i++) {
rc = (*(ops[i].container_p_create))(job_id);
}
return rc;
}
/*
* Add the calling process to the specified job's container.
* A proctrack container will be generated containing the process
* before container_g_add_cont() is called (see below).
*/
extern int container_g_join(uint32_t job_id, uid_t uid)
{
int i, rc = SLURM_SUCCESS;
if (job_container_init())
return SLURM_ERROR;
for (i = 0; ((i < g_container_context_num) && (rc == SLURM_SUCCESS));
i++) {
rc = (*(ops[i].container_p_join))(job_id, uid);
}
return rc;
}
/* Add a proctrack container (PAGG) to the specified job's container
* The PAGG will be the job's cont_id returned by proctrack/sgi_job */
extern int container_g_add_cont(uint32_t job_id, uint64_t cont_id)
{
int i, rc = SLURM_SUCCESS;
if (job_container_init())
return SLURM_ERROR;
for (i = 0; ((i < g_container_context_num) && (rc == SLURM_SUCCESS));
i++) {
rc = (*(ops[i].container_p_add_cont))(job_id, cont_id);
}
return rc;
}
/* Delete the container for the specified job */
extern int container_g_delete(uint32_t job_id)
{
int i, rc = SLURM_SUCCESS;
if (job_container_init())
return SLURM_ERROR;
for (i = 0; ((i < g_container_context_num) && (rc == SLURM_SUCCESS));
i++) {
rc = (*(ops[i].container_p_delete))(job_id);
}
return rc;
}
/* Restore container information */
extern int container_g_restore(char * dir_name, bool recover)
{
int i, rc = SLURM_SUCCESS;
if (job_container_init())
return SLURM_ERROR;
for (i = 0; ((i < g_container_context_num) && (rc == SLURM_SUCCESS));
i++) {
rc = (*(ops[i].container_p_restore))(dir_name, recover);
}
return rc;
}
/* Note change in configuration (e.g. "DebugFlag=JobContainer" set) */
extern void container_g_reconfig(void)
{
int i;
(void) job_container_init();
for (i = 0; i < g_container_context_num;i++) {
(*(ops[i].container_p_reconfig))();
}
return;
}