/*****************************************************************************\
* set_oomadj.c - prevent slurmd/slurmstepd from being killed by the
* kernel OOM killer
*****************************************************************************
* Written by Hongjia Cao, National University of Defense Technology, China.
* CODE-OCEC-09-009. All rights reserved.
*
* This file is part of Slurm, a resource management program.
* For details, see <https://slurm.schedmd.com/>.
* Please also read the included file: DISCLAIMER.
*
* Slurm is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with Slurm; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#include
#include
#include
#include
#include
#include
#include
#include "src/common/log.h"
#if !defined(__FreeBSD__)
/*
 * set_oom_adj - set the OOM killer adjustment of the calling process.
 *
 * IN adj - requested adjustment, expressed on the oom_score_adj scale
 *          [-1000, 1000].
 * RET 0 on success, -1 if the control file could not be opened, the value
 *     could not be formatted, or the value could not be written.
 *
 * The value is written to /proc/self/oom_score_adj. If that file does not
 * exist, /proc/self/oom_adj is used instead and the value is rescaled to
 * the [-17, 15] range that file expects.
 */
extern int set_oom_adj(int adj)
{
	const char *oom_adj_file = "/proc/self/oom_score_adj";
	char oom_adj[16];
	ssize_t written;
	int len;
	int fd;

	fd = open(oom_adj_file, O_WRONLY);
	if (fd < 0) {
		if (errno == ENOENT) {
			debug("%s not found. Falling back to oom_adj",
			      oom_adj_file);
			oom_adj_file = "/proc/self/oom_adj";
			fd = open(oom_adj_file, O_WRONLY);
			if (fd < 0) {
				if (errno == ENOENT)
					error("%s not found", oom_adj_file);
				else
					error("failed to open %s: %m",
					      oom_adj_file);
				return -1;
			}
			/* Convert range from [-1000,1000] to [-17,15]
			 * for use with older Linux kernel before 2.6.36 */
			if (adj < 0)
				adj = (adj * 17) / 1000;
			else if (adj > 0)
				adj = (adj * 15) / 1000;
		} else {
			error("failed to open %s: %m", oom_adj_file);
			return -1;
		}
	}

	/* Reject both an encoding error (negative) and truncation. */
	len = snprintf(oom_adj, sizeof(oom_adj), "%d", adj);
	if ((len < 0) || ((size_t) len >= sizeof(oom_adj))) {
		close(fd);
		return -1;
	}

	/* Retry only when interrupted by a signal. */
	do {
		written = write(fd, oom_adj, (size_t) len);
	} while ((written < 0) && (errno == EINTR));

	if (written < 0) {
		/* Log before close() so that %m still reflects write()'s errno */
		error("failed to write %s to %s: %m", oom_adj, oom_adj_file);
		close(fd);
		return -1;
	}

	close(fd);
	return 0;
}
#else /* __FreeBSD__ */
/*
 * set_oom_adj - FreeBSD stand-in for the Linux OOM adjustment routine.
 *
 * IN adj - ignored on this platform.
 * RET always 0.
 */
extern int set_oom_adj(int adj)
{
	/* FreeBSD does not handle OOM the same way Linux does,
	 * so there is nothing to adjust here. */
	const int rc = 0;

	(void) adj;	/* parameter intentionally unused */

	return rc;
}
#endif